<a href="https://colab.research.google.com/github/JellyJoa/DataAnalysis/blob/master/Dacon/sound_classify/baseline2_insert2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import librosa 
import librosa.display as dsp
from IPython.display import Audio
import pandas as pd
import numpy as np
from tqdm import tqdm
import os
import torch

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [2]:
import random

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(929)

In [3]:
train = pd.read_csv('/content/drive/MyDrive/DL/sound_classify/train.csv')
train.head()

Unnamed: 0,file_name,label
0,001.wav,9
1,002.wav,0
2,004.wav,1
3,005.wav,8
4,006.wav,0


In [4]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   file_name  400 non-null    object
 1   label      400 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 6.4+ KB


In [5]:
data, sample_rate = librosa.load('/content/drive/MyDrive/DL/sound_classify/train/001.wav', sr = 16000)
print('sample_rate:', sample_rate, ', audio shape:', data.shape)
print('length:', data.shape[0]/float(sample_rate), 'secs')

sample_rate: 16000 , audio shape: (10192,)
length: 0.637 secs


In [6]:
def train_dataset():
    folder = "/content/drive/MyDrive/DL/sound_classify/train/"
    dataset = []
    for file in tqdm(os.listdir(folder),colour='green'):
        if 'wav' in file:
            abs_file_path = os.path.join(folder,file)
            data, sr = librosa.load(abs_file_path, sr = 16000)
            class_label = int(train[train.file_name == file].label)
            dataset.append([data,class_label])
    
    print("Dataset 생성 완료")
    return pd.DataFrame(dataset,columns=['data','label'])

In [7]:
def test_dataset():
    folder = "/content/drive/MyDrive/DL/sound_classify/test/"
    dataset = []
    for file in tqdm(os.listdir(folder),colour='green'):
        if 'wav' in file:
            abs_file_path = os.path.join(folder,file)
            data, sr = librosa.load(abs_file_path, sr = 16000)
            
            dataset.append([data, file])
    
    print("Dataset 생성 완료")
    return pd.DataFrame(dataset,columns=['data', 'file_name'])

In [8]:
train_wav = train_dataset()
test_wav = test_dataset()

100%|[32m██████████[0m| 400/400 [00:12<00:00, 32.32it/s]


Dataset 생성 완료


100%|[32m██████████[0m| 200/200 [00:05<00:00, 33.86it/s]

Dataset 생성 완료





In [9]:
train_wav.head()

Unnamed: 0,data,label
0,"[3.6655838e-05, -3.7366447e-06, 3.4776433e-05,...",5
1,"[0.00011985076, 0.00016174652, 0.00017246709, ...",9
2,"[1.2653453e-05, 2.3892262e-05, -7.51332e-06, 4...",0
3,"[-0.00010586961, -0.00020532755, -0.0002140045...",7
4,"[-4.289015e-05, 9.891299e-05, 2.6636611e-05, 0...",4


In [10]:
train_x = np.array(train_wav.data)
test_x = np.array(test_wav.data)

In [11]:
def get_mini(data):

    mini = 9999999
    for i in data:
        if len(i) < mini:
            mini = len(i)

    return mini

train_mini = get_mini(train_x)
test_mini = get_mini(test_x)

#음성들의 길이를 맞춰줍니다.

mini = np.min([train_mini, test_mini])

In [12]:
print('가장 작은 길이 :', mini)

가장 작은 길이 : 5711


In [13]:
def set_length(data, d_mini):

    result = []
    for i in data:
        result.append(i[:d_mini])
    result = np.array(result)

    return result

train_x = set_length(train_x, mini)
test_x = set_length(test_x, mini)

In [14]:
print('train :', train_x.shape)
print('test :', test_x.shape)

train : (400, 5711)
test : (200, 5711)


In [15]:
extracted_features = librosa.feature.mfcc(y=train_x[0], sr=16000, n_mfcc=65)
extracted_features.shape

(65, 12)

In [16]:
def preprocess_dataset(data):
    mfccs = []
    for i in data:
        extracted_features = librosa.feature.mfcc(y=i,
                                              sr=16000,
                                              n_mfcc=65)
        mfccs.append(extracted_features)
            
    return mfccs

In [17]:
train_mfccs = preprocess_dataset(train_x)
train_mfccs = np.array(train_mfccs)
train_mfccs = train_mfccs.reshape(-1, train_mfccs.shape[1], train_mfccs.shape[2], 1)
#test_x = test_x.reshape(-1, test_x.shape[1], test_x.shape[2], 1)

In [18]:
np.array(train_mfccs).shape

(400, 65, 12, 1)

In [19]:
import torchvision.datasets as datasets # 데이터셋 집합체
import torchvision.transforms as transforms # 변환 툴

from torch.utils.data import DataLoader # 학습 및 배치로 모델에 넣어주기 위한 툴
from torch.utils.data import DataLoader, Dataset

class CustomDataset(Dataset):
    def __init__(self, X, y, train_mode=True, transforms=None): #필요한 변수들을 선언
        self.X = X
        self.y = y
        self.train_mode = train_mode
        self.transforms = transforms

    def __getitem__(self, index): #index번째 data를 return
        X = self.X[index]
        
        if self.transforms is not None:
            X = self.transforms(X)

        if self.train_mode:
            y = self.y[index]
            return X, y
        else:
            return X
    
    def __len__(self): #길이 return
        return len(self.X)

In [20]:
train_X = train_mfccs[:300]
vali_X = train_mfccs[300:]

In [21]:
train_y = train_wav.label[:300]
vali_y = train_wav.label[300:].reset_index(drop = True)

In [22]:
# 에포크 설정
num_epochs = 100

# 배치 사이즈 설정
batch_size = 10

#만든 train dataset를 DataLoader에 넣어 batch 만들기
train_dataset = CustomDataset(X=train_X, y=train_y)
train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle=True)

vali_dataset = CustomDataset(X=vali_X, y=vali_y)
vali_loader = DataLoader(vali_dataset, batch_size = batch_size, shuffle=False)

In [23]:
train_batches = len(train_loader)
vali_batches = len(vali_loader)

print('/ total train batches :', train_batches)
print('/ total valid batches :', vali_batches)

/ total train batches : 30
/ total valid batches : 10


In [25]:
from tqdm.auto import tqdm
import torch.nn as nn # 신경망들이 포함됨

class CNNclassification(torch.nn.Module):
    def __init__(self):
        super(CNNclassification, self).__init__()
        self.layer1 = torch.nn.Sequential(
            nn.Conv2d(65, 10, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.layer2 = torch.nn.Sequential(
            nn.Conv2d(10, 100, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.layer3 = torch.nn.Sequential(
            nn.Conv2d(100, 200, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.layer4 = torch.nn.Sequential(
            nn.Conv2d(200, 300, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.fc_layer = nn.Sequential( 
            nn.Linear(300, 10) #fully connected layer(ouput layer)
        )    
        
    def forward(self, x):
        
        x = self.layer1(x) #1층
        
        x = self.layer2(x) #2층
         
        x = self.layer3(x) #3층
        
        x = self.layer4(x) #4층
        
        x = torch.flatten(x, start_dim=1) # N차원 배열 -> 1차원 배열
        
        out = self.fc_layer(x)
        return out

In [26]:
import torch.optim as optim # 최적화 알고리즘들이 포함힘

model = CNNclassification().to(device)
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(params = model.parameters(), lr = 1e-3 )
scheduler = None

In [27]:
model(torch.rand(10, 65, 12, 1).to(device))

tensor([[-0.0002, -0.0490, -0.1010, -0.0371, -0.0610,  0.0279, -0.0544, -0.0124,
         -0.0470,  0.0099],
        [-0.0011, -0.0509, -0.1025, -0.0356, -0.0625,  0.0291, -0.0575, -0.0122,
         -0.0494,  0.0131],
        [-0.0009, -0.0487, -0.1009, -0.0352, -0.0585,  0.0279, -0.0533, -0.0102,
         -0.0491,  0.0119],
        [-0.0012, -0.0508, -0.1032, -0.0353, -0.0595,  0.0291, -0.0578, -0.0103,
         -0.0493,  0.0134],
        [-0.0031, -0.0496, -0.1034, -0.0359, -0.0597,  0.0300, -0.0569, -0.0137,
         -0.0477,  0.0131],
        [-0.0015, -0.0520, -0.1030, -0.0371, -0.0625,  0.0279, -0.0542, -0.0137,
         -0.0484,  0.0120],
        [-0.0016, -0.0490, -0.1048, -0.0342, -0.0597,  0.0281, -0.0591, -0.0122,
         -0.0461,  0.0114],
        [-0.0029, -0.0514, -0.1036, -0.0371, -0.0596,  0.0271, -0.0554, -0.0117,
         -0.0488,  0.0128],
        [-0.0024, -0.0501, -0.1054, -0.0368, -0.0619,  0.0308, -0.0569, -0.0120,
         -0.0473,  0.0130],
        [-0.0022, -

In [28]:
from tqdm.auto import tqdm

def train(model, optimizer, train_loader, scheduler, device): 
    model.to(device)
    n = len(train_loader)
    best_acc = 0
    
    for epoch in range(1,num_epochs): #에포크 설정
        model.train() #모델 학습
        running_loss = 0.0
        
        for wav, label in tqdm(iter(train_loader)):
            
            wav, label = wav.to(device), label.to(device) #배치 데이터
            optimizer.zero_grad() #배치마다 optimizer 초기화
        
            # Data -> Model -> Output
            logit = model(wav) #예측값 산출
            loss = criterion(logit, label) #손실함수 계산
            
            # 역전파
            loss.backward() #손실함수 기준 역전파 
            optimizer.step() #가중치 최적화
            running_loss += loss.item()
             
        print('[%d] Train loss: %.10f' %(epoch, running_loss / len(train_loader)))
        
        if scheduler is not None:
            scheduler.step()
            
            
        #Validation set 평가
        model.eval() #evaluation 과정에서 사용하지 않아야 하는 layer들을 알아서 off 시키도록 하는 함수
        vali_loss = 0.0
        correct = 0
       
        with torch.no_grad(): #파라미터 업데이트 안하기 때문에 no_grad 사용
            for wav, label in tqdm(iter(vali_loader)):
                
                wav, label = wav.to(device), label.to(device)
                logit = model(wav)
                vali_loss += criterion(logit, label)
                pred = logit.argmax(dim=1, keepdim=True)  #10개의 class중 가장 값이 높은 것을 예측 label로 추출
                correct += pred.eq(label.view_as(pred)).sum().item() #예측값과 실제값이 맞으면 1 아니면 0으로 합산
        vali_acc = 100 * correct / len(vali_loader.dataset)
        print('Vail set: Loss: {:.4f}, Accuracy: {}/{} ( {:.0f}%)\n'.format(vali_loss / len(vali_loader), correct, len(vali_loader.dataset), 100 * correct / len(vali_loader.dataset)))
        
        #베스트 모델 저장
        if best_acc < vali_acc:
            best_acc = vali_acc
            torch.save(model.state_dict(), '/content/drive/MyDrive/DL/sound_classify/saved/best_model2.pth') #이 디렉토리에 best_model.pth을 저장
            print('Model Saved.')

In [29]:
train(model, optimizer, train_loader, scheduler, device)

  0%|          | 0/30 [00:00<?, ?it/s]

[1] Train loss: 2.3211733103


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 2.2671, Accuracy: 7/100 ( 7%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[2] Train loss: 2.2143139601


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 2.1685, Accuracy: 17/100 ( 17%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[3] Train loss: 2.1539887865


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 2.1091, Accuracy: 13/100 ( 13%)



  0%|          | 0/30 [00:00<?, ?it/s]

[4] Train loss: 2.0788371841


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 2.0069, Accuracy: 37/100 ( 37%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[5] Train loss: 1.9691124757


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.9427, Accuracy: 20/100 ( 20%)



  0%|          | 0/30 [00:00<?, ?it/s]

[6] Train loss: 1.8870245457


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.8259, Accuracy: 29/100 ( 29%)



  0%|          | 0/30 [00:00<?, ?it/s]

[7] Train loss: 1.7849049052


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.6889, Accuracy: 53/100 ( 53%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[8] Train loss: 1.7132471402


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.6966, Accuracy: 37/100 ( 37%)



  0%|          | 0/30 [00:00<?, ?it/s]

[9] Train loss: 1.6203467886


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.5333, Accuracy: 50/100 ( 50%)



  0%|          | 0/30 [00:00<?, ?it/s]

[10] Train loss: 1.5533933957


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.4325, Accuracy: 57/100 ( 57%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[11] Train loss: 1.4659885605


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.4002, Accuracy: 55/100 ( 55%)



  0%|          | 0/30 [00:00<?, ?it/s]

[12] Train loss: 1.4400023699


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.3418, Accuracy: 48/100 ( 48%)



  0%|          | 0/30 [00:00<?, ?it/s]

[13] Train loss: 1.3179637988


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.3703, Accuracy: 44/100 ( 44%)



  0%|          | 0/30 [00:00<?, ?it/s]

[14] Train loss: 1.2824224989


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.1864, Accuracy: 57/100 ( 57%)



  0%|          | 0/30 [00:00<?, ?it/s]

[15] Train loss: 1.3016748766


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.3188, Accuracy: 47/100 ( 47%)



  0%|          | 0/30 [00:00<?, ?it/s]

[16] Train loss: 1.1989336113


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.3160, Accuracy: 46/100 ( 46%)



  0%|          | 0/30 [00:00<?, ?it/s]

[17] Train loss: 1.0830116967


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.0931, Accuracy: 56/100 ( 56%)



  0%|          | 0/30 [00:00<?, ?it/s]

[18] Train loss: 1.1837192615


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.0695, Accuracy: 63/100 ( 63%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[19] Train loss: 1.0928109487


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.9394, Accuracy: 67/100 ( 67%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[20] Train loss: 0.9621282657


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.9370, Accuracy: 71/100 ( 71%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[21] Train loss: 0.9322376778


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.8256, Accuracy: 67/100 ( 67%)



  0%|          | 0/30 [00:00<?, ?it/s]

[22] Train loss: 0.9502130171


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.9160, Accuracy: 64/100 ( 64%)



  0%|          | 0/30 [00:00<?, ?it/s]

[23] Train loss: 0.9305977007


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.7217, Accuracy: 81/100 ( 81%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[24] Train loss: 0.8472710331


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.7891, Accuracy: 69/100 ( 69%)



  0%|          | 0/30 [00:00<?, ?it/s]

[25] Train loss: 0.8387377203


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.8471, Accuracy: 66/100 ( 66%)



  0%|          | 0/30 [00:00<?, ?it/s]

[26] Train loss: 0.8376924266


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.7900, Accuracy: 70/100 ( 70%)



  0%|          | 0/30 [00:00<?, ?it/s]

[27] Train loss: 0.8472149998


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.8106, Accuracy: 75/100 ( 75%)



  0%|          | 0/30 [00:00<?, ?it/s]

[28] Train loss: 0.7705419769


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.7805, Accuracy: 67/100 ( 67%)



  0%|          | 0/30 [00:00<?, ?it/s]

[29] Train loss: 0.7783818394


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6180, Accuracy: 80/100 ( 80%)



  0%|          | 0/30 [00:00<?, ?it/s]

[30] Train loss: 0.6191361070


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6158, Accuracy: 83/100 ( 83%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[31] Train loss: 0.6937858765


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6710, Accuracy: 73/100 ( 73%)



  0%|          | 0/30 [00:00<?, ?it/s]

[32] Train loss: 0.6880421455


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5749, Accuracy: 80/100 ( 80%)



  0%|          | 0/30 [00:00<?, ?it/s]

[33] Train loss: 0.6238926897


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.7134, Accuracy: 75/100 ( 75%)



  0%|          | 0/30 [00:00<?, ?it/s]

[34] Train loss: 0.6175832580


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5276, Accuracy: 86/100 ( 86%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[35] Train loss: 0.5676517049


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6505, Accuracy: 79/100 ( 79%)



  0%|          | 0/30 [00:00<?, ?it/s]

[36] Train loss: 0.5838881562


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6750, Accuracy: 76/100 ( 76%)



  0%|          | 0/30 [00:00<?, ?it/s]

[37] Train loss: 0.5579695751


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5396, Accuracy: 80/100 ( 80%)



  0%|          | 0/30 [00:00<?, ?it/s]

[38] Train loss: 0.5737655344


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5650, Accuracy: 82/100 ( 82%)



  0%|          | 0/30 [00:00<?, ?it/s]

[39] Train loss: 0.5158094421


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4527, Accuracy: 88/100 ( 88%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[40] Train loss: 0.4976610991


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4984, Accuracy: 82/100 ( 82%)



  0%|          | 0/30 [00:00<?, ?it/s]

[41] Train loss: 0.5083172247


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5081, Accuracy: 85/100 ( 85%)



  0%|          | 0/30 [00:00<?, ?it/s]

[42] Train loss: 0.4784856188


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4421, Accuracy: 86/100 ( 86%)



  0%|          | 0/30 [00:00<?, ?it/s]

[43] Train loss: 0.4789683113


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4961, Accuracy: 83/100 ( 83%)



  0%|          | 0/30 [00:00<?, ?it/s]

[44] Train loss: 0.4673380663


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6366, Accuracy: 76/100 ( 76%)



  0%|          | 0/30 [00:00<?, ?it/s]

[45] Train loss: 0.4653656503


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4803, Accuracy: 84/100 ( 84%)



  0%|          | 0/30 [00:00<?, ?it/s]

[46] Train loss: 0.4059899723


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4615, Accuracy: 86/100 ( 86%)



  0%|          | 0/30 [00:00<?, ?it/s]

[47] Train loss: 0.4029275412


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4244, Accuracy: 84/100 ( 84%)



  0%|          | 0/30 [00:00<?, ?it/s]

[48] Train loss: 0.3714446979


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4739, Accuracy: 85/100 ( 85%)



  0%|          | 0/30 [00:00<?, ?it/s]

[49] Train loss: 0.3876930632


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3839, Accuracy: 87/100 ( 87%)



  0%|          | 0/30 [00:00<?, ?it/s]

[50] Train loss: 0.3570684448


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4345, Accuracy: 86/100 ( 86%)



  0%|          | 0/30 [00:00<?, ?it/s]

[51] Train loss: 0.3640248078


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4689, Accuracy: 82/100 ( 82%)



  0%|          | 0/30 [00:00<?, ?it/s]

[52] Train loss: 0.3460342055


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5002, Accuracy: 83/100 ( 83%)



  0%|          | 0/30 [00:00<?, ?it/s]

[53] Train loss: 0.3518517837


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5347, Accuracy: 80/100 ( 80%)



  0%|          | 0/30 [00:00<?, ?it/s]

[54] Train loss: 0.3219190764


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4411, Accuracy: 82/100 ( 82%)



  0%|          | 0/30 [00:00<?, ?it/s]

[55] Train loss: 0.3408012961


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4975, Accuracy: 84/100 ( 84%)



  0%|          | 0/30 [00:00<?, ?it/s]

[56] Train loss: 0.3211163302


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3396, Accuracy: 89/100 ( 89%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[57] Train loss: 0.2844566745


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3955, Accuracy: 86/100 ( 86%)



  0%|          | 0/30 [00:00<?, ?it/s]

[58] Train loss: 0.2899444453


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4366, Accuracy: 82/100 ( 82%)



  0%|          | 0/30 [00:00<?, ?it/s]

[59] Train loss: 0.3221716384


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3092, Accuracy: 91/100 ( 91%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[60] Train loss: 0.3223668925


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3859, Accuracy: 82/100 ( 82%)



  0%|          | 0/30 [00:00<?, ?it/s]

[61] Train loss: 0.2880925955


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3452, Accuracy: 89/100 ( 89%)



  0%|          | 0/30 [00:00<?, ?it/s]

[62] Train loss: 0.2718455995


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4031, Accuracy: 85/100 ( 85%)



  0%|          | 0/30 [00:00<?, ?it/s]

[63] Train loss: 0.3016680213


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4814, Accuracy: 83/100 ( 83%)



  0%|          | 0/30 [00:00<?, ?it/s]

[64] Train loss: 0.2596426005


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3518, Accuracy: 90/100 ( 90%)



  0%|          | 0/30 [00:00<?, ?it/s]

[65] Train loss: 0.2370346406


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3027, Accuracy: 91/100 ( 91%)



  0%|          | 0/30 [00:00<?, ?it/s]

[66] Train loss: 0.2386747564


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4280, Accuracy: 87/100 ( 87%)



  0%|          | 0/30 [00:00<?, ?it/s]

[67] Train loss: 0.2264135311


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3310, Accuracy: 87/100 ( 87%)



  0%|          | 0/30 [00:00<?, ?it/s]

[68] Train loss: 0.2355348413


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3020, Accuracy: 84/100 ( 84%)



  0%|          | 0/30 [00:00<?, ?it/s]

[69] Train loss: 0.2111841398


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3031, Accuracy: 92/100 ( 92%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[70] Train loss: 0.2635653435


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3228, Accuracy: 89/100 ( 89%)



  0%|          | 0/30 [00:00<?, ?it/s]

[71] Train loss: 0.2193371275


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2980, Accuracy: 88/100 ( 88%)



  0%|          | 0/30 [00:00<?, ?it/s]

[72] Train loss: 0.1903834011


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3763, Accuracy: 88/100 ( 88%)



  0%|          | 0/30 [00:00<?, ?it/s]

[73] Train loss: 0.1974740612


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2765, Accuracy: 91/100 ( 91%)



  0%|          | 0/30 [00:00<?, ?it/s]

[74] Train loss: 0.1855253354


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6954, Accuracy: 76/100 ( 76%)



  0%|          | 0/30 [00:00<?, ?it/s]

[75] Train loss: 0.1876972736


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3178, Accuracy: 88/100 ( 88%)



  0%|          | 0/30 [00:00<?, ?it/s]

[76] Train loss: 0.1773935303


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3139, Accuracy: 88/100 ( 88%)



  0%|          | 0/30 [00:00<?, ?it/s]

[77] Train loss: 0.1818688929


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2622, Accuracy: 91/100 ( 91%)



  0%|          | 0/30 [00:00<?, ?it/s]

[78] Train loss: 0.1717766230


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2729, Accuracy: 91/100 ( 91%)



  0%|          | 0/30 [00:00<?, ?it/s]

[79] Train loss: 0.1680134704


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2924, Accuracy: 89/100 ( 89%)



  0%|          | 0/30 [00:00<?, ?it/s]

[80] Train loss: 0.1575129783


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3505, Accuracy: 87/100 ( 87%)



  0%|          | 0/30 [00:00<?, ?it/s]

[81] Train loss: 0.1464814795


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3736, Accuracy: 87/100 ( 87%)



  0%|          | 0/30 [00:00<?, ?it/s]

[82] Train loss: 0.1504773891


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2782, Accuracy: 87/100 ( 87%)



  0%|          | 0/30 [00:00<?, ?it/s]

[83] Train loss: 0.1238578377


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2791, Accuracy: 90/100 ( 90%)



  0%|          | 0/30 [00:00<?, ?it/s]

[84] Train loss: 0.1178614181


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2965, Accuracy: 92/100 ( 92%)



  0%|          | 0/30 [00:00<?, ?it/s]

[85] Train loss: 0.1257531663


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3231, Accuracy: 90/100 ( 90%)



  0%|          | 0/30 [00:00<?, ?it/s]

[86] Train loss: 0.1282689271


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2778, Accuracy: 90/100 ( 90%)



  0%|          | 0/30 [00:00<?, ?it/s]

[87] Train loss: 0.1136558370


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3050, Accuracy: 87/100 ( 87%)



  0%|          | 0/30 [00:00<?, ?it/s]

[88] Train loss: 0.1558275886


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2822, Accuracy: 88/100 ( 88%)



  0%|          | 0/30 [00:00<?, ?it/s]

[89] Train loss: 0.0983699011


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3291, Accuracy: 88/100 ( 88%)



  0%|          | 0/30 [00:00<?, ?it/s]

[90] Train loss: 0.1078492587


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3422, Accuracy: 87/100 ( 87%)



  0%|          | 0/30 [00:00<?, ?it/s]

[91] Train loss: 0.1049316925


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4602, Accuracy: 84/100 ( 84%)



  0%|          | 0/30 [00:00<?, ?it/s]

[92] Train loss: 0.1263305585


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3142, Accuracy: 90/100 ( 90%)



  0%|          | 0/30 [00:00<?, ?it/s]

[93] Train loss: 0.1497585818


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3397, Accuracy: 88/100 ( 88%)



  0%|          | 0/30 [00:00<?, ?it/s]

[94] Train loss: 0.0913286352


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3251, Accuracy: 88/100 ( 88%)



  0%|          | 0/30 [00:00<?, ?it/s]

[95] Train loss: 0.1067970553


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3040, Accuracy: 91/100 ( 91%)



  0%|          | 0/30 [00:00<?, ?it/s]

[96] Train loss: 0.0859088713


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2139, Accuracy: 93/100 ( 93%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[97] Train loss: 0.0888778885


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2738, Accuracy: 88/100 ( 88%)



  0%|          | 0/30 [00:00<?, ?it/s]

[98] Train loss: 0.0685690289


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2755, Accuracy: 90/100 ( 90%)



  0%|          | 0/30 [00:00<?, ?it/s]

[99] Train loss: 0.1195450066


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2697, Accuracy: 86/100 ( 86%)



In [30]:
test_mfccs = preprocess_dataset(test_x)
test_mfccs = np.array(test_mfccs)
test_mfccs = test_mfccs.reshape(-1, test_mfccs.shape[1], test_mfccs.shape[2], 1)

In [31]:
test_mfccs.shape

(200, 65, 12, 1)

In [32]:
def predict(model, test_loader, device):
    model.eval()
    model_pred = []
    with torch.no_grad():
        for wav in tqdm(iter(test_loader)):
            wav = wav.to(device)

            pred_logit = model(wav)
            pred_logit = pred_logit.argmax(dim=1, keepdim=True).squeeze(1)

            model_pred.extend(pred_logit.tolist())
    return model_pred

In [33]:
test_dataset = CustomDataset(X=test_mfccs, y= None, train_mode=False)
test_loader = DataLoader(test_dataset, batch_size = batch_size, shuffle=False)

In [34]:
# Validation Accuracy가 가장 뛰어난 모델을 불러옵니다.
checkpoint = torch.load('/content/drive/MyDrive/DL/sound_classify/saved/best_model2.pth')
model = CNNclassification().to(device)
model.load_state_dict(checkpoint)

# Inference
preds = predict(model, test_loader, device)
preds[0:5]

  0%|          | 0/20 [00:00<?, ?it/s]

[2, 7, 8, 1, 8]

In [35]:
len(preds)

200

In [36]:
test_wav['label'] = preds
test_wav = test_wav[['file_name', 'label']]

pred_df = test_wav.copy()
pred_df = pred_df.sort_values(by=[pred_df.columns[0]], ascending=[True]).reset_index(drop=True)
pred_df.head()

Unnamed: 0,file_name,label
0,003.wav,0
1,008.wav,9
2,010.wav,8
3,015.wav,8
4,024.wav,2


In [37]:
submission = pd.read_csv('/content/drive/MyDrive/DL/sound_classify/submission.csv')
submission['label'] = pred_df['label']
submission.head()

Unnamed: 0,file_name,label
0,003.wav,0
1,008.wav,9
2,010.wav,8
3,015.wav,8
4,024.wav,2


In [38]:
submission.to_csv('/content/drive/MyDrive/DL/sound_classify/submit2-6.csv', index=False)