<a href="https://colab.research.google.com/github/JellyJoa/DataAnalysis/blob/master/Dacon/sound_classify/baseline2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import librosa 
import librosa.display as dsp
from IPython.display import Audio
import pandas as pd
import numpy as np
from tqdm import tqdm
import os
import torch

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [2]:
import random

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(929)

In [3]:
train = pd.read_csv('/content/drive/MyDrive/DL/sound_classify/train.csv')
train.head()

Unnamed: 0,file_name,label
0,001.wav,9
1,002.wav,0
2,004.wav,1
3,005.wav,8
4,006.wav,0


In [4]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   file_name  400 non-null    object
 1   label      400 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 6.4+ KB


In [5]:
data, sample_rate = librosa.load('/content/drive/MyDrive/DL/sound_classify/train/001.wav', sr = 16000)
print('sample_rate:', sample_rate, ', audio shape:', data.shape)
print('length:', data.shape[0]/float(sample_rate), 'secs')

sample_rate: 16000 , audio shape: (10192,)
length: 0.637 secs


In [8]:
def train_dataset():
    folder = "/content/drive/MyDrive/DL/sound_classify/train/"
    dataset = []
    for file in tqdm(os.listdir(folder),colour='green'):
        if 'wav' in file:
            abs_file_path = os.path.join(folder,file)
            data, sr = librosa.load(abs_file_path, sr = 16000)
            class_label = int(train[train.file_name == file].label)
            dataset.append([data,class_label])
    
    print("Dataset 생성 완료")
    return pd.DataFrame(dataset,columns=['data','label'])

In [9]:
def test_dataset():
    folder = "/content/drive/MyDrive/DL/sound_classify/test/"
    dataset = []
    for file in tqdm(os.listdir(folder),colour='green'):
        if 'wav' in file:
            abs_file_path = os.path.join(folder,file)
            data, sr = librosa.load(abs_file_path, sr = 16000)
            
            dataset.append([data, file])
    
    print("Dataset 생성 완료")
    return pd.DataFrame(dataset,columns=['data', 'file_name'])

In [10]:
train_wav = train_dataset()
test_wav = test_dataset()

100%|[32m██████████[0m| 400/400 [00:14<00:00, 26.69it/s]


Dataset 생성 완료


100%|[32m██████████[0m| 200/200 [00:07<00:00, 28.53it/s]

Dataset 생성 완료





In [11]:
train_wav.head()

Unnamed: 0,data,label
0,"[3.6655838e-05, -3.7366447e-06, 3.4776433e-05,...",5
1,"[0.00011985076, 0.00016174652, 0.00017246709, ...",9
2,"[1.2653453e-05, 2.3892262e-05, -7.51332e-06, 4...",0
3,"[-0.00010586961, -0.00020532755, -0.0002140045...",7
4,"[-4.289015e-05, 9.891299e-05, 2.6636611e-05, 0...",4


In [12]:
train_x = np.array(train_wav.data)
test_x = np.array(test_wav.data)

In [13]:
def get_mini(data):

    mini = 9999999
    for i in data:
        if len(i) < mini:
            mini = len(i)

    return mini

train_mini = get_mini(train_x)
test_mini = get_mini(test_x)

#음성들의 길이를 맞춰줍니다.

mini = np.min([train_mini, test_mini])

In [14]:
print('가장 작은 길이 :', mini)

가장 작은 길이 : 5711


In [15]:
def set_length(data, d_mini):

    result = []
    for i in data:
        result.append(i[:d_mini])
    result = np.array(result)

    return result

train_x = set_length(train_x, mini)
test_x = set_length(test_x, mini)

In [16]:
print('train :', train_x.shape)
print('test :', test_x.shape)

train : (400, 5711)
test : (200, 5711)


In [17]:
extracted_features = librosa.feature.mfcc(y=train_x[0], sr=16000, n_mfcc=40)
extracted_features.shape

(40, 12)

In [18]:
def preprocess_dataset(data):
    mfccs = []
    for i in data:
        extracted_features = librosa.feature.mfcc(y=i,
                                              sr=16000,
                                              n_mfcc=40)
        mfccs.append(extracted_features)
            
    return mfccs

In [19]:
train_mfccs = preprocess_dataset(train_x)
train_mfccs = np.array(train_mfccs)
train_mfccs = train_mfccs.reshape(-1, train_mfccs.shape[1], train_mfccs.shape[2], 1)
#test_x = test_x.reshape(-1, test_x.shape[1], test_x.shape[2], 1)

In [20]:
np.array(train_mfccs).shape

(400, 40, 12, 1)

In [21]:
import torchvision.datasets as datasets # 데이터셋 집합체
import torchvision.transforms as transforms # 변환 툴

from torch.utils.data import DataLoader # 학습 및 배치로 모델에 넣어주기 위한 툴
from torch.utils.data import DataLoader, Dataset

class CustomDataset(Dataset):
    def __init__(self, X, y, train_mode=True, transforms=None): #필요한 변수들을 선언
        self.X = X
        self.y = y
        self.train_mode = train_mode
        self.transforms = transforms

    def __getitem__(self, index): #index번째 data를 return
        X = self.X[index]
        
        if self.transforms is not None:
            X = self.transforms(X)

        if self.train_mode:
            y = self.y[index]
            return X, y
        else:
            return X
    
    def __len__(self): #길이 return
        return len(self.X)

In [22]:
train_X = train_mfccs[:300]
vali_X = train_mfccs[300:]

In [23]:
train_y = train_wav.label[:300]
vali_y = train_wav.label[300:].reset_index(drop = True)

In [24]:
# 에포크 설정
num_epochs = 100

# 배치 사이즈 설정
batch_size = 10

#만든 train dataset를 DataLoader에 넣어 batch 만들기
train_dataset = CustomDataset(X=train_X, y=train_y)
train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle=True)

vali_dataset = CustomDataset(X=vali_X, y=vali_y)
vali_loader = DataLoader(vali_dataset, batch_size = batch_size, shuffle=False)

In [25]:
train_batches = len(train_loader)
vali_batches = len(vali_loader)

print('/ total train batches :', train_batches)
print('/ total valid batches :', vali_batches)

/ total train batches : 30
/ total valid batches : 10


In [26]:
from tqdm.auto import tqdm
import torch.nn as nn # 신경망들이 포함됨

class CNNclassification(torch.nn.Module):
    def __init__(self):
        super(CNNclassification, self).__init__()
        self.layer1 = torch.nn.Sequential(
            nn.Conv2d(40, 10, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.layer2 = torch.nn.Sequential(
            nn.Conv2d(10, 100, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.layer3 = torch.nn.Sequential(
            nn.Conv2d(100, 200, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.layer4 = torch.nn.Sequential(
            nn.Conv2d(200, 300, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.fc_layer = nn.Sequential( 
            nn.Linear(300, 10) #fully connected layer(ouput layer)
        )    
        
    def forward(self, x):
        
        x = self.layer1(x) #1층
        
        x = self.layer2(x) #2층
         
        x = self.layer3(x) #3층
        
        x = self.layer4(x) #4층
        
        x = torch.flatten(x, start_dim=1) # N차원 배열 -> 1차원 배열
        
        out = self.fc_layer(x)
        return out

In [27]:
import torch.optim as optim # 최적화 알고리즘들이 포함힘

model = CNNclassification().to(device)
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(params = model.parameters(), lr = 1e-3 )
scheduler = None

In [28]:
model(torch.rand(10, 40, 12, 1).to(device))

tensor([[ 6.3272e-02,  2.6929e-02, -9.6571e-03, -5.1214e-02,  1.6838e-02,
         -5.0106e-02, -5.2381e-02,  1.3524e-03,  4.0278e-02,  4.3555e-02],
        [ 6.1824e-02,  2.7923e-02, -1.1238e-02, -5.1971e-02,  1.5232e-02,
         -5.0787e-02, -5.2307e-02,  2.1474e-03,  3.7560e-02,  4.4356e-02],
        [ 6.1668e-02,  2.5830e-02, -1.0102e-02, -4.9540e-02,  1.5662e-02,
         -5.3129e-02, -5.3434e-02,  2.5749e-03,  3.8416e-02,  4.4608e-02],
        [ 6.3229e-02,  2.9607e-02, -1.0866e-02, -5.2343e-02,  1.5266e-02,
         -5.0899e-02, -5.2491e-02,  3.6425e-03,  3.8196e-02,  4.4868e-02],
        [ 6.3534e-02,  2.5505e-02, -1.0897e-02, -5.0993e-02,  1.7650e-02,
         -5.3773e-02, -5.1939e-02, -2.8022e-04,  3.5802e-02,  4.3764e-02],
        [ 6.3745e-02,  2.7125e-02, -1.0257e-02, -5.2642e-02,  1.4189e-02,
         -5.3396e-02, -5.3332e-02,  3.5185e-05,  3.8465e-02,  4.5951e-02],
        [ 6.3431e-02,  2.6569e-02, -1.1134e-02, -5.2119e-02,  1.5154e-02,
         -5.3788e-02, -5.2260e-0

In [29]:
from tqdm.auto import tqdm

def train(model, optimizer, train_loader, scheduler, device): 
    model.to(device)
    n = len(train_loader)
    best_acc = 0
    
    for epoch in range(1,num_epochs): #에포크 설정
        model.train() #모델 학습
        running_loss = 0.0
        
        for wav, label in tqdm(iter(train_loader)):
            
            wav, label = wav.to(device), label.to(device) #배치 데이터
            optimizer.zero_grad() #배치마다 optimizer 초기화
        
            # Data -> Model -> Output
            logit = model(wav) #예측값 산출
            loss = criterion(logit, label) #손실함수 계산
            
            # 역전파
            loss.backward() #손실함수 기준 역전파 
            optimizer.step() #가중치 최적화
            running_loss += loss.item()
             
        print('[%d] Train loss: %.10f' %(epoch, running_loss / len(train_loader)))
        
        if scheduler is not None:
            scheduler.step()
            
            
        #Validation set 평가
        model.eval() #evaluation 과정에서 사용하지 않아야 하는 layer들을 알아서 off 시키도록 하는 함수
        vali_loss = 0.0
        correct = 0
       
        with torch.no_grad(): #파라미터 업데이트 안하기 때문에 no_grad 사용
            for wav, label in tqdm(iter(vali_loader)):
                
                wav, label = wav.to(device), label.to(device)
                logit = model(wav)
                vali_loss += criterion(logit, label)
                pred = logit.argmax(dim=1, keepdim=True)  #10개의 class중 가장 값이 높은 것을 예측 label로 추출
                correct += pred.eq(label.view_as(pred)).sum().item() #예측값과 실제값이 맞으면 1 아니면 0으로 합산
        vali_acc = 100 * correct / len(vali_loader.dataset)
        print('Vail set: Loss: {:.4f}, Accuracy: {}/{} ( {:.0f}%)\n'.format(vali_loss / len(vali_loader), correct, len(vali_loader.dataset), 100 * correct / len(vali_loader.dataset)))
        
        #베스트 모델 저장
        if best_acc < vali_acc:
            best_acc = vali_acc
            torch.save(model.state_dict(), '/content/drive/MyDrive/DL/sound_classify/saved/best_model2.pth') #이 디렉토리에 best_model.pth을 저장
            print('Model Saved.')

In [30]:
train(model, optimizer, train_loader, scheduler, device)

  0%|          | 0/30 [00:00<?, ?it/s]

[1] Train loss: 2.5924017747


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 2.3013, Accuracy: 16/100 ( 16%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[2] Train loss: 2.2492518187


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 2.2676, Accuracy: 12/100 ( 12%)



  0%|          | 0/30 [00:00<?, ?it/s]

[3] Train loss: 2.1173717181


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 2.1658, Accuracy: 16/100 ( 16%)



  0%|          | 0/30 [00:00<?, ?it/s]

[4] Train loss: 2.0677159150


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 2.0491, Accuracy: 25/100 ( 25%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[5] Train loss: 1.9878605445


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.9182, Accuracy: 40/100 ( 40%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[6] Train loss: 1.8859785438


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.8442, Accuracy: 33/100 ( 33%)



  0%|          | 0/30 [00:00<?, ?it/s]

[7] Train loss: 1.8266590397


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.7614, Accuracy: 35/100 ( 35%)



  0%|          | 0/30 [00:00<?, ?it/s]

[8] Train loss: 1.7214944998


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.5887, Accuracy: 53/100 ( 53%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[9] Train loss: 1.6304380298


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.7465, Accuracy: 31/100 ( 31%)



  0%|          | 0/30 [00:00<?, ?it/s]

[10] Train loss: 1.5381288886


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.7247, Accuracy: 30/100 ( 30%)



  0%|          | 0/30 [00:00<?, ?it/s]

[11] Train loss: 1.5251904647


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.5236, Accuracy: 37/100 ( 37%)



  0%|          | 0/30 [00:00<?, ?it/s]

[12] Train loss: 1.4105582595


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.5011, Accuracy: 52/100 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[13] Train loss: 1.3980938633


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.2903, Accuracy: 49/100 ( 49%)



  0%|          | 0/30 [00:00<?, ?it/s]

[14] Train loss: 1.2971182883


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.2370, Accuracy: 56/100 ( 56%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[15] Train loss: 1.2607957443


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.3332, Accuracy: 41/100 ( 41%)



  0%|          | 0/30 [00:00<?, ?it/s]

[16] Train loss: 1.2510573765


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.2468, Accuracy: 58/100 ( 58%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[17] Train loss: 1.1636648893


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.0883, Accuracy: 60/100 ( 60%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[18] Train loss: 1.1705242276


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.1391, Accuracy: 57/100 ( 57%)



  0%|          | 0/30 [00:00<?, ?it/s]

[19] Train loss: 1.0909985423


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.1564, Accuracy: 50/100 ( 50%)



  0%|          | 0/30 [00:00<?, ?it/s]

[20] Train loss: 1.0616695722


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.1009, Accuracy: 56/100 ( 56%)



  0%|          | 0/30 [00:00<?, ?it/s]

[21] Train loss: 1.0655732652


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.1700, Accuracy: 49/100 ( 49%)



  0%|          | 0/30 [00:00<?, ?it/s]

[22] Train loss: 1.0107599874


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.9391, Accuracy: 66/100 ( 66%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[23] Train loss: 1.0060301950


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.4979, Accuracy: 38/100 ( 38%)



  0%|          | 0/30 [00:00<?, ?it/s]

[24] Train loss: 0.9979684154


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.9084, Accuracy: 66/100 ( 66%)



  0%|          | 0/30 [00:00<?, ?it/s]

[25] Train loss: 0.9269694785


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.8883, Accuracy: 68/100 ( 68%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[26] Train loss: 0.8974640896


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.9053, Accuracy: 66/100 ( 66%)



  0%|          | 0/30 [00:00<?, ?it/s]

[27] Train loss: 0.8743220796


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.8734, Accuracy: 64/100 ( 64%)



  0%|          | 0/30 [00:00<?, ?it/s]

[28] Train loss: 0.8033690969


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.7683, Accuracy: 74/100 ( 74%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[29] Train loss: 0.8121187647


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.9024, Accuracy: 62/100 ( 62%)



  0%|          | 0/30 [00:00<?, ?it/s]

[30] Train loss: 0.8133241783


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.7322, Accuracy: 77/100 ( 77%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[31] Train loss: 0.7826063842


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.7227, Accuracy: 77/100 ( 77%)



  0%|          | 0/30 [00:00<?, ?it/s]

[32] Train loss: 0.7702546348


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.7837, Accuracy: 70/100 ( 70%)



  0%|          | 0/30 [00:00<?, ?it/s]

[33] Train loss: 0.7475941092


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.7105, Accuracy: 71/100 ( 71%)



  0%|          | 0/30 [00:00<?, ?it/s]

[34] Train loss: 0.8052478641


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6872, Accuracy: 77/100 ( 77%)



  0%|          | 0/30 [00:00<?, ?it/s]

[35] Train loss: 0.7251801997


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.7405, Accuracy: 71/100 ( 71%)



  0%|          | 0/30 [00:00<?, ?it/s]

[36] Train loss: 0.7505739977


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6321, Accuracy: 80/100 ( 80%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[37] Train loss: 0.6441720515


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6754, Accuracy: 79/100 ( 79%)



  0%|          | 0/30 [00:00<?, ?it/s]

[38] Train loss: 0.6863013585


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6027, Accuracy: 81/100 ( 81%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[39] Train loss: 0.6354030897


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5812, Accuracy: 82/100 ( 82%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[40] Train loss: 0.5999448354


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5928, Accuracy: 80/100 ( 80%)



  0%|          | 0/30 [00:00<?, ?it/s]

[41] Train loss: 0.5760050317


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6062, Accuracy: 77/100 ( 77%)



  0%|          | 0/30 [00:00<?, ?it/s]

[42] Train loss: 0.5761045963


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6873, Accuracy: 68/100 ( 68%)



  0%|          | 0/30 [00:00<?, ?it/s]

[43] Train loss: 0.5812921797


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5048, Accuracy: 85/100 ( 85%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[44] Train loss: 0.5603254378


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4932, Accuracy: 87/100 ( 87%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[45] Train loss: 0.5095250458


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4765, Accuracy: 89/100 ( 89%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[46] Train loss: 0.5227480431


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5491, Accuracy: 78/100 ( 78%)



  0%|          | 0/30 [00:00<?, ?it/s]

[47] Train loss: 0.4795126833


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6186, Accuracy: 81/100 ( 81%)



  0%|          | 0/30 [00:00<?, ?it/s]

[48] Train loss: 0.4816179514


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5027, Accuracy: 83/100 ( 83%)



  0%|          | 0/30 [00:00<?, ?it/s]

[49] Train loss: 0.4670706893


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4080, Accuracy: 92/100 ( 92%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[50] Train loss: 0.4733458842


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.7296, Accuracy: 70/100 ( 70%)



  0%|          | 0/30 [00:00<?, ?it/s]

[51] Train loss: 0.4872426239


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5291, Accuracy: 83/100 ( 83%)



  0%|          | 0/30 [00:00<?, ?it/s]

[52] Train loss: 0.4544530051


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4165, Accuracy: 88/100 ( 88%)



  0%|          | 0/30 [00:00<?, ?it/s]

[53] Train loss: 0.4461722409


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3694, Accuracy: 92/100 ( 92%)



  0%|          | 0/30 [00:00<?, ?it/s]

[54] Train loss: 0.4579913596


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4001, Accuracy: 89/100 ( 89%)



  0%|          | 0/30 [00:00<?, ?it/s]

[55] Train loss: 0.3510104105


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5209, Accuracy: 79/100 ( 79%)



  0%|          | 0/30 [00:00<?, ?it/s]

[56] Train loss: 0.3694964019


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3996, Accuracy: 89/100 ( 89%)



  0%|          | 0/30 [00:00<?, ?it/s]

[57] Train loss: 0.4416899363


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3569, Accuracy: 88/100 ( 88%)



  0%|          | 0/30 [00:00<?, ?it/s]

[58] Train loss: 0.3799407577


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4220, Accuracy: 86/100 ( 86%)



  0%|          | 0/30 [00:00<?, ?it/s]

[59] Train loss: 0.3599679492


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3683, Accuracy: 88/100 ( 88%)



  0%|          | 0/30 [00:00<?, ?it/s]

[60] Train loss: 0.3379948512


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3519, Accuracy: 90/100 ( 90%)



  0%|          | 0/30 [00:00<?, ?it/s]

[61] Train loss: 0.3118570119


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3210, Accuracy: 93/100 ( 93%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[62] Train loss: 0.3359276921


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3874, Accuracy: 85/100 ( 85%)



  0%|          | 0/30 [00:00<?, ?it/s]

[63] Train loss: 0.3601077373


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3605, Accuracy: 90/100 ( 90%)



  0%|          | 0/30 [00:00<?, ?it/s]

[64] Train loss: 0.2950961401


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3299, Accuracy: 90/100 ( 90%)



  0%|          | 0/30 [00:00<?, ?it/s]

[65] Train loss: 0.3841810949


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4826, Accuracy: 81/100 ( 81%)



  0%|          | 0/30 [00:00<?, ?it/s]

[66] Train loss: 0.3066130333


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4339, Accuracy: 84/100 ( 84%)



  0%|          | 0/30 [00:00<?, ?it/s]

[67] Train loss: 0.2818695438


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3583, Accuracy: 90/100 ( 90%)



  0%|          | 0/30 [00:00<?, ?it/s]

[68] Train loss: 0.2912230107


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3349, Accuracy: 91/100 ( 91%)



  0%|          | 0/30 [00:00<?, ?it/s]

[69] Train loss: 0.2943951593


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3888, Accuracy: 85/100 ( 85%)



  0%|          | 0/30 [00:00<?, ?it/s]

[70] Train loss: 0.3212657424


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3218, Accuracy: 91/100 ( 91%)



  0%|          | 0/30 [00:00<?, ?it/s]

[71] Train loss: 0.2923257717


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3956, Accuracy: 86/100 ( 86%)



  0%|          | 0/30 [00:00<?, ?it/s]

[72] Train loss: 0.2345375789


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2796, Accuracy: 93/100 ( 93%)



  0%|          | 0/30 [00:00<?, ?it/s]

[73] Train loss: 0.2579262833


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3061, Accuracy: 91/100 ( 91%)



  0%|          | 0/30 [00:00<?, ?it/s]

[74] Train loss: 0.2354985046


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2688, Accuracy: 91/100 ( 91%)



  0%|          | 0/30 [00:00<?, ?it/s]

[75] Train loss: 0.2210858146


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2971, Accuracy: 89/100 ( 89%)



  0%|          | 0/30 [00:00<?, ?it/s]

[76] Train loss: 0.2645488746


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3369, Accuracy: 87/100 ( 87%)



  0%|          | 0/30 [00:00<?, ?it/s]

[77] Train loss: 0.2339456821


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2601, Accuracy: 90/100 ( 90%)



  0%|          | 0/30 [00:00<?, ?it/s]

[78] Train loss: 0.2751164693


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4407, Accuracy: 82/100 ( 82%)



  0%|          | 0/30 [00:00<?, ?it/s]

[79] Train loss: 0.2166946876


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2525, Accuracy: 92/100 ( 92%)



  0%|          | 0/30 [00:00<?, ?it/s]

[80] Train loss: 0.1962779673


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4812, Accuracy: 81/100 ( 81%)



  0%|          | 0/30 [00:00<?, ?it/s]

[81] Train loss: 0.2310758147


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2886, Accuracy: 90/100 ( 90%)



  0%|          | 0/30 [00:00<?, ?it/s]

[82] Train loss: 0.3395459656


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3095, Accuracy: 90/100 ( 90%)



  0%|          | 0/30 [00:00<?, ?it/s]

[83] Train loss: 0.2034221752


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3075, Accuracy: 91/100 ( 91%)



  0%|          | 0/30 [00:00<?, ?it/s]

[84] Train loss: 0.2237244765


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3707, Accuracy: 84/100 ( 84%)



  0%|          | 0/30 [00:00<?, ?it/s]

[85] Train loss: 0.1783669082


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3163, Accuracy: 90/100 ( 90%)



  0%|          | 0/30 [00:00<?, ?it/s]

[86] Train loss: 0.1683333704


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2644, Accuracy: 91/100 ( 91%)



  0%|          | 0/30 [00:00<?, ?it/s]

[87] Train loss: 0.1980078752


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2786, Accuracy: 91/100 ( 91%)



  0%|          | 0/30 [00:00<?, ?it/s]

[88] Train loss: 0.2171246336


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2893, Accuracy: 89/100 ( 89%)



  0%|          | 0/30 [00:00<?, ?it/s]

[89] Train loss: 0.1613287783


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3367, Accuracy: 87/100 ( 87%)



  0%|          | 0/30 [00:00<?, ?it/s]

[90] Train loss: 0.1756707322


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3413, Accuracy: 85/100 ( 85%)



  0%|          | 0/30 [00:00<?, ?it/s]

[91] Train loss: 0.1402110785


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3523, Accuracy: 86/100 ( 86%)



  0%|          | 0/30 [00:00<?, ?it/s]

[92] Train loss: 0.2139775821


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3951, Accuracy: 87/100 ( 87%)



  0%|          | 0/30 [00:00<?, ?it/s]

[93] Train loss: 0.1547663381


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2258, Accuracy: 92/100 ( 92%)



  0%|          | 0/30 [00:00<?, ?it/s]

[94] Train loss: 0.1198922024


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2272, Accuracy: 94/100 ( 94%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[95] Train loss: 0.1492087671


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2263, Accuracy: 93/100 ( 93%)



  0%|          | 0/30 [00:00<?, ?it/s]

[96] Train loss: 0.1328194169


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2712, Accuracy: 91/100 ( 91%)



  0%|          | 0/30 [00:00<?, ?it/s]

[97] Train loss: 0.1140609040


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3248, Accuracy: 90/100 ( 90%)



  0%|          | 0/30 [00:00<?, ?it/s]

[98] Train loss: 0.1406602018


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3333, Accuracy: 89/100 ( 89%)



  0%|          | 0/30 [00:00<?, ?it/s]

[99] Train loss: 0.1159390463


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3191, Accuracy: 86/100 ( 86%)



In [31]:
test_mfccs = preprocess_dataset(test_x)
test_mfccs = np.array(test_mfccs)
test_mfccs = test_mfccs.reshape(-1, test_mfccs.shape[1], test_mfccs.shape[2], 1)

In [32]:
test_mfccs.shape

(200, 40, 12, 1)

In [33]:
def predict(model, test_loader, device):
    model.eval()
    model_pred = []
    with torch.no_grad():
        for wav in tqdm(iter(test_loader)):
            wav = wav.to(device)

            pred_logit = model(wav)
            pred_logit = pred_logit.argmax(dim=1, keepdim=True).squeeze(1)

            model_pred.extend(pred_logit.tolist())
    return model_pred

In [34]:
test_dataset = CustomDataset(X=test_mfccs, y= None, train_mode=False)
test_loader = DataLoader(test_dataset, batch_size = batch_size, shuffle=False)

In [35]:
# Validation Accuracy가 가장 뛰어난 모델을 불러옵니다.
checkpoint = torch.load('/content/drive/MyDrive/DL/sound_classify/saved/best_model2.pth')
model = CNNclassification().to(device)
model.load_state_dict(checkpoint)

# Inference
preds = predict(model, test_loader, device)
preds[0:5]

  0%|          | 0/20 [00:00<?, ?it/s]

[2, 7, 8, 1, 8]

In [36]:
len(preds)

200

In [37]:
test_wav['label'] = preds
test_wav = test_wav[['file_name', 'label']]

pred_df = test_wav.copy()
pred_df = pred_df.sort_values(by=[pred_df.columns[0]], ascending=[True]).reset_index(drop=True)
pred_df.head()

Unnamed: 0,file_name,label
0,003.wav,0
1,008.wav,9
2,010.wav,8
3,015.wav,8
4,024.wav,2


In [38]:
submission = pd.read_csv('/content/drive/MyDrive/DL/sound_classify/submission.csv')
submission['label'] = pred_df['label']
submission.head()

Unnamed: 0,file_name,label
0,003.wav,0
1,008.wav,9
2,010.wav,8
3,015.wav,8
4,024.wav,2


In [39]:
submission.to_csv('/content/drive/MyDrive/DL/sound_classify/submit2.csv', index=False)