<a href="https://colab.research.google.com/github/JellyJoa/DataAnalysis/blob/master/Dacon/sound_classify/baseline2_insert.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import librosa 
import librosa.display as dsp
from IPython.display import Audio
import pandas as pd
import numpy as np
from tqdm import tqdm
import os
import torch

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [2]:
import random

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(929)

In [3]:
train = pd.read_csv('/content/drive/MyDrive/DL/sound_classify/train.csv')
train.head()

Unnamed: 0,file_name,label
0,001.wav,9
1,002.wav,0
2,004.wav,1
3,005.wav,8
4,006.wav,0


In [4]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   file_name  400 non-null    object
 1   label      400 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 6.4+ KB


In [5]:
data, sample_rate = librosa.load('/content/drive/MyDrive/DL/sound_classify/train/001.wav', sr = 16000)
print('sample_rate:', sample_rate, ', audio shape:', data.shape)
print('length:', data.shape[0]/float(sample_rate), 'secs')

sample_rate: 16000 , audio shape: (10192,)
length: 0.637 secs


In [6]:
def train_dataset():
    folder = "/content/drive/MyDrive/DL/sound_classify/train/"
    dataset = []
    for file in tqdm(os.listdir(folder),colour='green'):
        if 'wav' in file:
            abs_file_path = os.path.join(folder,file)
            data, sr = librosa.load(abs_file_path, sr = 16000)
            class_label = int(train[train.file_name == file].label)
            dataset.append([data,class_label])
    
    print("Dataset 생성 완료")
    return pd.DataFrame(dataset,columns=['data','label'])

In [7]:
def test_dataset():
    folder = "/content/drive/MyDrive/DL/sound_classify/test/"
    dataset = []
    for file in tqdm(os.listdir(folder),colour='green'):
        if 'wav' in file:
            abs_file_path = os.path.join(folder,file)
            data, sr = librosa.load(abs_file_path, sr = 16000)
            
            dataset.append([data, file])
    
    print("Dataset 생성 완료")
    return pd.DataFrame(dataset,columns=['data', 'file_name'])

In [8]:
train_wav = train_dataset()
test_wav = test_dataset()

100%|[32m██████████[0m| 400/400 [00:13<00:00, 30.60it/s]


Dataset 생성 완료


100%|[32m██████████[0m| 200/200 [00:06<00:00, 30.86it/s]

Dataset 생성 완료





In [9]:
train_wav.head()

Unnamed: 0,data,label
0,"[3.6655838e-05, -3.7366447e-06, 3.4776433e-05,...",5
1,"[0.00011985076, 0.00016174652, 0.00017246709, ...",9
2,"[1.2653453e-05, 2.3892262e-05, -7.51332e-06, 4...",0
3,"[-0.00010586961, -0.00020532755, -0.0002140045...",7
4,"[-4.289015e-05, 9.891299e-05, 2.6636611e-05, 0...",4


In [10]:
train_x = np.array(train_wav.data)
test_x = np.array(test_wav.data)

In [11]:
def get_mini(data):

    mini = 9999999
    for i in data:
        if len(i) < mini:
            mini = len(i)

    return mini

train_mini = get_mini(train_x)
test_mini = get_mini(test_x)

#음성들의 길이를 맞춰줍니다.

mini = np.min([train_mini, test_mini])

In [12]:
print('가장 작은 길이 :', mini)

가장 작은 길이 : 5711


In [13]:
def set_length(data, d_mini):

    result = []
    for i in data:
        result.append(i[:d_mini])
    result = np.array(result)

    return result

train_x = set_length(train_x, mini)
test_x = set_length(test_x, mini)

In [14]:
print('train :', train_x.shape)
print('test :', test_x.shape)

train : (400, 5711)
test : (200, 5711)


In [15]:
extracted_features = librosa.feature.mfcc(y=train_x[0], sr=16000, n_mfcc=70)
extracted_features.shape

(70, 12)

In [16]:
def preprocess_dataset(data):
    mfccs = []
    for i in data:
        extracted_features = librosa.feature.mfcc(y=i,
                                              sr=16000,
                                              n_mfcc=70)
        mfccs.append(extracted_features)
            
    return mfccs

In [17]:
train_mfccs = preprocess_dataset(train_x)
train_mfccs = np.array(train_mfccs)
train_mfccs = train_mfccs.reshape(-1, train_mfccs.shape[1], train_mfccs.shape[2], 1)
#test_x = test_x.reshape(-1, test_x.shape[1], test_x.shape[2], 1)

In [18]:
np.array(train_mfccs).shape

(400, 70, 12, 1)

In [19]:
import torchvision.datasets as datasets # 데이터셋 집합체
import torchvision.transforms as transforms # 변환 툴

from torch.utils.data import DataLoader # 학습 및 배치로 모델에 넣어주기 위한 툴
from torch.utils.data import DataLoader, Dataset

class CustomDataset(Dataset):
    def __init__(self, X, y, train_mode=True, transforms=None): #필요한 변수들을 선언
        self.X = X
        self.y = y
        self.train_mode = train_mode
        self.transforms = transforms

    def __getitem__(self, index): #index번째 data를 return
        X = self.X[index]
        
        if self.transforms is not None:
            X = self.transforms(X)

        if self.train_mode:
            y = self.y[index]
            return X, y
        else:
            return X
    
    def __len__(self): #길이 return
        return len(self.X)

In [20]:
train_X = train_mfccs[:300]
vali_X = train_mfccs[300:]

In [21]:
train_y = train_wav.label[:300]
vali_y = train_wav.label[300:].reset_index(drop = True)

In [22]:
# 에포크 설정
num_epochs = 100

# 배치 사이즈 설정
batch_size = 10

#만든 train dataset를 DataLoader에 넣어 batch 만들기
train_dataset = CustomDataset(X=train_X, y=train_y)
train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle=True)

vali_dataset = CustomDataset(X=vali_X, y=vali_y)
vali_loader = DataLoader(vali_dataset, batch_size = batch_size, shuffle=False)

In [23]:
train_batches = len(train_loader)
vali_batches = len(vali_loader)

print('/ total train batches :', train_batches)
print('/ total valid batches :', vali_batches)

/ total train batches : 30
/ total valid batches : 10


In [24]:
from tqdm.auto import tqdm
import torch.nn as nn # 신경망들이 포함됨

class CNNclassification(torch.nn.Module):
    def __init__(self):
        super(CNNclassification, self).__init__()
        self.layer1 = torch.nn.Sequential(
            nn.Conv2d(70, 10, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.layer2 = torch.nn.Sequential(
            nn.Conv2d(10, 100, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.layer3 = torch.nn.Sequential(
            nn.Conv2d(100, 200, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.layer4 = torch.nn.Sequential(
            nn.Conv2d(200, 300, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.fc_layer = nn.Sequential( 
            nn.Linear(300, 10) #fully connected layer(ouput layer)
        )    
        
    def forward(self, x):
        
        x = self.layer1(x) #1층
        
        x = self.layer2(x) #2층
         
        x = self.layer3(x) #3층
        
        x = self.layer4(x) #4층
        
        x = torch.flatten(x, start_dim=1) # N차원 배열 -> 1차원 배열
        
        out = self.fc_layer(x)
        return out

In [25]:
import torch.optim as optim # 최적화 알고리즘들이 포함힘

model = CNNclassification().to(device)
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(params = model.parameters(), lr = 1e-3 )
scheduler = None

In [26]:
model(torch.rand(10, 70, 12, 1).to(device))

tensor([[-0.0478, -0.0317,  0.0406, -0.0161,  0.0001, -0.0476,  0.0251, -0.0209,
         -0.0593, -0.0148],
        [-0.0483, -0.0330,  0.0406, -0.0145,  0.0025, -0.0499,  0.0290, -0.0235,
         -0.0614, -0.0149],
        [-0.0508, -0.0320,  0.0389, -0.0144,  0.0034, -0.0501,  0.0258, -0.0231,
         -0.0595, -0.0129],
        [-0.0507, -0.0345,  0.0405, -0.0126,  0.0041, -0.0494,  0.0262, -0.0240,
         -0.0580, -0.0161],
        [-0.0493, -0.0345,  0.0396, -0.0128,  0.0003, -0.0488,  0.0254, -0.0256,
         -0.0600, -0.0145],
        [-0.0479, -0.0338,  0.0394, -0.0155,  0.0024, -0.0508,  0.0270, -0.0234,
         -0.0612, -0.0121],
        [-0.0508, -0.0336,  0.0398, -0.0148,  0.0007, -0.0491,  0.0259, -0.0240,
         -0.0602, -0.0116],
        [-0.0518, -0.0325,  0.0394, -0.0133, -0.0004, -0.0508,  0.0254, -0.0258,
         -0.0622, -0.0146],
        [-0.0484, -0.0332,  0.0384, -0.0149,  0.0031, -0.0460,  0.0260, -0.0229,
         -0.0595, -0.0157],
        [-0.0492, -

In [27]:
from tqdm.auto import tqdm

def train(model, optimizer, train_loader, scheduler, device): 
    model.to(device)
    n = len(train_loader)
    best_acc = 0
    
    for epoch in range(1,num_epochs): #에포크 설정
        model.train() #모델 학습
        running_loss = 0.0
        
        for wav, label in tqdm(iter(train_loader)):
            
            wav, label = wav.to(device), label.to(device) #배치 데이터
            optimizer.zero_grad() #배치마다 optimizer 초기화
        
            # Data -> Model -> Output
            logit = model(wav) #예측값 산출
            loss = criterion(logit, label) #손실함수 계산
            
            # 역전파
            loss.backward() #손실함수 기준 역전파 
            optimizer.step() #가중치 최적화
            running_loss += loss.item()
             
        print('[%d] Train loss: %.10f' %(epoch, running_loss / len(train_loader)))
        
        if scheduler is not None:
            scheduler.step()
            
            
        #Validation set 평가
        model.eval() #evaluation 과정에서 사용하지 않아야 하는 layer들을 알아서 off 시키도록 하는 함수
        vali_loss = 0.0
        correct = 0
       
        with torch.no_grad(): #파라미터 업데이트 안하기 때문에 no_grad 사용
            for wav, label in tqdm(iter(vali_loader)):
                
                wav, label = wav.to(device), label.to(device)
                logit = model(wav)
                vali_loss += criterion(logit, label)
                pred = logit.argmax(dim=1, keepdim=True)  #10개의 class중 가장 값이 높은 것을 예측 label로 추출
                correct += pred.eq(label.view_as(pred)).sum().item() #예측값과 실제값이 맞으면 1 아니면 0으로 합산
        vali_acc = 100 * correct / len(vali_loader.dataset)
        print('Vail set: Loss: {:.4f}, Accuracy: {}/{} ( {:.0f}%)\n'.format(vali_loss / len(vali_loader), correct, len(vali_loader.dataset), 100 * correct / len(vali_loader.dataset)))
        
        #베스트 모델 저장
        if best_acc < vali_acc:
            best_acc = vali_acc
            torch.save(model.state_dict(), '/content/drive/MyDrive/DL/sound_classify/saved/best_model2.pth') #이 디렉토리에 best_model.pth을 저장
            print('Model Saved.')

In [28]:
train(model, optimizer, train_loader, scheduler, device)

  0%|          | 0/30 [00:00<?, ?it/s]

[1] Train loss: 2.3487844865


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 2.3457, Accuracy: 11/100 ( 11%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[2] Train loss: 2.2462323984


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 2.2223, Accuracy: 12/100 ( 12%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[3] Train loss: 2.1737405459


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 2.2541, Accuracy: 11/100 ( 11%)



  0%|          | 0/30 [00:00<?, ?it/s]

[4] Train loss: 2.1119894067


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 2.0928, Accuracy: 25/100 ( 25%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[5] Train loss: 2.0374204278


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 2.0692, Accuracy: 26/100 ( 26%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[6] Train loss: 1.9645684918


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 2.0446, Accuracy: 26/100 ( 26%)



  0%|          | 0/30 [00:00<?, ?it/s]

[7] Train loss: 1.8857607206


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.8098, Accuracy: 41/100 ( 41%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[8] Train loss: 1.7816635648


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.6873, Accuracy: 46/100 ( 46%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[9] Train loss: 1.7166417559


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.7700, Accuracy: 34/100 ( 34%)



  0%|          | 0/30 [00:00<?, ?it/s]

[10] Train loss: 1.6155948321


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.5574, Accuracy: 40/100 ( 40%)



  0%|          | 0/30 [00:00<?, ?it/s]

[11] Train loss: 1.5693472246


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.6620, Accuracy: 35/100 ( 35%)



  0%|          | 0/30 [00:00<?, ?it/s]

[12] Train loss: 1.4823574781


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.4809, Accuracy: 36/100 ( 36%)



  0%|          | 0/30 [00:00<?, ?it/s]

[13] Train loss: 1.4386548042


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.3961, Accuracy: 46/100 ( 46%)



  0%|          | 0/30 [00:00<?, ?it/s]

[14] Train loss: 1.4356243988


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.1852, Accuracy: 65/100 ( 65%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[15] Train loss: 1.2605030994


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.4467, Accuracy: 47/100 ( 47%)



  0%|          | 0/30 [00:00<?, ?it/s]

[16] Train loss: 1.3113021195


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.1756, Accuracy: 51/100 ( 51%)



  0%|          | 0/30 [00:00<?, ?it/s]

[17] Train loss: 1.2657488346


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.2294, Accuracy: 46/100 ( 46%)



  0%|          | 0/30 [00:00<?, ?it/s]

[18] Train loss: 1.1808575193


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.2033, Accuracy: 52/100 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[19] Train loss: 1.1357174595


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.9888, Accuracy: 65/100 ( 65%)



  0%|          | 0/30 [00:00<?, ?it/s]

[20] Train loss: 1.0990966340


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.0757, Accuracy: 57/100 ( 57%)



  0%|          | 0/30 [00:00<?, ?it/s]

[21] Train loss: 1.0869516472


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.1747, Accuracy: 51/100 ( 51%)



  0%|          | 0/30 [00:00<?, ?it/s]

[22] Train loss: 0.9897965074


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 1.0043, Accuracy: 58/100 ( 58%)



  0%|          | 0/30 [00:00<?, ?it/s]

[23] Train loss: 0.9839607040


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.9115, Accuracy: 63/100 ( 63%)



  0%|          | 0/30 [00:00<?, ?it/s]

[24] Train loss: 0.9247547885


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.9084, Accuracy: 68/100 ( 68%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[25] Train loss: 0.8924642235


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.7954, Accuracy: 72/100 ( 72%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[26] Train loss: 0.8914164474


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.9339, Accuracy: 65/100 ( 65%)



  0%|          | 0/30 [00:00<?, ?it/s]

[27] Train loss: 0.8843847166


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.7919, Accuracy: 74/100 ( 74%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[28] Train loss: 0.8290601204


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.7370, Accuracy: 77/100 ( 77%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[29] Train loss: 0.8343016942


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.7014, Accuracy: 75/100 ( 75%)



  0%|          | 0/30 [00:00<?, ?it/s]

[30] Train loss: 0.7776441574


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6800, Accuracy: 81/100 ( 81%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[31] Train loss: 0.7399060279


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.8151, Accuracy: 65/100 ( 65%)



  0%|          | 0/30 [00:00<?, ?it/s]

[32] Train loss: 0.7879028459


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6430, Accuracy: 77/100 ( 77%)



  0%|          | 0/30 [00:00<?, ?it/s]

[33] Train loss: 0.6968706886


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.7361, Accuracy: 70/100 ( 70%)



  0%|          | 0/30 [00:00<?, ?it/s]

[34] Train loss: 0.6727952510


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6727, Accuracy: 78/100 ( 78%)



  0%|          | 0/30 [00:00<?, ?it/s]

[35] Train loss: 0.6375601158


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.7932, Accuracy: 72/100 ( 72%)



  0%|          | 0/30 [00:00<?, ?it/s]

[36] Train loss: 0.6477255960


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6789, Accuracy: 79/100 ( 79%)



  0%|          | 0/30 [00:00<?, ?it/s]

[37] Train loss: 0.6619515002


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6414, Accuracy: 83/100 ( 83%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[38] Train loss: 0.5848335336


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6227, Accuracy: 83/100 ( 83%)



  0%|          | 0/30 [00:00<?, ?it/s]

[39] Train loss: 0.7302528312


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5531, Accuracy: 82/100 ( 82%)



  0%|          | 0/30 [00:00<?, ?it/s]

[40] Train loss: 0.5813782086


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6871, Accuracy: 74/100 ( 74%)



  0%|          | 0/30 [00:00<?, ?it/s]

[41] Train loss: 0.5680558632


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4830, Accuracy: 87/100 ( 87%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[42] Train loss: 0.5315790474


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5403, Accuracy: 81/100 ( 81%)



  0%|          | 0/30 [00:00<?, ?it/s]

[43] Train loss: 0.5745780518


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4971, Accuracy: 86/100 ( 86%)



  0%|          | 0/30 [00:00<?, ?it/s]

[44] Train loss: 0.5079023033


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4958, Accuracy: 84/100 ( 84%)



  0%|          | 0/30 [00:00<?, ?it/s]

[45] Train loss: 0.5186371048


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4919, Accuracy: 84/100 ( 84%)



  0%|          | 0/30 [00:00<?, ?it/s]

[46] Train loss: 0.5061710976


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5860, Accuracy: 79/100 ( 79%)



  0%|          | 0/30 [00:00<?, ?it/s]

[47] Train loss: 0.4810322568


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4540, Accuracy: 86/100 ( 86%)



  0%|          | 0/30 [00:00<?, ?it/s]

[48] Train loss: 0.4718324626


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5108, Accuracy: 83/100 ( 83%)



  0%|          | 0/30 [00:00<?, ?it/s]

[49] Train loss: 0.4387071679


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5555, Accuracy: 80/100 ( 80%)



  0%|          | 0/30 [00:00<?, ?it/s]

[50] Train loss: 0.4632213503


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5043, Accuracy: 81/100 ( 81%)



  0%|          | 0/30 [00:00<?, ?it/s]

[51] Train loss: 0.4822713586


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4414, Accuracy: 86/100 ( 86%)



  0%|          | 0/30 [00:00<?, ?it/s]

[52] Train loss: 0.4338136487


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6970, Accuracy: 70/100 ( 70%)



  0%|          | 0/30 [00:00<?, ?it/s]

[53] Train loss: 0.4142962997


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5982, Accuracy: 77/100 ( 77%)



  0%|          | 0/30 [00:00<?, ?it/s]

[54] Train loss: 0.3738572074


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5011, Accuracy: 83/100 ( 83%)



  0%|          | 0/30 [00:00<?, ?it/s]

[55] Train loss: 0.4061277390


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3901, Accuracy: 90/100 ( 90%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[56] Train loss: 0.4027958105


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3471, Accuracy: 91/100 ( 91%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[57] Train loss: 0.3790176113


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4772, Accuracy: 85/100 ( 85%)



  0%|          | 0/30 [00:00<?, ?it/s]

[58] Train loss: 0.4069751486


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4219, Accuracy: 84/100 ( 84%)



  0%|          | 0/30 [00:00<?, ?it/s]

[59] Train loss: 0.3638989635


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3940, Accuracy: 87/100 ( 87%)



  0%|          | 0/30 [00:00<?, ?it/s]

[60] Train loss: 0.3431896338


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3724, Accuracy: 87/100 ( 87%)



  0%|          | 0/30 [00:00<?, ?it/s]

[61] Train loss: 0.3069232066


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3793, Accuracy: 90/100 ( 90%)



  0%|          | 0/30 [00:00<?, ?it/s]

[62] Train loss: 0.3299393984


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4784, Accuracy: 84/100 ( 84%)



  0%|          | 0/30 [00:00<?, ?it/s]

[63] Train loss: 0.3162178437


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4768, Accuracy: 85/100 ( 85%)



  0%|          | 0/30 [00:00<?, ?it/s]

[64] Train loss: 0.2840056842


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4777, Accuracy: 84/100 ( 84%)



  0%|          | 0/30 [00:00<?, ?it/s]

[65] Train loss: 0.2843350530


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3767, Accuracy: 91/100 ( 91%)



  0%|          | 0/30 [00:00<?, ?it/s]

[66] Train loss: 0.2774063880


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3987, Accuracy: 87/100 ( 87%)



  0%|          | 0/30 [00:00<?, ?it/s]

[67] Train loss: 0.2837501792


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4466, Accuracy: 83/100 ( 83%)



  0%|          | 0/30 [00:00<?, ?it/s]

[68] Train loss: 0.3015405945


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3941, Accuracy: 88/100 ( 88%)



  0%|          | 0/30 [00:00<?, ?it/s]

[69] Train loss: 0.2601294582


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3997, Accuracy: 87/100 ( 87%)



  0%|          | 0/30 [00:00<?, ?it/s]

[70] Train loss: 0.2426772847


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4188, Accuracy: 86/100 ( 86%)



  0%|          | 0/30 [00:00<?, ?it/s]

[71] Train loss: 0.2617581792


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5512, Accuracy: 77/100 ( 77%)



  0%|          | 0/30 [00:00<?, ?it/s]

[72] Train loss: 0.2357205672


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4370, Accuracy: 82/100 ( 82%)



  0%|          | 0/30 [00:00<?, ?it/s]

[73] Train loss: 0.2452054925


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3863, Accuracy: 88/100 ( 88%)



  0%|          | 0/30 [00:00<?, ?it/s]

[74] Train loss: 0.2346826325


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4581, Accuracy: 81/100 ( 81%)



  0%|          | 0/30 [00:00<?, ?it/s]

[75] Train loss: 0.2611296092


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3355, Accuracy: 91/100 ( 91%)



  0%|          | 0/30 [00:00<?, ?it/s]

[76] Train loss: 0.2111858060


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3037, Accuracy: 92/100 ( 92%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[77] Train loss: 0.2196199560


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3174, Accuracy: 91/100 ( 91%)



  0%|          | 0/30 [00:00<?, ?it/s]

[78] Train loss: 0.2148611840


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4261, Accuracy: 85/100 ( 85%)



  0%|          | 0/30 [00:00<?, ?it/s]

[79] Train loss: 0.2190062734


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3129, Accuracy: 89/100 ( 89%)



  0%|          | 0/30 [00:00<?, ?it/s]

[80] Train loss: 0.1832541425


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2818, Accuracy: 92/100 ( 92%)



  0%|          | 0/30 [00:00<?, ?it/s]

[81] Train loss: 0.1926304345


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3032, Accuracy: 93/100 ( 93%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[82] Train loss: 0.2003337145


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3271, Accuracy: 89/100 ( 89%)



  0%|          | 0/30 [00:00<?, ?it/s]

[83] Train loss: 0.1761270610


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3148, Accuracy: 91/100 ( 91%)



  0%|          | 0/30 [00:00<?, ?it/s]

[84] Train loss: 0.1582033575


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3624, Accuracy: 88/100 ( 88%)



  0%|          | 0/30 [00:00<?, ?it/s]

[85] Train loss: 0.1638192173


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3232, Accuracy: 89/100 ( 89%)



  0%|          | 0/30 [00:00<?, ?it/s]

[86] Train loss: 0.1508147926


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3848, Accuracy: 86/100 ( 86%)



  0%|          | 0/30 [00:00<?, ?it/s]

[87] Train loss: 0.2047261747


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3204, Accuracy: 89/100 ( 89%)



  0%|          | 0/30 [00:00<?, ?it/s]

[88] Train loss: 0.1567797846


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3483, Accuracy: 89/100 ( 89%)



  0%|          | 0/30 [00:00<?, ?it/s]

[89] Train loss: 0.1445487588


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3046, Accuracy: 92/100 ( 92%)



  0%|          | 0/30 [00:00<?, ?it/s]

[90] Train loss: 0.1504299109


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.4036, Accuracy: 85/100 ( 85%)



  0%|          | 0/30 [00:00<?, ?it/s]

[91] Train loss: 0.1751945633


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3409, Accuracy: 89/100 ( 89%)



  0%|          | 0/30 [00:00<?, ?it/s]

[92] Train loss: 0.1591156236


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3083, Accuracy: 92/100 ( 92%)



  0%|          | 0/30 [00:00<?, ?it/s]

[93] Train loss: 0.1554708205


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3265, Accuracy: 90/100 ( 90%)



  0%|          | 0/30 [00:00<?, ?it/s]

[94] Train loss: 0.1542204777


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.5230, Accuracy: 82/100 ( 82%)



  0%|          | 0/30 [00:00<?, ?it/s]

[95] Train loss: 0.1328254963


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.3406, Accuracy: 89/100 ( 89%)



  0%|          | 0/30 [00:00<?, ?it/s]

[96] Train loss: 0.1074353548


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2720, Accuracy: 92/100 ( 92%)



  0%|          | 0/30 [00:00<?, ?it/s]

[97] Train loss: 0.1056082432


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2844, Accuracy: 92/100 ( 92%)



  0%|          | 0/30 [00:00<?, ?it/s]

[98] Train loss: 0.1062948415


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.6169, Accuracy: 75/100 ( 75%)



  0%|          | 0/30 [00:00<?, ?it/s]

[99] Train loss: 0.1102280324


  0%|          | 0/10 [00:00<?, ?it/s]

Vail set: Loss: 0.2915, Accuracy: 90/100 ( 90%)



In [29]:
test_mfccs = preprocess_dataset(test_x)
test_mfccs = np.array(test_mfccs)
test_mfccs = test_mfccs.reshape(-1, test_mfccs.shape[1], test_mfccs.shape[2], 1)

In [30]:
test_mfccs.shape

(200, 70, 12, 1)

In [31]:
def predict(model, test_loader, device):
    model.eval()
    model_pred = []
    with torch.no_grad():
        for wav in tqdm(iter(test_loader)):
            wav = wav.to(device)

            pred_logit = model(wav)
            pred_logit = pred_logit.argmax(dim=1, keepdim=True).squeeze(1)

            model_pred.extend(pred_logit.tolist())
    return model_pred

In [32]:
test_dataset = CustomDataset(X=test_mfccs, y= None, train_mode=False)
test_loader = DataLoader(test_dataset, batch_size = batch_size, shuffle=False)

In [33]:
# Validation Accuracy가 가장 뛰어난 모델을 불러옵니다.
checkpoint = torch.load('/content/drive/MyDrive/DL/sound_classify/saved/best_model2.pth')
model = CNNclassification().to(device)
model.load_state_dict(checkpoint)

# Inference
preds = predict(model, test_loader, device)
preds[0:5]

  0%|          | 0/20 [00:00<?, ?it/s]

[2, 7, 8, 1, 8]

In [34]:
len(preds)

200

In [35]:
test_wav['label'] = preds
test_wav = test_wav[['file_name', 'label']]

pred_df = test_wav.copy()
pred_df = pred_df.sort_values(by=[pred_df.columns[0]], ascending=[True]).reset_index(drop=True)
pred_df.head()

Unnamed: 0,file_name,label
0,003.wav,0
1,008.wav,9
2,010.wav,8
3,015.wav,8
4,024.wav,2


In [36]:
submission = pd.read_csv('/content/drive/MyDrive/DL/sound_classify/submission.csv')
submission['label'] = pred_df['label']
submission.head()

Unnamed: 0,file_name,label
0,003.wav,0
1,008.wav,9
2,010.wav,8
3,015.wav,8
4,024.wav,2


In [37]:
submission.to_csv('/content/drive/MyDrive/DL/sound_classify/submit2-3.csv', index=False)