<a href="https://colab.research.google.com/github/Jaew00/Grad_PJT/blob/main/cnn_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. Settings

> 1) Important required libraries




In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import os

In [2]:
import torch

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') #GPU 할당

In [3]:
import random

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(929)

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


2. Data Loader

트레이닝 데이터

In [45]:
import pandas as pd
d_set = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/졸업 프로젝트/new_UrbanSound8K.csv')

In [46]:
d_set.columns = ['file_name', 'fsID', 'start', 'end', 'salience', 'fold',
       'classID', 'class', 'label']

In [47]:
d_set = d_set[['fsID', 'file_name', 'label']]
d_set

Unnamed: 0,fsID,file_name,label
0,100648,100648-1-0-0.wav,0
1,100648,100648-1-1-0.wav,0
2,100648,100648-1-2-0.wav,0
3,100648,100648-1-3-0.wav,0
4,100648,100648-1-4-0.wav,0
...,...,...,...
5353,96657,96657-8-0-1.wav,5
5354,96657,96657-8-0-2.wav,5
5355,96657,96657-8-0-3.wav,5
5356,98525,98525-8-0-0.wav,5


In [48]:
d_set['label'].value_counts()

1    1000
2    1000
3    1000
4    1000
5     929
0     429
Name: label, dtype: int64

In [50]:
import librosa

data, sample_rate = librosa.load('/content/drive/MyDrive/Colab Notebooks/졸업 프로젝트/train/46669-4-0-63.wav', sr = 16000)
print('sample_rate:', sample_rate, ', audio shape:', data.shape)
print('length:', data.shape[0]/float(sample_rate), 'secs')


sample_rate: 16000 , audio shape: (64000,)
length: 4.0 secs


In [55]:
def train_dataset():
    folder = "/content/drive/MyDrive/Colab Notebooks/졸업 프로젝트/train"
    dataset = []
    for file in tqdm(os.listdir(folder),colour='green'):
        if 'wav' in file:
            abs_file_path = os.path.join(folder,file)
            data, sr = librosa.load(abs_file_path, sr = 16000)
            class_label = int(d_set[d_set.file_name == file].label)
            dataset.append([data,class_label])
    
    print("Dataset 생성 완료")
    return pd.DataFrame(dataset,columns=['data','label'])

In [56]:
def test_dataset():
    folder = "/content/drive/MyDrive/Colab Notebooks/졸업 프로젝트/test"
    dataset = []
    for file in tqdm(os.listdir(folder),colour='green'):
        if 'wav' in file:
            abs_file_path = os.path.join(folder,file)
            data, sr = librosa.load(abs_file_path, sr = 16000)
            class_label = int(d_set[d_set.file_name == file].label)
            dataset.append([data, file])
    
    print("Dataset 생성 완료")
    return pd.DataFrame(dataset,columns=['data', 'file_name'])

In [57]:
train_wav = train_dataset()
test_wav = test_dataset()

  0%|          | 0/3893 [00:00<?, ?it/s]

Dataset 생성 완료


  0%|          | 0/1467 [00:00<?, ?it/s]

Dataset 생성 완료


In [58]:
train_wav.head()

Unnamed: 0,data,label
0,"[0.2663571, 0.48566937, 0.4173614, 0.41482723,...",3
1,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1
2,"[-0.00027148987, -0.0003258982, -0.00013432898...",2
3,"[0.00040281215, 0.0020528466, 0.0028018046, 0....",1
4,"[0.012266396, 0.013744292, -0.0026555897, -0.0...",1


In [59]:
train_x = np.array(train_wav.data)
test_x = np.array(test_wav.data)

In [60]:
# 음성의 길이 중 가장 작은 길이를 구합니다.

def get_mini(data):

    mini = 9999999
    for i in data:
        if len(i) < mini:
            mini = len(i)

    return mini

train_mini = get_mini(train_x)
test_mini = get_mini(test_x)

#음성들의 길이를 맞춰줍니다.

mini = np.min([train_mini, test_mini])
print('가장 작은 길이 :', mini)

가장 작은 길이 : 800


In [61]:
def set_length(data, d_mini):

    result = []
    for i in data:
        result.append(i[:d_mini])
    result = np.array(result)

    return result

train_x = set_length(train_x, mini)
test_x = set_length(test_x, mini)

In [62]:
print('train :', train_x.shape)
print('test :', test_x.shape)

train : (3892, 800)
test : (1466, 800)


In [63]:
extracted_features = librosa.feature.mfcc(y=train_x[0], sr=16000, n_mfcc=40)
extracted_features.shape

  n_fft, y.shape[-1]


(40, 2)

In [64]:
def preprocess_dataset(data):
    mfccs = []
    for i in data:
        extracted_features = librosa.feature.mfcc(y=i,
                                              sr=16000,
                                              n_mfcc=40)
        mfccs.append(extracted_features)
            
    return mfccs

In [65]:
train_mfccs = preprocess_dataset(train_x)
train_mfccs = np.array(train_mfccs)
train_mfccs = train_mfccs.reshape(-1, train_mfccs.shape[1], train_mfccs.shape[2], 1)
np.array(train_mfccs).shape

(3892, 40, 2, 1)

In [66]:
import torchvision.datasets as datasets # 데이터셋 집합체
import torchvision.transforms as transforms # 변환 툴

from torch.utils.data import DataLoader # 학습 및 배치로 모델에 넣어주기 위한 툴
from torch.utils.data import DataLoader, Dataset

class CustomDataset(Dataset):
    def __init__(self, X, y, train_mode=True, transforms=None): #필요한 변수들을 선언
        self.X = X
        self.y = y
        self.train_mode = train_mode
        self.transforms = transforms

    def __getitem__(self, index): #index번째 data를 return
        X = self.X[index]
        
        if self.transforms is not None:
            X = self.transforms(X)

        if self.train_mode:
            y = self.y[index]
            return X, y
        else:
            return X
    
    def __len__(self): #길이 return
        return len(self.X)

In [67]:
train_X = train_mfccs[:300]
vali_X = train_mfccs[300:]

In [68]:
train_y = train_wav.label[:300]
vali_y = train_wav.label[300:].reset_index(drop = True)

In [69]:
# 에포크 설정
num_epochs = 100

# 배치 사이즈 설정
batch_size = 10

#만든 train dataset를 DataLoader에 넣어 batch 만들기
train_dataset = CustomDataset(X=train_X, y=train_y)
train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle=True)

vali_dataset = CustomDataset(X=vali_X, y=vali_y)
vali_loader = DataLoader(vali_dataset, batch_size = batch_size, shuffle=False)

In [70]:
train_batches = len(train_loader)
vali_batches = len(vali_loader)

print('/ total train batches :', train_batches)
print('/ total valid batches :', vali_batches)

/ total train batches : 30
/ total valid batches : 360


In [71]:
from tqdm.auto import tqdm
import torch.nn as nn # 신경망들이 포함됨

class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = torch.nn.Sequential(
            nn.Conv2d(40, 10, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ELU(), #activation function
            nn.BatchNorm2d(10))
            #nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.layer2 = torch.nn.Sequential(
            nn.Conv2d(10, 100, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ELU(), #activation function
            nn.BatchNorm2d(100),
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.layer3 = torch.nn.Sequential(
            nn.Conv2d(100, 200, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ELU(), #activation function
            nn.BatchNorm2d(200),
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.layer4 = torch.nn.Sequential(
            nn.Conv2d(200, 300, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ELU(), #activation function
            nn.BatchNorm2d(300),
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.fc_layer = nn.Sequential( 
            nn.Linear(300, 100), #fully connected layer(ouput layer)
            nn.ELU(),
            nn.Dropout2d(p=0.2),
            nn.BatchNorm1d(100),
            nn.Linear(100, 6) #fully connected layer(ouput layer)
        )    
        
    def forward(self, x):
        
        x = self.layer1(x) #1층
        
        x = self.layer2(x) #2층
         
        x = self.layer3(x) #3층
        
        x = self.layer4(x) #4층
        
        x = torch.flatten(x, start_dim=1) # N차원 배열 -> 1차원 배열
        
        out = self.fc_layer(x)
        return out

In [72]:
from tqdm.auto import tqdm
import torch.nn.init as init
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
import torch.nn as nn # 신경망들이 포함됨

class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer = torch.nn.Sequential(
            nn.Conv2d(40, 10, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ELU(), #activation function
            nn.BatchNorm2d(10),

            nn.Conv2d(10, 100, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ELU(), #activation function
            nn.BatchNorm2d(100),
            nn.MaxPool2d(kernel_size=2, stride=2), #pooling layer

            nn.Conv2d(100, 200, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ELU(), #activation function
            nn.BatchNorm2d(200),
            nn.MaxPool2d(kernel_size=2, stride=2), #pooling layer
        
            nn.Conv2d(200, 300, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ELU(), #activation function
            nn.Dropout2d(0.2),
            nn.BatchNorm2d(300),
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.fc_layer = nn.Sequential( 
            nn.Linear(300, 100), #fully connected layer(ouput layer)
            nn.ELU(),
            nn.Dropout2d(p=0.2),
            nn.BatchNorm1d(100),
            nn.Linear(100, 6) #fully connected layer(ouput layer)
        )    


        for m in self.modules():
          if isinstance(m, nn.Conv2d):
            init.kaiming_normal_(m.weight.data) 
            m.bias.data.fill_(0)
          if isinstance(m, nn.Linear):
            init.kaiming_normal_(m.weight.data)
            m.bias.data.fill_(0)
        
    def forward(self, x):
        out = self.layer(x)
        out = torch.flatten(out, start_dim=1) # N차원 배열 -> 1차원 배열
        out = self.fc_layer(out)

        return out

In [73]:
import torch.optim as optim # 최적화 알고리즘들이 포함힘

model = CNN().to(device)
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(params = model.parameters(), lr = 1e-3 )
scheduler = None

In [74]:
from tqdm.auto import tqdm

def train(model, optimizer, train_loader, scheduler, device): 
    model.to(device)
    n = len(train_loader)
    best_acc = 0
    
    for epoch in range(1,num_epochs): #에포크 설정
        model.train() #모델 학습
        running_loss = 0.0
        
        for wav, label in tqdm(iter(train_loader)):
            
            wav, label = wav.to(device), label.to(device) #배치 데이터
            optimizer.zero_grad() #배치마다 optimizer 초기화
        
            # Data -> Model -> Output
            logit = model(wav) #예측값 산출
            loss = criterion(logit, label) #손실함수 계산
            
            # 역전파
            loss.backward() #손실함수 기준 역전파 
            optimizer.step() #가중치 최적화
            running_loss += loss.item()
             
        print('[%d] Train loss: %.10f' %(epoch, running_loss / len(train_loader)))
        
        if scheduler is not None:
            scheduler.step()
            
            
        #Validation set 평가
        model.eval() #evaluation 과정에서 사용하지 않아야 하는 layer들을 알아서 off 시키도록 하는 함수
        vali_loss = 0.0
        correct = 0
       
        with torch.no_grad(): #파라미터 업데이트 안하기 때문에 no_grad 사용
            for wav, label in tqdm(iter(vali_loader)):
                
                wav, label = wav.to(device), label.to(device)
                logit = model(wav)
                vali_loss += criterion(logit, label)
                pred = logit.argmax(dim=1, keepdim=True)  #10개의 class중 가장 값이 높은 것을 예측 label로 추출
                correct += pred.eq(label.view_as(pred)).sum().item() #예측값과 실제값이 맞으면 1 아니면 0으로 합산
        vali_acc = 100 * correct / len(vali_loader.dataset)
        print('Vail set: Loss: {:.4f}, Accuracy: {}/{} ( {:.0f}%)\n'.format(vali_loss / len(vali_loader), correct, len(vali_loader.dataset), 100 * correct / len(vali_loader.dataset)))
        
        #베스트 모델 저장
        if best_acc < vali_acc:
            best_acc = vali_acc
            
            torch.save(model.state_dict(), '/content/drive/MyDrive/Colab Notebooks/졸업 프로젝트/weights.best.cnn.pth') #이 디렉토리에 best_model.pth을 저장
            print('Model Saved.')

In [75]:
train(model, optimizer, train_loader, scheduler, device)

  0%|          | 0/30 [00:00<?, ?it/s]



[1] Train loss: 1.9517538150


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.6073, Accuracy: 1487/3592 ( 41%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[2] Train loss: 1.5286244690


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.5139, Accuracy: 1590/3592 ( 44%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[3] Train loss: 1.4335291763


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.4861, Accuracy: 1669/3592 ( 46%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[4] Train loss: 1.1500440081


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.4965, Accuracy: 1663/3592 ( 46%)



  0%|          | 0/30 [00:00<?, ?it/s]

[5] Train loss: 1.1360567371


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.4251, Accuracy: 1808/3592 ( 50%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[6] Train loss: 0.9694869717


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.4495, Accuracy: 1849/3592 ( 51%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[7] Train loss: 0.8869675875


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.5225, Accuracy: 1755/3592 ( 49%)



  0%|          | 0/30 [00:00<?, ?it/s]

[8] Train loss: 0.7681932817


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.5323, Accuracy: 1813/3592 ( 50%)



  0%|          | 0/30 [00:00<?, ?it/s]

[9] Train loss: 0.7930620720


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.5304, Accuracy: 1882/3592 ( 52%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[10] Train loss: 0.7478183548


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.6129, Accuracy: 1829/3592 ( 51%)



  0%|          | 0/30 [00:00<?, ?it/s]

[11] Train loss: 0.5799494962


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.5410, Accuracy: 1882/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[12] Train loss: 0.6188990206


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.5643, Accuracy: 1831/3592 ( 51%)



  0%|          | 0/30 [00:00<?, ?it/s]

[13] Train loss: 0.5178392594


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.8248, Accuracy: 1787/3592 ( 50%)



  0%|          | 0/30 [00:00<?, ?it/s]

[14] Train loss: 0.5086775514


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.6624, Accuracy: 1945/3592 ( 54%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[15] Train loss: 0.5228231598


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.6851, Accuracy: 1874/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[16] Train loss: 0.3999370067


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.7163, Accuracy: 1943/3592 ( 54%)



  0%|          | 0/30 [00:00<?, ?it/s]

[17] Train loss: 0.3163809601


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.6981, Accuracy: 1911/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[18] Train loss: 0.3583591020


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.7437, Accuracy: 1943/3592 ( 54%)



  0%|          | 0/30 [00:00<?, ?it/s]

[19] Train loss: 0.3350166066


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.7983, Accuracy: 1916/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[20] Train loss: 0.3100065528


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.7184, Accuracy: 1926/3592 ( 54%)



  0%|          | 0/30 [00:00<?, ?it/s]

[21] Train loss: 0.3365034558


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.8622, Accuracy: 1906/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[22] Train loss: 0.2983615935


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.8830, Accuracy: 1966/3592 ( 55%)

Model Saved.


  0%|          | 0/30 [00:00<?, ?it/s]

[23] Train loss: 0.2750798504


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.8601, Accuracy: 1886/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[24] Train loss: 0.2861944265


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.0526, Accuracy: 1816/3592 ( 51%)



  0%|          | 0/30 [00:00<?, ?it/s]

[25] Train loss: 0.2902019137


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.0966, Accuracy: 1812/3592 ( 50%)



  0%|          | 0/30 [00:00<?, ?it/s]

[26] Train loss: 0.2624174272


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.9695, Accuracy: 1958/3592 ( 55%)



  0%|          | 0/30 [00:00<?, ?it/s]

[27] Train loss: 0.2440774261


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.1587, Accuracy: 1819/3592 ( 51%)



  0%|          | 0/30 [00:00<?, ?it/s]

[28] Train loss: 0.1775419445


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.0759, Accuracy: 1888/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[29] Train loss: 0.1972886542


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.9366, Accuracy: 1932/3592 ( 54%)



  0%|          | 0/30 [00:00<?, ?it/s]

[30] Train loss: 0.1639080266


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 1.9198, Accuracy: 1957/3592 ( 54%)



  0%|          | 0/30 [00:00<?, ?it/s]

[31] Train loss: 0.1364852324


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.1138, Accuracy: 1821/3592 ( 51%)



  0%|          | 0/30 [00:00<?, ?it/s]

[32] Train loss: 0.2031430518


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.1065, Accuracy: 1894/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[33] Train loss: 0.2286976889


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.2632, Accuracy: 1832/3592 ( 51%)



  0%|          | 0/30 [00:00<?, ?it/s]

[34] Train loss: 0.1103187790


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.1622, Accuracy: 1870/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[35] Train loss: 0.1608867352


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.1472, Accuracy: 1835/3592 ( 51%)



  0%|          | 0/30 [00:00<?, ?it/s]

[36] Train loss: 0.1455139823


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.0818, Accuracy: 1886/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[37] Train loss: 0.1203055254


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.0986, Accuracy: 1934/3592 ( 54%)



  0%|          | 0/30 [00:00<?, ?it/s]

[38] Train loss: 0.1401193372


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.0867, Accuracy: 1916/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[39] Train loss: 0.2715602921


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.1897, Accuracy: 1852/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[40] Train loss: 0.2059554783


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.0670, Accuracy: 1892/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[41] Train loss: 0.1356115535


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.0966, Accuracy: 1837/3592 ( 51%)



  0%|          | 0/30 [00:00<?, ?it/s]

[42] Train loss: 0.1098853193


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.2771, Accuracy: 1878/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[43] Train loss: 0.0570678823


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.1136, Accuracy: 1957/3592 ( 54%)



  0%|          | 0/30 [00:00<?, ?it/s]

[44] Train loss: 0.1121503232


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.3695, Accuracy: 1783/3592 ( 50%)



  0%|          | 0/30 [00:00<?, ?it/s]

[45] Train loss: 0.0590696540


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.2630, Accuracy: 1862/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[46] Train loss: 0.0488654787


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.3521, Accuracy: 1907/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[47] Train loss: 0.1129982588


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.3473, Accuracy: 1827/3592 ( 51%)



  0%|          | 0/30 [00:00<?, ?it/s]

[48] Train loss: 0.0850853445


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.3029, Accuracy: 1917/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[49] Train loss: 0.0470609639


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.3061, Accuracy: 1891/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[50] Train loss: 0.0622932823


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.3448, Accuracy: 1870/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[51] Train loss: 0.1196718663


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.2797, Accuracy: 1837/3592 ( 51%)



  0%|          | 0/30 [00:00<?, ?it/s]

[52] Train loss: 0.0929033231


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.2768, Accuracy: 1889/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[53] Train loss: 0.1672144515


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.4423, Accuracy: 1781/3592 ( 50%)



  0%|          | 0/30 [00:00<?, ?it/s]

[54] Train loss: 0.1580314266


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.4664, Accuracy: 1875/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[55] Train loss: 0.0852326884


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.3572, Accuracy: 1863/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[56] Train loss: 0.0963557215


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.3053, Accuracy: 1907/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[57] Train loss: 0.0438965673


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.3352, Accuracy: 1892/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[58] Train loss: 0.0385092231


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.2766, Accuracy: 1914/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[59] Train loss: 0.0843934180


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.4466, Accuracy: 1907/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[60] Train loss: 0.0853012319


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.5104, Accuracy: 1832/3592 ( 51%)



  0%|          | 0/30 [00:00<?, ?it/s]

[61] Train loss: 0.1267076463


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.3391, Accuracy: 1885/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[62] Train loss: 0.0471790371


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.4269, Accuracy: 1804/3592 ( 50%)



  0%|          | 0/30 [00:00<?, ?it/s]

[63] Train loss: 0.0941007578


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.5363, Accuracy: 1804/3592 ( 50%)



  0%|          | 0/30 [00:00<?, ?it/s]

[64] Train loss: 0.0746942476


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.4566, Accuracy: 1832/3592 ( 51%)



  0%|          | 0/30 [00:00<?, ?it/s]

[65] Train loss: 0.0547275526


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.4332, Accuracy: 1852/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[66] Train loss: 0.0317934006


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.4120, Accuracy: 1859/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[67] Train loss: 0.0576253407


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.4930, Accuracy: 1820/3592 ( 51%)



  0%|          | 0/30 [00:00<?, ?it/s]

[68] Train loss: 0.1508885119


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.5383, Accuracy: 1866/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[69] Train loss: 0.0709062062


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.6685, Accuracy: 1800/3592 ( 50%)



  0%|          | 0/30 [00:00<?, ?it/s]

[70] Train loss: 0.0443391333


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.3888, Accuracy: 1892/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[71] Train loss: 0.1109967361


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.4646, Accuracy: 1903/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[72] Train loss: 0.0719834818


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.4322, Accuracy: 1855/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[73] Train loss: 0.0829321994


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.5464, Accuracy: 1892/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[74] Train loss: 0.0791756795


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.5508, Accuracy: 1925/3592 ( 54%)



  0%|          | 0/30 [00:00<?, ?it/s]

[75] Train loss: 0.1018929445


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.6734, Accuracy: 1862/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[76] Train loss: 0.0520652178


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.5275, Accuracy: 1833/3592 ( 51%)



  0%|          | 0/30 [00:00<?, ?it/s]

[77] Train loss: 0.0852510306


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.3705, Accuracy: 1891/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[78] Train loss: 0.0949185043


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.5533, Accuracy: 1835/3592 ( 51%)



  0%|          | 0/30 [00:00<?, ?it/s]

[79] Train loss: 0.0429783430


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.4761, Accuracy: 1854/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[80] Train loss: 0.1445809056


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.3983, Accuracy: 1901/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[81] Train loss: 0.1151621658


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.3634, Accuracy: 1965/3592 ( 55%)



  0%|          | 0/30 [00:00<?, ?it/s]

[82] Train loss: 0.1088688598


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.4516, Accuracy: 1880/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[83] Train loss: 0.0812571488


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.5688, Accuracy: 1848/3592 ( 51%)



  0%|          | 0/30 [00:00<?, ?it/s]

[84] Train loss: 0.0544620854


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.5918, Accuracy: 1791/3592 ( 50%)



  0%|          | 0/30 [00:00<?, ?it/s]

[85] Train loss: 0.0937785034


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.6123, Accuracy: 1803/3592 ( 50%)



  0%|          | 0/30 [00:00<?, ?it/s]

[86] Train loss: 0.0841839006


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.3546, Accuracy: 1927/3592 ( 54%)



  0%|          | 0/30 [00:00<?, ?it/s]

[87] Train loss: 0.0767728116


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.4864, Accuracy: 1913/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[88] Train loss: 0.0583065297


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.4451, Accuracy: 1877/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[89] Train loss: 0.0607086940


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.4408, Accuracy: 1926/3592 ( 54%)



  0%|          | 0/30 [00:00<?, ?it/s]

[90] Train loss: 0.1069579473


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.6568, Accuracy: 1852/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[91] Train loss: 0.0801669242


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.5507, Accuracy: 1895/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[92] Train loss: 0.0862909314


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.7109, Accuracy: 1851/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[93] Train loss: 0.0796552928


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.4734, Accuracy: 1866/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[94] Train loss: 0.0650328914


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.3680, Accuracy: 1889/3592 ( 53%)



  0%|          | 0/30 [00:00<?, ?it/s]

[95] Train loss: 0.0551426815


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.4939, Accuracy: 1868/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[96] Train loss: 0.0335790899


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.6001, Accuracy: 1868/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[97] Train loss: 0.0137290982


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.6887, Accuracy: 1864/3592 ( 52%)



  0%|          | 0/30 [00:00<?, ?it/s]

[98] Train loss: 0.0416973006


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.8332, Accuracy: 1802/3592 ( 50%)



  0%|          | 0/30 [00:00<?, ?it/s]

[99] Train loss: 0.0222652588


  0%|          | 0/360 [00:00<?, ?it/s]

Vail set: Loss: 2.7558, Accuracy: 1871/3592 ( 52%)



In [76]:
test_mfccs = preprocess_dataset(test_x)
test_mfccs = np.array(test_mfccs)
test_mfccs = test_mfccs.reshape(-1, test_mfccs.shape[1], test_mfccs.shape[2], 1)

In [77]:
def predict(model, test_loader, device):
    model.eval()
    model_pred = []
    with torch.no_grad():
        for wav in tqdm(iter(test_loader)):
            wav = wav.to(device)

            pred_logit = model(wav)
            pred_logit = pred_logit.argmax(dim=1, keepdim=True).squeeze(1)

            model_pred.extend(pred_logit.tolist())
    return model_pred

In [78]:
test_dataset = CustomDataset(X=test_mfccs, y= None, train_mode=False)
test_loader = DataLoader(test_dataset, batch_size = batch_size, shuffle=False)

In [79]:
# Validation Accuracy가 가장 뛰어난 모델을 불러옵니다.
checkpoint = torch.load('/content/drive/MyDrive/Colab Notebooks/졸업 프로젝트/weights.best.cnn.pth')
model = CNN().to(device)
model.load_state_dict(checkpoint)

# Inference
preds = predict(model, test_loader, device)
preds[0:5], 

  0%|          | 0/147 [00:00<?, ?it/s]

([4, 1, 2, 1, 2],)

In [80]:
len(preds)

1466

In [36]:
for wav, label in tqdm(iter(test_loader)):
  label = label.to(device)
  print(label)

  0%|          | 0/11 [00:00<?, ?it/s]

ValueError: ignored