# 목적

Simple_CNN은 아무리 epoch를 돌려도 accuracy = 30이 한계다.
좀 더 깊은 CNN을 만들면 accuracy가 올라갈까?

# 0.1. 구글 드라이브 연동

In [1]:
import os
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


# 0.2. 라이브러리 임포트 및 전역변수 설정

In [2]:
from pathlib import Path
from easydict import EasyDict as edict
from PIL import Image
from tqdm import tqdm
from torchvision import transforms
from torch.utils.data import DataLoader, TensorDataset, random_split
from collections import Counter
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim

In [3]:
root = '/gdrive/My Drive/ctp431'
inst_pool = ['cel', 'cla', 'flu', 'gac', 'gel', 'org', 'pia', 'sax', 'tru', 'vio', 'voi']

# Step 1. 간단한 CNN 모델 만들기

In [4]:
class ImprovedCNN(nn.Module):
    def __init__(self, num_class = 11):
        super(ImprovedCNN, self).__init__()

        self.conv_block = nn.Sequential(
            # block 1
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # block 2
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # block 3
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.fc_block = nn.Sequential(
            # block 6
            nn.Flatten(),
            nn.Linear(256 * 28 * 28, 1024),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(1024, num_class)
        )

    def forward(self, x):
        x = self.conv_block(x)
        x = self.fc_block(x)
        return x

# Step 2. 학습 돌리기

In [5]:
# 데이터 로드
test_data_path = root + "/MK2/DataSet/test_data.pt"
test_x, test_y = torch.load(test_data_path)

train_data_path = root + "/MK2/DataSet/training_data.pt"
train_x, train_y = torch.load(train_data_path)

# 데이터셋과 DataLoader 생성
train_data = TensorDataset(train_x, train_y)
test_data = TensorDataset(test_x, test_y)

train_loader = DataLoader(train_data, batch_size=35, shuffle=True)
val_loader = DataLoader(test_data, batch_size=35, shuffle=False)

  test_x, test_y = torch.load(test_data_path)
  train_x, train_y = torch.load(train_data_path)


In [6]:
# 모델, 손실 함수, 옵티마이저 초기화
model = ImprovedCNN(num_class=11)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

result_dir = Path(root) / 'MK2/Results/Improved_CNN'
result_dir.mkdir(parents=True, exist_ok=True)

# Create directory name.
num_trial=0
parent_dir = result_dir / f'trial_{num_trial}'
while parent_dir.is_dir():
    num_trial = int(parent_dir.name.replace('trial_',''))
    parent_dir = result_dir / f'trial_{num_trial+1}'
parent_dir.mkdir(parents=True, exist_ok=True)
print(f'Trial save path : {parent_dir}')

# 트레이닝 루프
epochs = 10
for epoch in tqdm(range(epochs)):
    print("\n")
    model.train()
    running_loss = 0.0
    train_count = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        train_count += inputs.size(0)
        print(f"Epoch {epoch}/ train_count = {train_count}")

    print(f"Epoch {epoch + 1}/{epochs}, Loss: {running_loss / len(train_loader):.4f}")

    # Validation
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)

            print(predicted)
            print(labels)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_accuracy = 100 * correct / total
    print(f"Validation Loss: {val_loss / len(val_loader):.4f}, Accuracy: {val_accuracy:.2f}%")


model_save_path = str(parent_dir) + '/improved_cnn_model.pt'
torch.save(model.state_dict(), model_save_path)
print(f"Model saved to {model_save_path}")

Trial save path : /gdrive/My Drive/ctp431/MK2/Results/Improved_CNN/trial_0


  0%|          | 0/100 [00:00<?, ?it/s]



Epoch 0/ train_count = 35
Epoch 0/ train_count = 70
Epoch 0/ train_count = 105
Epoch 0/ train_count = 140
Epoch 0/ train_count = 175
Epoch 0/ train_count = 210
Epoch 0/ train_count = 245
Epoch 0/ train_count = 280
Epoch 0/ train_count = 315
Epoch 0/ train_count = 350
Epoch 0/ train_count = 385
Epoch 0/ train_count = 420
Epoch 0/ train_count = 455
Epoch 0/ train_count = 490
Epoch 0/ train_count = 525
Epoch 0/ train_count = 560
Epoch 0/ train_count = 595
Epoch 0/ train_count = 630
Epoch 0/ train_count = 665
Epoch 0/ train_count = 700
Epoch 0/ train_count = 735
Epoch 0/ train_count = 770
Epoch 0/ train_count = 805
Epoch 0/ train_count = 840
Epoch 0/ train_count = 875
Epoch 0/ train_count = 910
Epoch 0/ train_count = 945
Epoch 0/ train_count = 980
Epoch 0/ train_count = 1015
Epoch 0/ train_count = 1050
Epoch 0/ train_count = 1085
Epoch 0/ train_count = 1120
Epoch 0/ train_count = 1155
Epoch 0/ train_count = 1190
Epoch 0/ train_count = 1225
Epoch 0/ train_count = 1260
Epoch 0/ train_count

  1%|          | 1/100 [00:40<1:06:22, 40.23s/it]

tensor([8, 5, 5, 5, 5, 8, 5, 5, 2, 8, 8, 8, 5, 8, 5, 5, 2, 5, 2, 5, 5, 2, 5, 2,
        6, 8, 5, 5, 5, 1, 2, 2, 5, 5, 2], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
Validation Loss: 2.3889, Accuracy: 17.40%


Epoch 1/ train_count = 35
Epoch 1/ train_count = 70
Epoch 1/ train_count = 105
Epoch 1/ train_count = 140
Epoch 1/ train_count = 175
Epoch 1/ train_count = 210
Epoch 1/ train_count = 245
Epoch 1/ train_count = 280
Epoch 1/ train_count = 315
Epoch 1/ train_count = 350
Epoch 1/ train_count = 385
Epoch 1/ train_count = 420
Epoch 1/ train_count = 455
Epoch 1/ train_count = 490
Epoch 1/ train_count = 525
Epoch 1/ train_count = 560
Epoch 1/ train_count = 595
Epoch 1/ train_count = 630
Epoch 1/ train_count = 665
Epoch 1/ train_count = 700
Epoch 1/ train_count = 735
Epoch 1/ train_count = 770
Epoch 1/ train_count = 805
Epoch 1/ train_co

  2%|▏         | 2/100 [01:19<1:05:08, 39.88s/it]

tensor([0, 5, 2, 2, 5, 0, 5, 5, 2, 0, 0, 0, 5, 0, 5, 5, 2, 5, 2, 5, 5, 2, 5, 2,
        6, 3, 5, 5, 5, 6, 0, 2, 5, 4, 2], device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
Validation Loss: 2.3959, Accuracy: 23.51%


Epoch 2/ train_count = 35
Epoch 2/ train_count = 70
Epoch 2/ train_count = 105
Epoch 2/ train_count = 140
Epoch 2/ train_count = 175
Epoch 2/ train_count = 210
Epoch 2/ train_count = 245
Epoch 2/ train_count = 280
Epoch 2/ train_count = 315
Epoch 2/ train_count = 350
Epoch 2/ train_count = 385
Epoch 2/ train_count = 420
Epoch 2/ train_count = 455
Epoch 2/ train_count = 490
Epoch 2/ train_count = 525
Epoch 2/ train_count = 560
Epoch 2/ train_count = 595
Epoch 2/ train_count = 630
Epoch 2/ train_count = 665
Epoch 2/ train_count = 700
Epoch 2/ train_count = 735
Epoch 2/ train_count = 770
Epoch 2/ train_count = 805
Epoch 2/ train_co

  3%|▎         | 3/100 [02:00<1:04:55, 40.16s/it]

tensor([ 6, 10,  2, 10,  2,  6, 10, 10,  2,  4,  0,  4,  4,  0, 10,  4,  2, 10,
        10, 10,  4, 10, 10, 10,  6,  6, 10,  5, 10,  6,  4, 10, 10,  4,  2],
       device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
Validation Loss: 2.2962, Accuracy: 19.48%


Epoch 3/ train_count = 35
Epoch 3/ train_count = 70
Epoch 3/ train_count = 105
Epoch 3/ train_count = 140
Epoch 3/ train_count = 175
Epoch 3/ train_count = 210
Epoch 3/ train_count = 245
Epoch 3/ train_count = 280
Epoch 3/ train_count = 315
Epoch 3/ train_count = 350
Epoch 3/ train_count = 385
Epoch 3/ train_count = 420
Epoch 3/ train_count = 455
Epoch 3/ train_count = 490
Epoch 3/ train_count = 525
Epoch 3/ train_count = 560
Epoch 3/ train_count = 595
Epoch 3/ train_count = 630
Epoch 3/ train_count = 665
Epoch 3/ train_count = 700
Epoch 3/ train_count = 735
Epoch 3/ train_count = 770
Ep

  4%|▍         | 4/100 [02:40<1:04:14, 40.16s/it]

tensor([ 3, 10,  5, 10, 10,  3,  5, 10,  5,  0,  3,  0, 10,  3,  5, 10,  5,  5,
        10, 10, 10, 10, 10, 10,  6,  3, 10,  5,  5,  6,  0, 10,  5, 10,  5],
       device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
Validation Loss: 2.0824, Accuracy: 31.69%


Epoch 4/ train_count = 35
Epoch 4/ train_count = 70
Epoch 4/ train_count = 105
Epoch 4/ train_count = 140
Epoch 4/ train_count = 175
Epoch 4/ train_count = 210
Epoch 4/ train_count = 245
Epoch 4/ train_count = 280
Epoch 4/ train_count = 315
Epoch 4/ train_count = 350
Epoch 4/ train_count = 385
Epoch 4/ train_count = 420
Epoch 4/ train_count = 455
Epoch 4/ train_count = 490
Epoch 4/ train_count = 525
Epoch 4/ train_count = 560
Epoch 4/ train_count = 595
Epoch 4/ train_count = 630
Epoch 4/ train_count = 665
Epoch 4/ train_count = 700
Epoch 4/ train_count = 735
Epoch 4/ train_count = 770
Ep

  5%|▌         | 5/100 [03:20<1:03:42, 40.24s/it]

tensor([ 3, 10,  5, 10, 10,  3, 10, 10,  3,  4,  3,  4, 10,  9,  5, 10,  3, 10,
        10, 10, 10, 10, 10, 10,  3,  3, 10,  5,  5,  3,  0,  0, 10, 10,  5],
       device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
Validation Loss: 2.1346, Accuracy: 35.45%


Epoch 5/ train_count = 35
Epoch 5/ train_count = 70
Epoch 5/ train_count = 105
Epoch 5/ train_count = 140
Epoch 5/ train_count = 175
Epoch 5/ train_count = 210
Epoch 5/ train_count = 245
Epoch 5/ train_count = 280
Epoch 5/ train_count = 315
Epoch 5/ train_count = 350
Epoch 5/ train_count = 385
Epoch 5/ train_count = 420
Epoch 5/ train_count = 455
Epoch 5/ train_count = 490
Epoch 5/ train_count = 525
Epoch 5/ train_count = 560
Epoch 5/ train_count = 595
Epoch 5/ train_count = 630
Epoch 5/ train_count = 665
Epoch 5/ train_count = 700
Epoch 5/ train_count = 735
Epoch 5/ train_count = 770
Ep

  6%|▌         | 6/100 [04:01<1:03:03, 40.25s/it]

tensor([ 3, 10,  2, 10,  4,  3,  2, 10,  3,  4,  3,  4, 10,  9,  5,  4,  3,  3,
        10, 10, 10, 10,  9, 10,  6,  3, 10,  5,  5,  6,  0,  0, 10,  0,  2],
       device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
Validation Loss: 2.3136, Accuracy: 33.38%


Epoch 6/ train_count = 35
Epoch 6/ train_count = 70
Epoch 6/ train_count = 105
Epoch 6/ train_count = 140
Epoch 6/ train_count = 175
Epoch 6/ train_count = 210
Epoch 6/ train_count = 245
Epoch 6/ train_count = 280
Epoch 6/ train_count = 315
Epoch 6/ train_count = 350
Epoch 6/ train_count = 385
Epoch 6/ train_count = 420
Epoch 6/ train_count = 455
Epoch 6/ train_count = 490
Epoch 6/ train_count = 525
Epoch 6/ train_count = 560
Epoch 6/ train_count = 595
Epoch 6/ train_count = 630
Epoch 6/ train_count = 665
Epoch 6/ train_count = 700
Epoch 6/ train_count = 735
Epoch 6/ train_count = 770
Ep

  7%|▋         | 7/100 [04:41<1:02:25, 40.28s/it]

tensor([ 3,  5,  3, 10, 10,  0,  5, 10,  3,  0,  3,  3,  3,  9,  3, 10,  3,  3,
        10, 10, 10, 10, 10,  0,  7,  3, 10,  3,  5,  3,  0,  7, 10,  0,  3],
       device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
Validation Loss: 2.7850, Accuracy: 33.12%


Epoch 7/ train_count = 35
Epoch 7/ train_count = 70
Epoch 7/ train_count = 105
Epoch 7/ train_count = 140
Epoch 7/ train_count = 175
Epoch 7/ train_count = 210
Epoch 7/ train_count = 245
Epoch 7/ train_count = 280
Epoch 7/ train_count = 315
Epoch 7/ train_count = 350
Epoch 7/ train_count = 385
Epoch 7/ train_count = 420
Epoch 7/ train_count = 455
Epoch 7/ train_count = 490
Epoch 7/ train_count = 525
Epoch 7/ train_count = 560
Epoch 7/ train_count = 595
Epoch 7/ train_count = 630
Epoch 7/ train_count = 665
Epoch 7/ train_count = 700
Epoch 7/ train_count = 735
Epoch 7/ train_count = 770
Ep

  8%|▊         | 8/100 [05:21<1:01:48, 40.31s/it]

tensor([ 3, 10,  2, 10, 10,  3,  2, 10, 10,  4,  3,  3, 10, 10, 10, 10,  3, 10,
        10, 10, 10, 10, 10, 10,  3,  3, 10,  3, 10,  3,  9,  7, 10, 10, 10],
       device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
Validation Loss: 2.8431, Accuracy: 34.68%


Epoch 8/ train_count = 35
Epoch 8/ train_count = 70
Epoch 8/ train_count = 105
Epoch 8/ train_count = 140
Epoch 8/ train_count = 175
Epoch 8/ train_count = 210
Epoch 8/ train_count = 245
Epoch 8/ train_count = 280
Epoch 8/ train_count = 315
Epoch 8/ train_count = 350
Epoch 8/ train_count = 385
Epoch 8/ train_count = 420
Epoch 8/ train_count = 455
Epoch 8/ train_count = 490
Epoch 8/ train_count = 525
Epoch 8/ train_count = 560
Epoch 8/ train_count = 595
Epoch 8/ train_count = 630
Epoch 8/ train_count = 665
Epoch 8/ train_count = 700
Epoch 8/ train_count = 735
Epoch 8/ train_count = 770
Ep

  9%|▉         | 9/100 [06:02<1:01:08, 40.31s/it]

tensor([ 3, 10,  2, 10, 10, 10, 10, 10, 10,  3,  3,  3,  3,  3, 10, 10,  3, 10,
        10, 10, 10, 10, 10, 10,  6,  3, 10,  3,  3,  6,  0,  7, 10,  0, 10],
       device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
Validation Loss: 3.8966, Accuracy: 35.32%


Epoch 9/ train_count = 35
Epoch 9/ train_count = 70
Epoch 9/ train_count = 105
Epoch 9/ train_count = 140
Epoch 9/ train_count = 175
Epoch 9/ train_count = 210
Epoch 9/ train_count = 245
Epoch 9/ train_count = 280
Epoch 9/ train_count = 315
Epoch 9/ train_count = 350
Epoch 9/ train_count = 385
Epoch 9/ train_count = 420
Epoch 9/ train_count = 455
Epoch 9/ train_count = 490
Epoch 9/ train_count = 525
Epoch 9/ train_count = 560
Epoch 9/ train_count = 595
Epoch 9/ train_count = 630
Epoch 9/ train_count = 665
Epoch 9/ train_count = 700
Epoch 9/ train_count = 735
Epoch 9/ train_count = 770
Ep

 10%|█         | 10/100 [06:42<1:00:29, 40.33s/it]

tensor([ 5, 10,  2, 10,  4,  3, 10, 10, 10,  4,  3,  0, 10, 10, 10, 10,  3, 10,
        10, 10, 10, 10, 10, 10,  6,  3,  7,  5,  5,  3,  0,  7, 10,  0, 10],
       device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
Validation Loss: 4.2200, Accuracy: 34.94%


Epoch 10/ train_count = 35
Epoch 10/ train_count = 70
Epoch 10/ train_count = 105
Epoch 10/ train_count = 140
Epoch 10/ train_count = 175
Epoch 10/ train_count = 210
Epoch 10/ train_count = 245
Epoch 10/ train_count = 280
Epoch 10/ train_count = 315
Epoch 10/ train_count = 350
Epoch 10/ train_count = 385
Epoch 10/ train_count = 420
Epoch 10/ train_count = 455
Epoch 10/ train_count = 490
Epoch 10/ train_count = 525
Epoch 10/ train_count = 560
Epoch 10/ train_count = 595
Epoch 10/ train_count = 630
Epoch 10/ train_count = 665
Epoch 10/ train_count = 700
Epoch 10/ train_count = 735
Epoch 10

 11%|█         | 11/100 [07:22<59:47, 40.31s/it]  

tensor([ 5, 10,  2, 10, 10,  3,  2, 10, 10,  3,  3,  3, 10,  3, 10, 10,  3, 10,
        10, 10, 10, 10, 10, 10,  3,  3, 10,  3, 10,  3,  0,  7, 10,  0, 10],
       device='cuda:0')
tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       device='cuda:0')
Validation Loss: 4.6471, Accuracy: 34.29%


Epoch 11/ train_count = 35
Epoch 11/ train_count = 70
Epoch 11/ train_count = 105
Epoch 11/ train_count = 140
Epoch 11/ train_count = 175
Epoch 11/ train_count = 210
Epoch 11/ train_count = 245
Epoch 11/ train_count = 280
Epoch 11/ train_count = 315
Epoch 11/ train_count = 350
Epoch 11/ train_count = 385
Epoch 11/ train_count = 420
Epoch 11/ train_count = 455
Epoch 11/ train_count = 490
Epoch 11/ train_count = 525
Epoch 11/ train_count = 560
Epoch 11/ train_count = 595
Epoch 11/ train_count = 630
Epoch 11/ train_count = 665
Epoch 11/ train_count = 700
Epoch 11/ train_count = 735
Epoch 11

 11%|█         | 11/100 [07:36<1:01:35, 41.53s/it]


KeyboardInterrupt: 