<a href="https://colab.research.google.com/github/Ting-Wei-Chang626/Primary_AI/blob/main/demo_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Animal sound classification**

Load the necessary modules

In [20]:
import pandas as pd
import librosa
import numpy as np
import matplotlib.pyplot as plt
import librosa.display
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import os

Transform audio to images

In [21]:
def get_melspectrogram_db(file_path, sr=None, n_fft=2048, hop_length=512, n_mels=128, fmin=20, fmax=8300, top_db=80, duration=5):
    """Load an audio file and return its mel spectrogram in decibels.

    The waveform is forced to exactly ``duration`` seconds before the
    spectrogram is computed: shorter clips are reflect-padded on both sides,
    longer clips are truncated.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        sr: Sampling rate passed to ``librosa.load``.
        n_fft, hop_length, n_mels, fmin, fmax: Forwarded unchanged to
            ``librosa.feature.melspectrogram``.
        top_db: Forwarded to ``librosa.power_to_db``.
        duration: Clip length in seconds. Defaults to 5, the value that was
            previously hard-coded.

    Returns:
        The result of ``librosa.power_to_db`` applied to the mel spectrogram
        of the fixed-length clip.
    """
    wav, sr = librosa.load(file_path, sr=sr)
    target_len = int(duration * sr)
    if wav.shape[0] < target_len:
        # The per-side pad is rounded up, so an odd deficit overshoots the
        # target by one sample; the slice below trims it back.
        pad = int(np.ceil((target_len - wav.shape[0]) / 2))
        wav = np.pad(wav, pad, mode='reflect')
    wav = wav[:target_len]
    spec = librosa.feature.melspectrogram(y=wav, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels, fmin=fmin, fmax=fmax)
    spec_db = librosa.power_to_db(spec, top_db=top_db)

    return spec_db


def spec_to_image(spec, eps=1e-6):
    mean = spec.mean()
    std = spec.std()
    spec_norm = (spec - mean) / (std + eps)
    spec_min, spec_max = spec_norm.min(), spec_norm.max()
    spec_scaled = 255 * (spec_norm - spec_min) / (spec_max - spec_min)
    spec_scaled = spec_scaled.astype(np.uint8)

    return spec_scaled

Mount Google Drive

In [22]:
# Mount Google Drive at /content/drive (requires the google.colab package).
# The dataset, the test clip and the saved models used in later cells all
# live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Create ESC50 data format

In [23]:
class ESC50Data(Dataset):
    """In-memory dataset of spectrogram images with integer class labels.

    Every row of ``df`` is converted once, at construction time: the audio
    file named in ``in_col`` is turned into a dB mel spectrogram, rescaled to
    an 8-bit image, and given a leading axis of length 1.

    Args:
        base: Directory that the file names in ``in_col`` are relative to.
        df: DataFrame with one row per audio clip.
        in_col: Name of the column holding the audio file name.
        out_col: Name of the column holding the class name.

    Attributes set by the constructor:
        categories: Sorted unique values of ``df[out_col]``.
        c2i / i2c: Category -> index and index -> category mappings.
        data / labels: Parallel lists of image arrays and integer labels.
    """

    def __init__(self, base, df, in_col, out_col):
        self.df = df
        self.data = []
        self.labels = []
        self.categories = sorted(df[out_col].unique())
        self.c2i = {category: i for i, category in enumerate(self.categories)}
        self.i2c = {i: category for i, category in enumerate(self.categories)}

        for ind in tqdm(range(len(df))):
            row = df.iloc[ind]
            file_path = os.path.join(base, row[in_col])
            self.data.append(spec_to_image(get_melspectrogram_db(file_path))[np.newaxis, ...])
            # Read the label through out_col; it used to be hard-coded to the
            # 'category' column, which ignored the constructor argument.
            self.labels.append(self.c2i[row[out_col]])

    def __len__(self):
        """Return the number of loaded clips."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        return self.data[idx], self.labels[idx]

Split the dataset into training and validation sets

In [24]:
# Dataset location on the mounted Drive, defined once so the metadata and
# audio paths cannot drift apart.
ESC50_ROOT = '/content/drive/MyDrive/NTUCE_AI_worshop_0629/ESC-50'
AUDIO_DIR = os.path.join(ESC50_ROOT, 'audio')

df = pd.read_csv(os.path.join(ESC50_ROOT, 'meta', 'esc50_animal.csv'))

# Fold 5 is held out for validation; every other fold is used for training.
# The frames are named *_df so they do not collide with the train() function
# defined in a later cell (re-running this cell would otherwise shadow it).
train_df = df[df['fold']!=5]
valid_df = df[df['fold']==5]

train_data = ESC50Data(AUDIO_DIR, train_df, 'filename', 'category')
valid_data = ESC50Data(AUDIO_DIR, valid_df, 'filename', 'category')
train_loader = DataLoader(train_data, batch_size=4, shuffle=True)
# Evaluation does not need a random sample order.
valid_loader = DataLoader(valid_data, batch_size=4, shuffle=False)


100%|██████████| 320/320 [00:10<00:00, 30.52it/s]
100%|██████████| 80/80 [00:03<00:00, 26.57it/s]


In [25]:
import pickle
# Persist the index -> category mapping so that predicted class indices can
# be translated back to names later on.
with open('indtocat.pkl', 'wb') as mapping_file:
    pickle.dump(train_data.i2c, mapping_file)

Create the Convolutional Neural Network

In [26]:
import torch.nn as nn
import torch.nn.functional as F

class ESC50Model(nn.Module):
    """CNN classifier: eight conv/batch-norm/ReLU layers followed by two dense layers.

    The convolutional layers are registered as ``conv1``..``conv8`` with
    matching ``bn1``..``bn8``; a 2x2 max-pool follows layers 2, 4 and 6.

    Args:
        input_shape: ``(channels, height, width)`` of one input sample; only
            the height and width are used, to size the first dense layer.
        batch_size: Accepted for interface compatibility; not used.
        num_cats: Number of output classes.
    """

    def __init__(self, input_shape, batch_size=16, num_cats=10):
        super().__init__()
        # (in_channels, out_channels) of conv1..conv8, in registration order.
        channel_plan = [
            (1, 32), (32, 32),
            (32, 64), (64, 64),
            (64, 128), (128, 128),
            (128, 256), (256, 256),
        ]
        for layer_no, (c_in, c_out) in enumerate(channel_plan, start=1):
            setattr(self, f'conv{layer_no}', nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1))
            setattr(self, f'bn{layer_no}', nn.BatchNorm2d(c_out))

        # Height and width after the three floor-halving poolings in forward().
        pooled_h = input_shape[1] // 2 // 2 // 2
        pooled_w = input_shape[2] // 2 // 2 // 2
        self.dense1 = nn.Linear(256 * pooled_h * pooled_w, 500)
        self.dropout = nn.Dropout(0.5)
        self.dense2 = nn.Linear(500, num_cats)

    def forward(self, x):
        """Return the unnormalised class scores for the batch ``x``."""
        for layer_no in range(1, 9):
            conv = getattr(self, f'conv{layer_no}')
            norm = getattr(self, f'bn{layer_no}')
            x = F.relu(norm(conv(x)))
            if layer_no in (2, 4, 6):
                x = F.max_pool2d(x, kernel_size=2)
        flat = x.view(x.size(0), -1)
        hidden = self.dropout(F.relu(self.dense1(flat)))
        return self.dense2(hidden)

In [27]:
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# One input channel and 128 rows (n_mels); 431 columns is presumably the frame
# count of a 5 s clip at the dataset's sampling rate -- TODO confirm.
model = ESC50Model(input_shape=(1,128,431), num_cats=10).to(device)

In [28]:
# Optimisation settings for the custom CNN.
learning_rate = 2e-4  # also read as a module-level global by lr_decay()
epochs = 60
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Per-epoch lists of batch losses; train() appends to them in place.
train_losses, valid_losses = [], []

Declare training process

In [29]:
def setlr(optimizer, lr):
    """Assign ``lr`` to every parameter group of ``optimizer`` and return the optimizer."""
    for group in optimizer.param_groups:
        group.update(lr=lr)
    return optimizer


def lr_decay(optimizer, epoch):
    """Step-decay schedule applied on every 10th epoch.

    When ``epoch`` is a multiple of 10 the learning rate is set to the
    module-level ``learning_rate`` divided by ``10 ** (epoch // 10)`` and the
    new value is printed; on all other epochs the optimizer is returned
    untouched.
    """
    if epoch % 10 != 0:
        return optimizer

    new_lr = learning_rate / (10 ** (epoch // 10))
    optimizer = setlr(optimizer, new_lr)
    print(f'Changed learning rate to {new_lr}')
    return optimizer

def train(model, loss_fn, train_loader, valid_loader, epochs, optimizer, train_losses, valid_losses, change_lr=None, accum_steps=5):
    """Train ``model`` for ``epochs`` epochs, validating after each epoch.

    Gradients are accumulated (summed, not averaged) over ``accum_steps``
    consecutive batches before each optimizer step. A trailing partial group
    is applied at the end of the epoch, so no gradient is carried over into
    the next epoch.

    Args:
        model: Module to train; moved between train/eval mode each epoch.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar tensor.
        train_loader: Iterable of ``(x, y)`` training batches.
        valid_loader: Iterable of ``(x, y)`` validation batches.
        epochs: Number of epochs to run.
        optimizer: Optimizer bound to ``model``'s parameters.
        train_losses: List that receives one list of batch losses per epoch.
        valid_losses: List that receives one list of batch losses per epoch.
        change_lr: Optional callable ``change_lr(optimizer, epoch)`` returning
            the optimizer to use; called at the start of every epoch.
        accum_steps: Number of batches whose gradients are accumulated before
            each optimizer step. Defaults to 5.

    Returns:
        The same ``model`` object, trained in place.

    Raises:
        ValueError: If ``accum_steps`` is smaller than 1.
    """
    if accum_steps < 1:
        raise ValueError('accum_steps must be a positive integer')

    for epoch in tqdm(range(1,epochs+1)):
        model.train()
        batch_losses=[]

        if change_lr:
            optimizer = change_lr(optimizer, epoch)

        # Start every epoch from clean gradients.
        optimizer.zero_grad()
        pending = 0  # batches accumulated since the last optimizer step

        for x, y in train_loader:
            x = x.to(device, dtype=torch.float32)
            y = y.to(device, dtype=torch.long)
            y_hat = model(x)
            loss = loss_fn(y_hat, y)
            loss.backward()
            batch_losses.append(loss.item())

            pending += 1
            if pending == accum_steps:
                optimizer.step()        # update parameters of net
                optimizer.zero_grad()
                pending = 0

        if pending:
            # Apply the gradients of the final, incomplete accumulation group.
            optimizer.step()
            optimizer.zero_grad()

        train_losses.append(batch_losses)
        print(f'Epoch - {epoch} Train-Loss : {np.mean(train_losses[-1])}')
        model.eval()
        batch_losses=[]
        trace_y = []
        trace_yhat = []

        # Validation needs no gradients; skip building the autograd graph.
        with torch.no_grad():
            for x, y in valid_loader:
                x = x.to(device, dtype=torch.float32)
                y = y.to(device, dtype=torch.long)
                y_hat = model(x)
                loss = loss_fn(y_hat, y)
                trace_y.append(y.cpu().detach().numpy())
                trace_yhat.append(y_hat.cpu().detach().numpy())
                batch_losses.append(loss.item())

        valid_losses.append(batch_losses)
        trace_y = np.concatenate(trace_y)
        trace_yhat = np.concatenate(trace_yhat)
        # Fraction of validation samples whose arg-max score matches the label.
        accuracy = np.mean(trace_yhat.argmax(axis=1)==trace_y)
        print(f'Epoch - {epoch} Valid-Loss : {np.mean(valid_losses[-1])} Valid-Accuracy : {accuracy}')
    return model

Start training

In [30]:
# Train the custom CNN; train_losses / valid_losses are filled in place and
# the returned object is the same module that was passed in.
model_ = train(
    model, loss_fn, train_loader, valid_loader, epochs, optimizer,
    train_losses, valid_losses, change_lr=lr_decay,
)

# Save the whole model object (not just a state_dict) to Drive.
torch.save(model_, "/content/drive/MyDrive/NTUCE_AI_worshop_0629/demo.pth")

  0%|          | 0/60 [00:00<?, ?it/s]

Epoch - 1 Train-Loss : 8.493129592816558


  2%|▏         | 1/60 [00:05<04:58,  5.06s/it]

Epoch - 1 Valid-Loss : 1.9350367470644414 Valid-Accuracy : 0.4125
Epoch - 2 Train-Loss : 1.7215860944706947


  3%|▎         | 2/60 [00:10<04:52,  5.04s/it]

Epoch - 2 Valid-Loss : 1.4349197069881483 Valid-Accuracy : 0.5375
Epoch - 3 Train-Loss : 1.1806510666850953


  5%|▌         | 3/60 [00:15<04:47,  5.05s/it]

Epoch - 3 Valid-Loss : 1.2770167865906843 Valid-Accuracy : 0.575
Epoch - 4 Train-Loss : 0.8439261434134095


  7%|▋         | 4/60 [00:20<04:42,  5.04s/it]

Epoch - 4 Valid-Loss : 1.3974287456949241 Valid-Accuracy : 0.6
Epoch - 5 Train-Loss : 0.7353332173100625


  8%|▊         | 5/60 [00:25<04:37,  5.04s/it]

Epoch - 5 Valid-Loss : 1.3160641859617954 Valid-Accuracy : 0.575
Epoch - 6 Train-Loss : 0.5723331836865327


 10%|█         | 6/60 [00:30<04:32,  5.05s/it]

Epoch - 6 Valid-Loss : 1.2420763378759148 Valid-Accuracy : 0.6125
Epoch - 7 Train-Loss : 0.5088089760120397


 12%|█▏        | 7/60 [00:35<04:27,  5.05s/it]

Epoch - 7 Valid-Loss : 1.354181602411336 Valid-Accuracy : 0.625
Epoch - 8 Train-Loss : 0.5056536415680398


 13%|█▎        | 8/60 [00:40<04:23,  5.06s/it]

Epoch - 8 Valid-Loss : 1.3374292193664588 Valid-Accuracy : 0.625
Epoch - 9 Train-Loss : 0.4407160873066346


 15%|█▌        | 9/60 [00:45<04:18,  5.06s/it]

Epoch - 9 Valid-Loss : 1.7929820910048675 Valid-Accuracy : 0.6125
Changed learning rate to 2e-05
Epoch - 10 Train-Loss : 0.36791622847663863


 17%|█▋        | 10/60 [00:50<04:13,  5.06s/it]

Epoch - 10 Valid-Loss : 1.483306749721669 Valid-Accuracy : 0.625
Epoch - 11 Train-Loss : 0.2533161813460934


 18%|█▊        | 11/60 [00:55<04:08,  5.06s/it]

Epoch - 11 Valid-Loss : 1.323087801359361 Valid-Accuracy : 0.6
Epoch - 12 Train-Loss : 0.21424184123293344


 20%|██        | 12/60 [01:00<04:02,  5.06s/it]

Epoch - 12 Valid-Loss : 1.266890344401554 Valid-Accuracy : 0.6625
Epoch - 13 Train-Loss : 0.20947291978456378


 22%|██▏       | 13/60 [01:05<03:57,  5.06s/it]

Epoch - 13 Valid-Loss : 1.2149267314474854 Valid-Accuracy : 0.7
Epoch - 14 Train-Loss : 0.1710885206892602


 23%|██▎       | 14/60 [01:10<03:52,  5.06s/it]

Epoch - 14 Valid-Loss : 1.2172850457311142 Valid-Accuracy : 0.65
Epoch - 15 Train-Loss : 0.1489089777305317


 25%|██▌       | 15/60 [01:15<03:47,  5.05s/it]

Epoch - 15 Valid-Loss : 1.2867649042644131 Valid-Accuracy : 0.6375
Epoch - 16 Train-Loss : 0.1334990657319924


 27%|██▋       | 16/60 [01:20<03:42,  5.05s/it]

Epoch - 16 Valid-Loss : 1.3229306477043792 Valid-Accuracy : 0.6375
Epoch - 17 Train-Loss : 0.1278062480861802


 28%|██▊       | 17/60 [01:25<03:37,  5.06s/it]

Epoch - 17 Valid-Loss : 1.2766155139186595 Valid-Accuracy : 0.6375
Epoch - 18 Train-Loss : 0.11373467972040867


 30%|███       | 18/60 [01:31<03:34,  5.12s/it]

Epoch - 18 Valid-Loss : 1.362642686048639 Valid-Accuracy : 0.6375
Epoch - 19 Train-Loss : 0.12543111271448523


 32%|███▏      | 19/60 [01:36<03:28,  5.10s/it]

Epoch - 19 Valid-Loss : 1.359461616347835 Valid-Accuracy : 0.6625
Changed learning rate to 2e-06
Epoch - 20 Train-Loss : 0.12358506129994566


 33%|███▎      | 20/60 [01:41<03:23,  5.08s/it]

Epoch - 20 Valid-Loss : 1.3216056285050855 Valid-Accuracy : 0.6625
Epoch - 21 Train-Loss : 0.11399698858657992


 35%|███▌      | 21/60 [01:46<03:17,  5.07s/it]

Epoch - 21 Valid-Loss : 1.2377858286876289 Valid-Accuracy : 0.6875
Epoch - 22 Train-Loss : 0.09444553621974308


 37%|███▋      | 22/60 [01:51<03:12,  5.07s/it]

Epoch - 22 Valid-Loss : 1.293869469408446 Valid-Accuracy : 0.7
Epoch - 23 Train-Loss : 0.11624523916228284


 38%|███▊      | 23/60 [01:56<03:07,  5.06s/it]

Epoch - 23 Valid-Loss : 1.3487008146817119 Valid-Accuracy : 0.6875
Epoch - 24 Train-Loss : 0.08130658948346925


 40%|████      | 24/60 [02:01<03:02,  5.07s/it]

Epoch - 24 Valid-Loss : 1.26744590160597 Valid-Accuracy : 0.6875
Epoch - 25 Train-Loss : 0.10231525938816828


 42%|████▏     | 25/60 [02:06<02:57,  5.06s/it]

Epoch - 25 Valid-Loss : 1.2770140046879532 Valid-Accuracy : 0.675
Epoch - 26 Train-Loss : 0.0844742519351099


 43%|████▎     | 26/60 [02:11<02:51,  5.06s/it]

Epoch - 26 Valid-Loss : 1.3642615495660722 Valid-Accuracy : 0.7125
Epoch - 27 Train-Loss : 0.08858760368479748


 45%|████▌     | 27/60 [02:16<02:47,  5.06s/it]

Epoch - 27 Valid-Loss : 1.3307039232768147 Valid-Accuracy : 0.6625
Epoch - 28 Train-Loss : 0.09324125449265282


 47%|████▋     | 28/60 [02:21<02:41,  5.06s/it]

Epoch - 28 Valid-Loss : 1.2790608533300656 Valid-Accuracy : 0.6875
Epoch - 29 Train-Loss : 0.10022753975508322


 48%|████▊     | 29/60 [02:26<02:36,  5.05s/it]

Epoch - 29 Valid-Loss : 1.3576593297859005 Valid-Accuracy : 0.6375
Changed learning rate to 2.0000000000000002e-07
Epoch - 30 Train-Loss : 0.09842776331800271


 50%|█████     | 30/60 [02:31<02:31,  5.06s/it]

Epoch - 30 Valid-Loss : 1.3131596347391223 Valid-Accuracy : 0.65
Epoch - 31 Train-Loss : 0.10826199801389284


 52%|█████▏    | 31/60 [02:37<02:28,  5.10s/it]

Epoch - 31 Valid-Loss : 1.3851116275614914 Valid-Accuracy : 0.7125
Epoch - 32 Train-Loss : 0.08514491024250663


 53%|█████▎    | 32/60 [02:42<02:22,  5.09s/it]

Epoch - 32 Valid-Loss : 1.3328962693856738 Valid-Accuracy : 0.7125
Epoch - 33 Train-Loss : 0.08475191529516053


 55%|█████▌    | 33/60 [02:47<02:17,  5.08s/it]

Epoch - 33 Valid-Loss : 1.3462011045529834 Valid-Accuracy : 0.7
Epoch - 34 Train-Loss : 0.09470912946707806


 57%|█████▋    | 34/60 [02:52<02:11,  5.07s/it]

Epoch - 34 Valid-Loss : 1.325675886823035 Valid-Accuracy : 0.65
Epoch - 35 Train-Loss : 0.1331739904822058


 58%|█████▊    | 35/60 [02:57<02:06,  5.07s/it]

Epoch - 35 Valid-Loss : 1.3118050499288074 Valid-Accuracy : 0.7125
Epoch - 36 Train-Loss : 0.09474600929682815


 60%|██████    | 36/60 [03:02<02:01,  5.06s/it]

Epoch - 36 Valid-Loss : 1.350733168498118 Valid-Accuracy : 0.6875
Epoch - 37 Train-Loss : 0.10468612241186008


 62%|██████▏   | 37/60 [03:07<01:56,  5.05s/it]

Epoch - 37 Valid-Loss : 1.284257869045541 Valid-Accuracy : 0.7125
Epoch - 38 Train-Loss : 0.09086717828040491


 63%|██████▎   | 38/60 [03:12<01:51,  5.06s/it]

Epoch - 38 Valid-Loss : 1.3256546022399562 Valid-Accuracy : 0.6875
Epoch - 39 Train-Loss : 0.11330375588763628


 65%|██████▌   | 39/60 [03:17<01:46,  5.05s/it]

Epoch - 39 Valid-Loss : 1.309082139865859 Valid-Accuracy : 0.7125
Changed learning rate to 2e-08
Epoch - 40 Train-Loss : 0.10736231474034526


 67%|██████▋   | 40/60 [03:22<01:40,  5.05s/it]

Epoch - 40 Valid-Loss : 1.3257270440157298 Valid-Accuracy : 0.6875
Epoch - 41 Train-Loss : 0.08430666087355121


 68%|██████▊   | 41/60 [03:27<01:35,  5.05s/it]

Epoch - 41 Valid-Loss : 1.3249640345793978 Valid-Accuracy : 0.7125
Epoch - 42 Train-Loss : 0.09653504038005281


 70%|███████   | 42/60 [03:32<01:30,  5.05s/it]

Epoch - 42 Valid-Loss : 1.3154439197558077 Valid-Accuracy : 0.725
Epoch - 43 Train-Loss : 0.09271794050333995


 72%|███████▏  | 43/60 [03:37<01:25,  5.05s/it]

Epoch - 43 Valid-Loss : 1.2890141234514885 Valid-Accuracy : 0.7
Epoch - 44 Train-Loss : 0.10273094085767127


 73%|███████▎  | 44/60 [03:42<01:20,  5.05s/it]

Epoch - 44 Valid-Loss : 1.3549760990758841 Valid-Accuracy : 0.7125
Epoch - 45 Train-Loss : 0.07850382683723778


 75%|███████▌  | 45/60 [03:47<01:15,  5.05s/it]

Epoch - 45 Valid-Loss : 1.3454785141617323 Valid-Accuracy : 0.7
Epoch - 46 Train-Loss : 0.11741589251253118


 77%|███████▋  | 46/60 [03:52<01:10,  5.06s/it]

Epoch - 46 Valid-Loss : 1.4116640263811462 Valid-Accuracy : 0.625
Epoch - 47 Train-Loss : 0.10615196976637549


 78%|███████▊  | 47/60 [03:57<01:05,  5.05s/it]

Epoch - 47 Valid-Loss : 1.3134814374749113 Valid-Accuracy : 0.7
Epoch - 48 Train-Loss : 0.11077922075365905


 80%|████████  | 48/60 [04:02<01:00,  5.05s/it]

Epoch - 48 Valid-Loss : 1.3525448404092457 Valid-Accuracy : 0.7125
Epoch - 49 Train-Loss : 0.07823821543413043


 82%|████████▏ | 49/60 [04:08<00:55,  5.06s/it]

Epoch - 49 Valid-Loss : 1.3495325909447273 Valid-Accuracy : 0.6875
Changed learning rate to 2e-09
Epoch - 50 Train-Loss : 0.0851294674339826


 83%|████████▎ | 50/60 [04:13<00:50,  5.05s/it]

Epoch - 50 Valid-Loss : 1.3052898573796483 Valid-Accuracy : 0.6875
Epoch - 51 Train-Loss : 0.10134056864427236


 85%|████████▌ | 51/60 [04:18<00:45,  5.06s/it]

Epoch - 51 Valid-Loss : 1.3572737872633298 Valid-Accuracy : 0.7
Epoch - 52 Train-Loss : 0.11254439152296705


 87%|████████▋ | 52/60 [04:23<00:40,  5.06s/it]

Epoch - 52 Valid-Loss : 1.3278115064138547 Valid-Accuracy : 0.7125
Epoch - 53 Train-Loss : 0.0661794289337024


 88%|████████▊ | 53/60 [04:28<00:35,  5.05s/it]

Epoch - 53 Valid-Loss : 1.3498556667978847 Valid-Accuracy : 0.675
Epoch - 54 Train-Loss : 0.10498405421897328


 90%|█████████ | 54/60 [04:33<00:30,  5.06s/it]

Epoch - 54 Valid-Loss : 1.324150443168128 Valid-Accuracy : 0.7125
Epoch - 55 Train-Loss : 0.10507870586449082


 92%|█████████▏| 55/60 [04:38<00:25,  5.05s/it]

Epoch - 55 Valid-Loss : 1.2933068805876247 Valid-Accuracy : 0.7
Epoch - 56 Train-Loss : 0.09254022259409674


 93%|█████████▎| 56/60 [04:43<00:20,  5.05s/it]

Epoch - 56 Valid-Loss : 1.3248214033006662 Valid-Accuracy : 0.7
Epoch - 57 Train-Loss : 0.13267455208329865


 95%|█████████▌| 57/60 [04:48<00:15,  5.06s/it]

Epoch - 57 Valid-Loss : 1.2808835001901797 Valid-Accuracy : 0.7125
Epoch - 58 Train-Loss : 0.12574023467073178


 97%|█████████▋| 58/60 [04:53<00:10,  5.05s/it]

Epoch - 58 Valid-Loss : 1.3521463480727107 Valid-Accuracy : 0.6875
Epoch - 59 Train-Loss : 0.10977872155439457


 98%|█████████▊| 59/60 [04:58<00:05,  5.06s/it]

Epoch - 59 Valid-Loss : 1.3360229551723024 Valid-Accuracy : 0.7
Changed learning rate to 2e-10
Epoch - 60 Train-Loss : 0.08104758621558963


100%|██████████| 60/60 [05:03<00:00,  5.06s/it]

Epoch - 60 Valid-Loss : 1.31522182224071 Valid-Accuracy : 0.675





In [31]:
# Preprocess a single clip with the same two steps used by ESC50Data.
test_path = "/content/drive/MyDrive/NTUCE_AI_worshop_0629/1-18757-A-4.wav"
test_spec_db = get_melspectrogram_db(test_path)
test_data = spec_to_image(test_spec_db)


In [32]:
# Reload the index -> category mapping written earlier in this notebook.
with open('indtocat.pkl', 'rb') as mapping_file:
    indtocat = pickle.load(mapping_file)

In [33]:
# Display the class-index -> category-name mapping loaded from indtocat.pkl.
indtocat

{0: 'cat',
 1: 'cow',
 2: 'crow',
 3: 'dog',
 4: 'frog',
 5: 'hen',
 6: 'insects',
 7: 'pig',
 8: 'rooster',
 9: 'sheep'}

In [34]:
model_.eval()
spec_t = torch.tensor(test_data).to(device, dtype=torch.float32)
# Inference only: disable autograd, and call the module itself rather than
# .forward() so that any registered hooks run.
with torch.no_grad():
    # Add batch and channel axes in front of the 2-D spectrogram image.
    pr = model_(spec_t.reshape(1,1,*spec_t.shape))
print(pr)

# Index of the highest score for the single sample in the batch.
ind = pr.argmax(dim=1).item()
print(indtocat[ind])

tensor([[ 0.2131, -0.4615,  1.6313,  0.7894, 11.0219,  0.2985, -1.8602, -5.0685,
          0.2469, -3.2826]], device='cuda:0', grad_fn=<AddmmBackward0>)
frog


Use the pretrained torchvision model

In [35]:
from torchvision.models import resnet34

# Start from torchvision's ResNet-34 with pretrained weights.
resnet_model = resnet34(pretrained=True)
# Replace the classification head with a 10-output linear layer, one output
# per animal category.
resnet_model.fc = nn.Linear(512,10)
# Replace the first convolution with a 1-input-channel version to match the
# single-channel spectrogram images. This is a newly created layer, so it
# does not carry over the pretrained weights of the layer it replaces.
resnet_model.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
resnet_model = resnet_model.to(device)



In [36]:
# Same optimisation settings as for the custom CNN, now bound to the ResNet
# parameters. learning_rate is also read as a global by lr_decay().
learning_rate = 2e-4
epochs = 60
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet_model.parameters(), lr=learning_rate)
# Separate loss histories so the custom model's curves are not overwritten.
resnet_train_losses, resnet_valid_losses = [], []

model_res = train(
    resnet_model, loss_fn, train_loader, valid_loader, epochs, optimizer,
    resnet_train_losses, resnet_valid_losses, change_lr=lr_decay,
)
torch.save(model_res, "/content/drive/MyDrive/NTUCE_AI_worshop_0629/demo_resnet.pth")

  0%|          | 0/60 [00:00<?, ?it/s]

Epoch - 1 Train-Loss : 1.8935139752924441


  2%|▏         | 1/60 [00:04<04:07,  4.19s/it]

Epoch - 1 Valid-Loss : 1.2624369056895375 Valid-Accuracy : 0.5875
Epoch - 2 Train-Loss : 1.2317240949720145


  3%|▎         | 2/60 [00:08<04:11,  4.33s/it]

Epoch - 2 Valid-Loss : 0.6645523422863334 Valid-Accuracy : 0.8125
Epoch - 3 Train-Loss : 0.9237326917238533


  5%|▌         | 3/60 [00:13<04:09,  4.38s/it]

Epoch - 3 Valid-Loss : 0.7578559407647845 Valid-Accuracy : 0.775
Epoch - 4 Train-Loss : 0.8435042890720069


  7%|▋         | 4/60 [00:17<03:58,  4.27s/it]

Epoch - 4 Valid-Loss : 0.6192652500628355 Valid-Accuracy : 0.75
Epoch - 5 Train-Loss : 0.5114855136023835


  8%|▊         | 5/60 [00:21<03:55,  4.28s/it]

Epoch - 5 Valid-Loss : 0.7241851273009914 Valid-Accuracy : 0.775
Epoch - 6 Train-Loss : 0.44501048035454005


 10%|█         | 6/60 [00:25<03:46,  4.20s/it]

Epoch - 6 Valid-Loss : 0.5626015957045638 Valid-Accuracy : 0.8375
Epoch - 7 Train-Loss : 0.49061514841159803


 12%|█▏        | 7/60 [00:29<03:39,  4.14s/it]

Epoch - 7 Valid-Loss : 0.5449572647080003 Valid-Accuracy : 0.8
Epoch - 8 Train-Loss : 0.43951829060679304


 13%|█▎        | 8/60 [00:33<03:36,  4.17s/it]

Epoch - 8 Valid-Loss : 0.46264521556413457 Valid-Accuracy : 0.8875
Epoch - 9 Train-Loss : 0.4261244320194237


 15%|█▌        | 9/60 [00:37<03:30,  4.13s/it]

Epoch - 9 Valid-Loss : 0.9544681025781756 Valid-Accuracy : 0.8125
Changed learning rate to 2e-05
Epoch - 10 Train-Loss : 0.3545800596126355


 17%|█▋        | 10/60 [00:41<03:25,  4.10s/it]

Epoch - 10 Valid-Loss : 0.655398366236841 Valid-Accuracy : 0.8
Epoch - 11 Train-Loss : 0.30735697483178226


 18%|█▊        | 11/60 [00:46<03:22,  4.13s/it]

Epoch - 11 Valid-Loss : 0.4877796083589601 Valid-Accuracy : 0.85
Epoch - 12 Train-Loss : 0.28536321729188785


 20%|██        | 12/60 [00:50<03:18,  4.15s/it]

Epoch - 12 Valid-Loss : 0.438315633083425 Valid-Accuracy : 0.8625
Epoch - 13 Train-Loss : 0.23652501582400873


 22%|██▏       | 13/60 [00:54<03:12,  4.11s/it]

Epoch - 13 Valid-Loss : 0.47925437674228843 Valid-Accuracy : 0.8375
Epoch - 14 Train-Loss : 0.2772375236847438


 23%|██▎       | 14/60 [00:58<03:08,  4.11s/it]

Epoch - 14 Valid-Loss : 0.4349103530514299 Valid-Accuracy : 0.85
Epoch - 15 Train-Loss : 0.270326972019393


 25%|██▌       | 15/60 [01:03<03:13,  4.30s/it]

Epoch - 15 Valid-Loss : 0.3885332172209644 Valid-Accuracy : 0.8875
Epoch - 16 Train-Loss : 0.28911242869216947


 27%|██▋       | 16/60 [01:07<03:05,  4.21s/it]

Epoch - 16 Valid-Loss : 0.42282330233088034 Valid-Accuracy : 0.875
Epoch - 17 Train-Loss : 0.20431080295238643


 28%|██▊       | 17/60 [01:11<02:58,  4.16s/it]

Epoch - 17 Valid-Loss : 0.4090720707502896 Valid-Accuracy : 0.8625
Epoch - 18 Train-Loss : 0.19430840185377746


 30%|███       | 18/60 [01:15<02:56,  4.20s/it]

Epoch - 18 Valid-Loss : 0.3925555025959511 Valid-Accuracy : 0.8875
Epoch - 19 Train-Loss : 0.21534940546262077


 32%|███▏      | 19/60 [01:19<02:50,  4.15s/it]

Epoch - 19 Valid-Loss : 0.3736626130415971 Valid-Accuracy : 0.85
Changed learning rate to 2e-06
Epoch - 20 Train-Loss : 0.13140365504659712


 33%|███▎      | 20/60 [01:24<02:55,  4.39s/it]

Epoch - 20 Valid-Loss : 0.4300312008595938 Valid-Accuracy : 0.9
Epoch - 21 Train-Loss : 0.24120848617749288


 35%|███▌      | 21/60 [01:28<02:49,  4.35s/it]

Epoch - 21 Valid-Loss : 0.3610162109334169 Valid-Accuracy : 0.875
Epoch - 22 Train-Loss : 0.288354642665945


 37%|███▋      | 22/60 [01:32<02:41,  4.25s/it]

Epoch - 22 Valid-Loss : 0.34601344612341906 Valid-Accuracy : 0.8875
Epoch - 23 Train-Loss : 0.1832355290884152


 38%|███▊      | 23/60 [01:36<02:34,  4.19s/it]

Epoch - 23 Valid-Loss : 0.33046003287787756 Valid-Accuracy : 0.9
Epoch - 24 Train-Loss : 0.24114081466686912


 40%|████      | 24/60 [01:40<02:31,  4.21s/it]

Epoch - 24 Valid-Loss : 0.3220584530105951 Valid-Accuracy : 0.9125
Epoch - 25 Train-Loss : 0.3222210594045464


 42%|████▏     | 25/60 [01:45<02:25,  4.17s/it]

Epoch - 25 Valid-Loss : 0.46673951055990975 Valid-Accuracy : 0.85
Epoch - 26 Train-Loss : 0.18699998869560658


 43%|████▎     | 26/60 [01:49<02:20,  4.12s/it]

Epoch - 26 Valid-Loss : 0.3607932432807502 Valid-Accuracy : 0.8875
Epoch - 27 Train-Loss : 0.2598874173592776


 45%|████▌     | 27/60 [01:53<02:16,  4.14s/it]

Epoch - 27 Valid-Loss : 0.3716938439946034 Valid-Accuracy : 0.875
Epoch - 28 Train-Loss : 0.17199036452220753


 47%|████▋     | 28/60 [01:57<02:12,  4.15s/it]

Epoch - 28 Valid-Loss : 0.3357804055679935 Valid-Accuracy : 0.8875
Epoch - 29 Train-Loss : 0.24620626842370258


 48%|████▊     | 29/60 [02:01<02:07,  4.11s/it]

Epoch - 29 Valid-Loss : 0.46309173957089345 Valid-Accuracy : 0.8375
Changed learning rate to 2.0000000000000002e-07
Epoch - 30 Train-Loss : 0.263478850806132


 50%|█████     | 30/60 [02:05<02:02,  4.10s/it]

Epoch - 30 Valid-Loss : 0.29903448495829255 Valid-Accuracy : 0.8875
Epoch - 31 Train-Loss : 0.16499294589739294


 52%|█████▏    | 31/60 [02:09<02:00,  4.14s/it]

Epoch - 31 Valid-Loss : 0.37519897585388906 Valid-Accuracy : 0.875
Epoch - 32 Train-Loss : 0.14351988536072896


 53%|█████▎    | 32/60 [02:13<01:54,  4.10s/it]

Epoch - 32 Valid-Loss : 0.4110618869517566 Valid-Accuracy : 0.85
Epoch - 33 Train-Loss : 0.26329009130131453


 55%|█████▌    | 33/60 [02:17<01:49,  4.07s/it]

Epoch - 33 Valid-Loss : 0.394777720986707 Valid-Accuracy : 0.875
Epoch - 34 Train-Loss : 0.17113461225526408


 57%|█████▋    | 34/60 [02:22<01:47,  4.14s/it]

Epoch - 34 Valid-Loss : 0.35991176005659326 Valid-Accuracy : 0.9
Epoch - 35 Train-Loss : 0.2571627179800998


 58%|█████▊    | 35/60 [02:26<01:42,  4.11s/it]

Epoch - 35 Valid-Loss : 0.4328353438882651 Valid-Accuracy : 0.8625
Epoch - 36 Train-Loss : 0.14681710580480284


 60%|██████    | 36/60 [02:30<01:37,  4.08s/it]

Epoch - 36 Valid-Loss : 0.4007037749015808 Valid-Accuracy : 0.8875
Epoch - 37 Train-Loss : 0.20745547486003488


 62%|██████▏   | 37/60 [02:34<01:34,  4.13s/it]

Epoch - 37 Valid-Loss : 0.3339252211651683 Valid-Accuracy : 0.8875
Epoch - 38 Train-Loss : 0.19400559923960828


 63%|██████▎   | 38/60 [02:38<01:30,  4.12s/it]

Epoch - 38 Valid-Loss : 0.3484721782356587 Valid-Accuracy : 0.875
Epoch - 39 Train-Loss : 0.1808655634522438


 65%|██████▌   | 39/60 [02:42<01:25,  4.09s/it]

Epoch - 39 Valid-Loss : 0.45599082273538405 Valid-Accuracy : 0.8875
Changed learning rate to 2e-08
Epoch - 40 Train-Loss : 0.2058391122962348


 67%|██████▋   | 40/60 [02:46<01:22,  4.10s/it]

Epoch - 40 Valid-Loss : 0.36097610380645617 Valid-Accuracy : 0.9125
Epoch - 41 Train-Loss : 0.2531687853334006


 68%|██████▊   | 41/60 [02:50<01:18,  4.13s/it]

Epoch - 41 Valid-Loss : 0.41851947503369047 Valid-Accuracy : 0.8625
Epoch - 42 Train-Loss : 0.1888188556593377


 70%|███████   | 42/60 [02:54<01:13,  4.09s/it]

Epoch - 42 Valid-Loss : 0.38793102233862553 Valid-Accuracy : 0.9
Epoch - 43 Train-Loss : 0.19623367784079165


 72%|███████▏  | 43/60 [02:58<01:09,  4.07s/it]

Epoch - 43 Valid-Loss : 0.38672231195105267 Valid-Accuracy : 0.875
Epoch - 44 Train-Loss : 0.22788413804955782


 73%|███████▎  | 44/60 [03:03<01:06,  4.13s/it]

Epoch - 44 Valid-Loss : 0.40158805135492914 Valid-Accuracy : 0.875
Epoch - 45 Train-Loss : 0.2787574108981062


 75%|███████▌  | 45/60 [03:07<01:01,  4.09s/it]

Epoch - 45 Valid-Loss : 0.32237462753400054 Valid-Accuracy : 0.8875
Epoch - 46 Train-Loss : 0.23766597763751635


 77%|███████▋  | 46/60 [03:11<00:56,  4.07s/it]

Epoch - 46 Valid-Loss : 0.34071061317619067 Valid-Accuracy : 0.875
Epoch - 47 Train-Loss : 0.24501965874806048


 78%|███████▊  | 47/60 [03:15<00:53,  4.13s/it]

Epoch - 47 Valid-Loss : 0.35253394345882044 Valid-Accuracy : 0.8625
Epoch - 48 Train-Loss : 0.20280671139480547


 80%|████████  | 48/60 [03:19<00:49,  4.11s/it]

Epoch - 48 Valid-Loss : 0.41803974038025443 Valid-Accuracy : 0.875
Epoch - 49 Train-Loss : 0.25333342439262196


 82%|████████▏ | 49/60 [03:23<00:44,  4.08s/it]

Epoch - 49 Valid-Loss : 0.3470247536635071 Valid-Accuracy : 0.85
Changed learning rate to 2e-09
Epoch - 50 Train-Loss : 0.1583801112312358


 83%|████████▎ | 50/60 [03:27<00:41,  4.11s/it]

Epoch - 50 Valid-Loss : 0.36793867029795707 Valid-Accuracy : 0.8625
Epoch - 51 Train-Loss : 0.22732701923232526


 85%|████████▌ | 51/60 [03:31<00:37,  4.12s/it]

Epoch - 51 Valid-Loss : 0.4199320650325717 Valid-Accuracy : 0.8375
Epoch - 52 Train-Loss : 0.15636422171955927


 87%|████████▋ | 52/60 [03:35<00:32,  4.09s/it]

Epoch - 52 Valid-Loss : 0.40164618716532907 Valid-Accuracy : 0.8375
Epoch - 53 Train-Loss : 0.20576796054374427


 88%|████████▊ | 53/60 [03:39<00:28,  4.09s/it]

Epoch - 53 Valid-Loss : 0.45742010690918133 Valid-Accuracy : 0.9
Epoch - 54 Train-Loss : 0.18566409453633242


 90%|█████████ | 54/60 [03:44<00:24,  4.13s/it]

Epoch - 54 Valid-Loss : 0.4722889001746459 Valid-Accuracy : 0.8625
Epoch - 55 Train-Loss : 0.14085740789305418


 92%|█████████▏| 55/60 [03:48<00:20,  4.09s/it]

Epoch - 55 Valid-Loss : 0.46122767509625645 Valid-Accuracy : 0.825
Epoch - 56 Train-Loss : 0.15873219208442607


 93%|█████████▎| 56/60 [03:52<00:16,  4.06s/it]

Epoch - 56 Valid-Loss : 0.29562955505207356 Valid-Accuracy : 0.9
Epoch - 57 Train-Loss : 0.14199382541701197


 95%|█████████▌| 57/60 [03:56<00:12,  4.13s/it]

Epoch - 57 Valid-Loss : 0.3777003275713923 Valid-Accuracy : 0.8625
Epoch - 58 Train-Loss : 0.13736653383821248


 97%|█████████▋| 58/60 [04:00<00:08,  4.10s/it]

Epoch - 58 Valid-Loss : 0.40582533076401434 Valid-Accuracy : 0.8625
Epoch - 59 Train-Loss : 0.18686452153488062


 98%|█████████▊| 59/60 [04:04<00:04,  4.08s/it]

Epoch - 59 Valid-Loss : 0.40546989137410494 Valid-Accuracy : 0.875
Changed learning rate to 2e-10
Epoch - 60 Train-Loss : 0.2072351422859356


100%|██████████| 60/60 [04:08<00:00,  4.14s/it]

Epoch - 60 Valid-Loss : 0.36372851276074697 Valid-Accuracy : 0.875





In [37]:
model_res.eval()
# Inference only: disable autograd, and call the module itself rather than
# .forward() so that any registered hooks run.
with torch.no_grad():
    # spec_t is the test spectrogram tensor created in the earlier
    # prediction cell; add batch and channel axes in front of it.
    pr = model_res(spec_t.reshape(1,1,*spec_t.shape))
print(pr)

# Index of the highest score for the single sample in the batch.
ind = pr.argmax(dim=1).item()
print(indtocat[ind])

tensor([[-4.6115, -1.7040,  0.6630, -0.8509, 12.4408, -1.1918, -1.9656,  0.2346,
         -0.4667, -3.3789]], device='cuda:0', grad_fn=<AddmmBackward0>)
frog


Check the parameter count of both models

In [38]:
# Report the total and trainable parameter counts, first for the custom
# model and then for the ResNet model.
for banner, net in (
    ("-----Our model------", model_),
    ("-----Resnet model------", model_res),
):
    total_params = sum(weight.numel() for weight in net.parameters())
    trainable_params = sum(
        weight.numel() for weight in net.parameters() if weight.requires_grad
    )
    print(banner)
    print("Total parameter number: ", total_params)
    print("Trainable parameter number: ", trainable_params)



-----Our model------
Total parameter number:  109723110
Trainable parameter number:  109723110
-----Resnet model------
Total parameter number:  21283530
Trainable parameter number:  21283530
