<a href="https://colab.research.google.com/github/Ting-Wei-Chang626/Primary_AI/blob/main/demo_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Animal sound classification**

Load the necessary modules

In [None]:
import pandas as pd
import librosa
import numpy as np
import matplotlib.pyplot as plt
import librosa.display
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import os

Transform audio to images

In [None]:
def get_melspectrogram_db(file_path, sr=None, n_fft=2048, hop_length=512, n_mels=128, fmin=20, fmax=8300, top_db=80):
    """Load an audio file and return its mel spectrogram on a decibel scale.

    The waveform is forced to exactly ``5 * sr`` samples before the spectrogram
    is computed: shorter clips are reflect-padded on both sides, longer clips
    keep only their first ``5 * sr`` samples.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        sr: Sampling rate passed to ``librosa.load``; the rate it returns is
            the one used for all later computations.
        n_fft, hop_length, n_mels, fmin, fmax: Forwarded unchanged to
            ``librosa.feature.melspectrogram``.
        top_db: Forwarded to ``librosa.power_to_db``.

    Returns:
        The array produced by ``librosa.power_to_db`` for the fixed-length clip.
    """
    wav, sr = librosa.load(file_path, sr=sr)
    target_len = 5 * sr
    missing = target_len - wav.shape[0]
    if missing > 0:
        # Split the padding so the result is exactly target_len samples long.
        # Padding ceil(missing / 2) on *both* sides would overshoot by one
        # sample whenever `missing` is odd.
        left = int(np.ceil(missing / 2))
        right = missing - left
        wav = np.pad(wav, (left, right), mode='reflect')
    else:
        wav = wav[:target_len]
    spec = librosa.feature.melspectrogram(y=wav, sr=sr, n_fft=n_fft, hop_length=hop_length,
                                          n_mels=n_mels, fmin=fmin, fmax=fmax)
    spec_db = librosa.power_to_db(spec, top_db=top_db)

    return spec_db


def spec_to_image(spec, eps=1e-6):
    """Convert a spectrogram array into an 8-bit image-like array.

    The input is standardised (zero mean, unit variance, with ``eps`` added to
    the standard deviation), then min-max rescaled to the range 0..255 and
    truncated to ``uint8``.

    Args:
        spec: NumPy array holding the spectrogram values.
        eps: Small constant added to the standard deviation so a constant
            input does not divide by zero during standardisation.

    Returns:
        ``uint8`` array with the same shape as ``spec``. A constant input (for
        example a silent clip) has no dynamic range and yields all zeros.
    """
    mean = spec.mean()
    std = spec.std()
    spec_norm = (spec - mean) / (std + eps)
    spec_min, spec_max = spec_norm.min(), spec_norm.max()
    value_range = spec_max - spec_min
    if value_range == 0:
        # Without this guard the rescale below computes 0/0 and casts NaN to uint8.
        return np.zeros_like(spec_norm, dtype=np.uint8)
    spec_scaled = 255 * (spec_norm - spec_min) / value_range
    spec_scaled = spec_scaled.astype(np.uint8)

    return spec_scaled

Mount Google Drive

In [None]:
# Colab-specific: mount Google Drive at /content/drive so that the
# /content/drive/MyDrive/... dataset and checkpoint paths used in later cells resolve.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Create the ESC-50 dataset class

In [None]:
class ESC50Data(Dataset):
    """In-memory dataset of spectrogram images and integer class labels.

    Every audio file listed in ``df`` is converted once, at construction time,
    with ``get_melspectrogram_db`` followed by ``spec_to_image``; the results
    are kept in Python lists and served by index.

    Args:
        base: Directory that the file names in ``df[in_col]`` are relative to.
        df: DataFrame with one row per audio clip.
        in_col: Name of the column holding the audio file name.
        out_col: Name of the column holding the class label.

    Attributes set on the instance:
        categories: Sorted unique values of ``df[out_col]``.
        c2i / i2c: Category -> index and index -> category lookups derived
            from ``categories``.
        data: List of arrays, each with a leading axis of size 1 added.
        labels: List of integer class indices, parallel to ``data``.
    """

    def __init__(self, base, df, in_col, out_col):
        self.df = df
        self.data = []
        self.labels = []
        self.categories = sorted(df[out_col].unique())
        self.c2i = {category: i for i, category in enumerate(self.categories)}
        self.i2c = {i: category for i, category in enumerate(self.categories)}

        for ind in tqdm(range(len(df))):
            row = df.iloc[ind]
            file_path = os.path.join(base, row[in_col])
            # Add a leading axis so each item is (1, ...) for the single-channel models.
            self.data.append(spec_to_image(get_melspectrogram_db(file_path))[np.newaxis, ...])
            # Look the label up through out_col (not a hard-coded column name) so the
            # label column always matches the one the category mapping was built from.
            self.labels.append(self.c2i[row[out_col]])

    def __len__(self):
        """Return the number of clips held by the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(image, label_index)`` pair stored at position ``idx``."""
        return self.data[idx], self.labels[idx]

Split the dataset into training and validation sets

In [None]:
# Location of the ESC-50 copy on the mounted Google Drive.
ESC50_ROOT = '/content/drive/MyDrive/NTUCE_AI_worshop_0629/ESC-50'
ESC50_AUDIO_DIR = os.path.join(ESC50_ROOT, 'audio')

df = pd.read_csv(os.path.join(ESC50_ROOT, 'meta', 'esc50_animal.csv'))

# Fold 5 is held out for validation; every other fold is used for training.
train = df[df['fold']!=5]
valid = df[df['fold']==5]

train_data = ESC50Data(ESC50_AUDIO_DIR, train, 'filename', 'category')
valid_data = ESC50Data(ESC50_AUDIO_DIR, valid, 'filename', 'category')

# Each ESC50Data builds its label mapping from its own rows, so a category
# missing from one split would silently shift the label indices.
assert train_data.c2i == valid_data.c2i, "train/validation label mappings differ"

train_loader = DataLoader(train_data, batch_size=4, shuffle=True)
# Validation metrics do not depend on sample order, so keep it deterministic.
valid_loader = DataLoader(valid_data, batch_size=4, shuffle=False)


100%|██████████| 320/320 [00:19<00:00, 16.48it/s]
100%|██████████| 80/80 [00:02<00:00, 35.15it/s]


In [None]:
import pickle

# Persist the index -> category lookup built by the training dataset so that
# predictions can be turned back into category names later on.
with open('indtocat.pkl', 'wb') as mapping_file:
    pickle.dump(train_data.i2c, mapping_file)

Create the Convolutional Neural Network

In [None]:
import torch.nn as nn
import torch.nn.functional as F

def weights_init(m):
    """Re-initialise a layer in place, chosen by its class name.

    Intended for use with ``Module.apply``. Layers whose class name contains
    ``Conv`` get weights drawn from N(0, 0.02); otherwise layers whose class
    name contains ``BatchNorm`` get weights drawn from N(1, 0.02) and a zero
    bias. All other layers are left untouched.
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
        return
    if 'BatchNorm' in layer_kind:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

class ESC50Model(nn.Module):
    """Eight-layer convolutional classifier for single-channel inputs.

    The network is four pairs of 3x3 convolutions (each followed by batch
    normalisation and ReLU) with channel widths 32, 64, 128 and 256. A 2x2
    max-pool follows each of the first three pairs. The flattened features go
    through a 500-unit dense layer, dropout (p=0.5) and a final linear layer
    producing ``num_cats`` scores.

    Args:
        input_shape: Sequence whose elements 1 and 2 are the input height and
            width; they size the first dense layer.
        num_cats: Number of output classes.
    """

    def __init__(self, input_shape, num_cats=10):
        super().__init__()
        # Channel count before conv1 followed by the output width of conv1..conv8.
        widths = (1, 32, 32, 64, 64, 128, 128, 256, 256)
        for layer_no in range(1, len(widths)):
            in_ch, out_ch = widths[layer_no - 1], widths[layer_no]
            setattr(self, f'conv{layer_no}',
                    nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1))
            setattr(self, f'bn{layer_no}', nn.BatchNorm2d(out_ch))

        # Three 2x2 poolings each floor-halve the spatial dimensions.
        pooled_h = input_shape[1] // 2 // 2 // 2
        pooled_w = input_shape[2] // 2 // 2 // 2
        self.dense1 = nn.Linear(256 * pooled_h * pooled_w, 500)
        self.dropout = nn.Dropout(0.5)
        self.dense2 = nn.Linear(500, num_cats)

    def forward(self, x):
        """Return the unnormalised class scores for a batch ``x``."""
        for layer_no in range(1, 9):
            conv = getattr(self, f'conv{layer_no}')
            bn = getattr(self, f'bn{layer_no}')
            x = F.relu(bn(conv(x)))
            # Down-sample after the 1st, 2nd and 3rd convolution pairs only.
            if layer_no in (2, 4, 6):
                x = F.max_pool2d(x, kernel_size=2)
        x = x.view(x.size(0), -1)
        x = F.relu(self.dense1(x))
        x = self.dropout(x)
        return self.dense2(x)

In [None]:
# Prefer the first CUDA GPU when one is available; otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# ESC50Model reads input_shape[1] and input_shape[2] to size its first dense layer.
# NOTE(review): (1, 128, 431) presumably matches the spectrogram images produced
# above (128 mel bins); confirm against train_data.data[0].shape.
model = ESC50Model(input_shape=(1,128,431), num_cats=10)
model = model.to(device)
# model.apply(weights_init)


In [None]:
# Optimisation settings for the custom CNN.
epochs = 60
learning_rate = 2e-4  # base rate; lr_decay() reads this module-level name
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# train() appends one list of per-batch losses to each of these per epoch.
train_losses, valid_losses = [], []

Define the training procedure

In [None]:
def setlr(optimizer, lr):
    """Set the learning rate of every parameter group and return the optimizer.

    Args:
        optimizer: Object exposing ``param_groups``, an iterable of dicts.
        lr: Value written to the ``'lr'`` entry of each group.

    Returns:
        The same ``optimizer`` object, modified in place.
    """
    for group in optimizer.param_groups:
        group.update(lr=lr)
    return optimizer


def lr_decay(optimizer, epoch):
    """Step-decay schedule: divide the base rate by 10 every tenth epoch.

    On epochs that are a multiple of 10 the learning rate is set to the
    module-level ``learning_rate`` divided by ``10 ** (epoch // 10)`` and the
    new value is printed; on all other epochs nothing changes.

    Returns:
        The optimizer that was passed in.
    """
    if epoch % 10 != 0:
        return optimizer

    new_lr = learning_rate / (10**(epoch//10))
    optimizer = setlr(optimizer, new_lr)
    print(f'Changed learning rate to {new_lr}')
    return optimizer

def train(model, loss_fn, train_loader, valid_loader, epochs, optimizer, train_losses, valid_losses, change_lr=None,
          accum_steps=5, checkpoint_path="/content/drive/MyDrive/NTUCE_AI_worshop_0629/demo_best.pth"):
    """Train ``model`` with gradient accumulation and validate after every epoch.

    Relies on the notebook-level ``device`` for tensor placement.

    Args:
        model: Network to optimise; modified in place.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar loss.
        train_loader: Iterable of ``(x, y)`` training batches.
        valid_loader: Iterable of ``(x, y)`` validation batches.
        epochs: Number of passes over ``train_loader``.
        optimizer: Optimizer bound to ``model``'s parameters.
        train_losses: List that receives one list of per-batch training losses
            per epoch.
        valid_losses: List that receives one list of per-batch validation
            losses per epoch.
        change_lr: Optional callable ``change_lr(optimizer, epoch)`` returning
            the optimizer to use for that epoch.
        accum_steps: Number of batches whose gradients are summed before each
            optimizer step.
        checkpoint_path: File that receives the whole model each time the
            validation accuracy improves. Every call that keeps the default
            writes to the same file.

    Returns:
        The trained ``model`` (the same object that was passed in).

    Raises:
        ValueError: If ``accum_steps`` is smaller than 1.
    """
    if accum_steps < 1:
        raise ValueError("accum_steps must be >= 1")

    accuracy_best = 0
    for epoch in tqdm(range(1,epochs+1)):
        model.train()
        batch_losses=[]

        if change_lr:
            optimizer = change_lr(optimizer, epoch)

        # Start each epoch from clean gradients so that nothing accumulated
        # earlier is applied under this epoch's learning rate.
        optimizer.zero_grad()
        pending = 0  # batches accumulated since the last optimizer step
        for x, y in train_loader:
            x = x.to(device, dtype=torch.float32)
            y = y.to(device, dtype=torch.long)
            y_hat = model(x)
            loss = loss_fn(y_hat, y)
            loss.backward()
            batch_losses.append(loss.item())

            pending += 1
            if pending == accum_steps:
                optimizer.step()        # update parameters of net
                optimizer.zero_grad()
                pending = 0

        # Apply whatever is left when the number of batches is not a multiple
        # of accum_steps, instead of dropping it or leaking it into the next epoch.
        if pending:
            optimizer.step()
            optimizer.zero_grad()

        train_losses.append(batch_losses)
        print(f'Epoch - {epoch} Train-Loss : {np.mean(train_losses[-1])}')
        model.eval()
        batch_losses=[]
        trace_y = []
        trace_yhat = []

        # Validation: no gradients are needed, so skip building autograd graphs.
        with torch.no_grad():
            for x, y in valid_loader:
                x = x.to(device, dtype=torch.float32)
                y = y.to(device, dtype=torch.long)
                y_hat = model(x)
                loss = loss_fn(y_hat, y)
                trace_y.append(y.cpu().numpy())
                trace_yhat.append(y_hat.cpu().numpy())
                batch_losses.append(loss.item())

        valid_losses.append(batch_losses)
        trace_y = np.concatenate(trace_y)
        trace_yhat = np.concatenate(trace_yhat)
        accuracy = np.mean(trace_yhat.argmax(axis=1)==trace_y)
        print(f'Epoch - {epoch} Valid-Loss : {np.mean(valid_losses[-1])} Valid-Accuracy : {accuracy}')
        # Keep a checkpoint of the model with the best validation accuracy so far.
        if accuracy > accuracy_best:
            accuracy_best = accuracy
            torch.save(model, checkpoint_path)
    return model

Start training

In [None]:
# Train the custom CNN with the step-decay schedule. train() returns the model it
# was given and fills train_losses / valid_losses with per-batch losses per epoch.
model_ = train(model, loss_fn, train_loader, valid_loader, epochs, optimizer, train_losses, valid_losses, lr_decay)

# Save the model as it is after the final epoch; train() separately saves the
# model with the best validation accuracy.
torch.save(model_, "/content/drive/MyDrive/NTUCE_AI_worshop_0629/demo_last.pth")

  0%|          | 0/60 [00:00<?, ?it/s]

Epoch - 1 Train-Loss : 8.611197458207608
Epoch - 1 Valid-Loss : 7.08762629032135 Valid-Accuracy : 0.2875


  2%|▏         | 1/60 [00:07<07:24,  7.54s/it]

Epoch - 2 Train-Loss : 3.0811526399105786
Epoch - 2 Valid-Loss : 1.6310142785310746 Valid-Accuracy : 0.525


  3%|▎         | 2/60 [00:13<06:16,  6.49s/it]

Epoch - 3 Train-Loss : 1.330388044565916
Epoch - 3 Valid-Loss : 1.3295795127749443 Valid-Accuracy : 0.575


  5%|▌         | 3/60 [00:18<05:43,  6.02s/it]

Epoch - 4 Train-Loss : 0.9018883723765612
Epoch - 4 Valid-Loss : 1.147267423570156 Valid-Accuracy : 0.6


  7%|▋         | 4/60 [00:24<05:25,  5.82s/it]

Epoch - 5 Train-Loss : 0.5722439526813105


  8%|▊         | 5/60 [00:27<04:38,  5.06s/it]

Epoch - 5 Valid-Loss : 1.154698547720909 Valid-Accuracy : 0.6
Epoch - 6 Train-Loss : 0.46196677738334985


 10%|█         | 6/60 [00:31<04:07,  4.58s/it]

Epoch - 6 Valid-Loss : 1.264000671519898 Valid-Accuracy : 0.5875
Epoch - 7 Train-Loss : 0.3019594577664975
Epoch - 7 Valid-Loss : 1.4031501114368439 Valid-Accuracy : 0.6375


 12%|█▏        | 7/60 [00:37<04:17,  4.86s/it]

Epoch - 8 Train-Loss : 0.2602976614696672
Epoch - 8 Valid-Loss : 1.1542544439435005 Valid-Accuracy : 0.65


 13%|█▎        | 8/60 [00:42<04:24,  5.08s/it]

Epoch - 9 Train-Loss : 0.21485752170920022


 15%|█▌        | 9/60 [00:46<03:57,  4.66s/it]

Epoch - 9 Valid-Loss : 1.333081977441907 Valid-Accuracy : 0.6375
Changed learning rate to 2e-05
Epoch - 10 Train-Loss : 0.1639610338927014


 17%|█▋        | 10/60 [00:50<03:37,  4.35s/it]

Epoch - 10 Valid-Loss : 1.2831499833613633 Valid-Accuracy : 0.65
Epoch - 11 Train-Loss : 0.1490105654404033
Epoch - 11 Valid-Loss : 1.2381132060196252 Valid-Accuracy : 0.6875


 18%|█▊        | 11/60 [00:55<03:54,  4.80s/it]

Epoch - 12 Train-Loss : 0.12449498089481495


 20%|██        | 12/60 [00:59<03:34,  4.47s/it]

Epoch - 12 Valid-Loss : 1.1894936598371715 Valid-Accuracy : 0.6875
Epoch - 13 Train-Loss : 0.11440444197614852


 22%|██▏       | 13/60 [01:03<03:18,  4.23s/it]

Epoch - 13 Valid-Loss : 1.171246603364125 Valid-Accuracy : 0.6875
Epoch - 14 Train-Loss : 0.10734828773929621


 23%|██▎       | 14/60 [01:07<03:11,  4.17s/it]

Epoch - 14 Valid-Loss : 1.1565642174333335 Valid-Accuracy : 0.6875
Epoch - 15 Train-Loss : 0.07201327900766046


 25%|██▌       | 15/60 [01:11<03:06,  4.14s/it]

Epoch - 15 Valid-Loss : 1.16955705601722 Valid-Accuracy : 0.675
Epoch - 16 Train-Loss : 0.1280556398012777


 27%|██▋       | 16/60 [01:15<02:56,  4.01s/it]

Epoch - 16 Valid-Loss : 1.2169260379858315 Valid-Accuracy : 0.675
Epoch - 17 Train-Loss : 0.07970634805533336


 28%|██▊       | 17/60 [01:18<02:48,  3.91s/it]

Epoch - 17 Valid-Loss : 1.2127644706051797 Valid-Accuracy : 0.675
Epoch - 18 Train-Loss : 0.08205141384351008
Epoch - 18 Valid-Loss : 1.1298810824751855 Valid-Accuracy : 0.7125


 30%|███       | 18/60 [01:24<03:08,  4.48s/it]

Epoch - 19 Train-Loss : 0.07324037505240995


 32%|███▏      | 19/60 [01:28<02:54,  4.27s/it]

Epoch - 19 Valid-Loss : 1.1843609446659684 Valid-Accuracy : 0.675
Changed learning rate to 2e-06
Epoch - 20 Train-Loss : 0.09234532285190653


 33%|███▎      | 20/60 [01:31<02:43,  4.09s/it]

Epoch - 20 Valid-Loss : 1.2278095559217035 Valid-Accuracy : 0.6875
Epoch - 21 Train-Loss : 0.0733264511654852


 35%|███▌      | 21/60 [01:35<02:35,  3.98s/it]

Epoch - 21 Valid-Loss : 1.2460506664123385 Valid-Accuracy : 0.675
Epoch - 22 Train-Loss : 0.08409591258969158


 37%|███▋      | 22/60 [01:39<02:28,  3.91s/it]

Epoch - 22 Valid-Loss : 1.2101245612313505 Valid-Accuracy : 0.675
Epoch - 23 Train-Loss : 0.07655538209510268


 38%|███▊      | 23/60 [01:43<02:22,  3.85s/it]

Epoch - 23 Valid-Loss : 1.1880948377773166 Valid-Accuracy : 0.675
Epoch - 24 Train-Loss : 0.10701181824551895


 40%|████      | 24/60 [01:46<02:17,  3.81s/it]

Epoch - 24 Valid-Loss : 1.195823784172535 Valid-Accuracy : 0.6875
Epoch - 25 Train-Loss : 0.07171912879348383


 42%|████▏     | 25/60 [01:50<02:12,  3.78s/it]

Epoch - 25 Valid-Loss : 1.20731782913208 Valid-Accuracy : 0.675
Epoch - 26 Train-Loss : 0.07569840559663135


 43%|████▎     | 26/60 [01:54<02:07,  3.76s/it]

Epoch - 26 Valid-Loss : 1.1933357201516628 Valid-Accuracy : 0.675
Epoch - 27 Train-Loss : 0.07775809309387113


 45%|████▌     | 27/60 [01:57<02:03,  3.74s/it]

Epoch - 27 Valid-Loss : 1.1546794302761554 Valid-Accuracy : 0.7
Epoch - 28 Train-Loss : 0.07116159031957067


 47%|████▋     | 28/60 [02:01<01:59,  3.74s/it]

Epoch - 28 Valid-Loss : 1.1634348352556116 Valid-Accuracy : 0.675
Epoch - 29 Train-Loss : 0.08588073437003914


 48%|████▊     | 29/60 [02:05<01:55,  3.73s/it]

Epoch - 29 Valid-Loss : 1.236514870414976 Valid-Accuracy : 0.675
Changed learning rate to 2.0000000000000002e-07
Epoch - 30 Train-Loss : 0.09132441873116477


 50%|█████     | 30/60 [02:09<01:51,  3.73s/it]

Epoch - 30 Valid-Loss : 1.2420859768986703 Valid-Accuracy : 0.675
Epoch - 31 Train-Loss : 0.0760097158870849


 52%|█████▏    | 31/60 [02:12<01:47,  3.72s/it]

Epoch - 31 Valid-Loss : 1.2311554508283735 Valid-Accuracy : 0.675
Epoch - 32 Train-Loss : 0.07393318749855098


 53%|█████▎    | 32/60 [02:16<01:44,  3.73s/it]

Epoch - 32 Valid-Loss : 1.2048763419268653 Valid-Accuracy : 0.6875
Epoch - 33 Train-Loss : 0.0834596709071775


 55%|█████▌    | 33/60 [02:20<01:40,  3.73s/it]

Epoch - 33 Valid-Loss : 1.1400886574294418 Valid-Accuracy : 0.6875
Epoch - 34 Train-Loss : 0.06205820193245017


 57%|█████▋    | 34/60 [02:24<01:36,  3.73s/it]

Epoch - 34 Valid-Loss : 1.2078559456393123 Valid-Accuracy : 0.675
Epoch - 35 Train-Loss : 0.06771458813382196


 58%|█████▊    | 35/60 [02:27<01:33,  3.73s/it]

Epoch - 35 Valid-Loss : 1.1909025102853774 Valid-Accuracy : 0.675
Epoch - 36 Train-Loss : 0.07594294528571481


 60%|██████    | 36/60 [02:31<01:29,  3.74s/it]

Epoch - 36 Valid-Loss : 1.2041524973697961 Valid-Accuracy : 0.6875
Epoch - 37 Train-Loss : 0.06097629718969984


 62%|██████▏   | 37/60 [02:35<01:25,  3.74s/it]

Epoch - 37 Valid-Loss : 1.1826015666127205 Valid-Accuracy : 0.675
Epoch - 38 Train-Loss : 0.10121080742392223


 63%|██████▎   | 38/60 [02:39<01:22,  3.74s/it]

Epoch - 38 Valid-Loss : 1.1669315446168185 Valid-Accuracy : 0.6875
Epoch - 39 Train-Loss : 0.06834579603055317


 65%|██████▌   | 39/60 [02:42<01:18,  3.74s/it]

Epoch - 39 Valid-Loss : 1.2073859483003617 Valid-Accuracy : 0.6875
Changed learning rate to 2e-08
Epoch - 40 Train-Loss : 0.06159742112868116


 67%|██████▋   | 40/60 [02:46<01:14,  3.74s/it]

Epoch - 40 Valid-Loss : 1.1503571750596167 Valid-Accuracy : 0.675
Epoch - 41 Train-Loss : 0.07529980827494001


 68%|██████▊   | 41/60 [02:50<01:11,  3.74s/it]

Epoch - 41 Valid-Loss : 1.1586137251462787 Valid-Accuracy : 0.6875
Epoch - 42 Train-Loss : 0.06082385961226464


 70%|███████   | 42/60 [02:53<01:07,  3.74s/it]

Epoch - 42 Valid-Loss : 1.1792177237570285 Valid-Accuracy : 0.6875
Epoch - 43 Train-Loss : 0.068966179445124


 72%|███████▏  | 43/60 [02:57<01:03,  3.74s/it]

Epoch - 43 Valid-Loss : 1.1976872310042381 Valid-Accuracy : 0.675
Epoch - 44 Train-Loss : 0.07039405986433848


 73%|███████▎  | 44/60 [03:01<00:59,  3.75s/it]

Epoch - 44 Valid-Loss : 1.139041418582201 Valid-Accuracy : 0.6875
Epoch - 45 Train-Loss : 0.06925518076168373


 75%|███████▌  | 45/60 [03:05<00:56,  3.74s/it]

Epoch - 45 Valid-Loss : 1.11426379554905 Valid-Accuracy : 0.7125
Epoch - 46 Train-Loss : 0.07038487032696139


 77%|███████▋  | 46/60 [03:08<00:52,  3.74s/it]

Epoch - 46 Valid-Loss : 1.133808888681233 Valid-Accuracy : 0.6875
Epoch - 47 Train-Loss : 0.07748952706497221


 78%|███████▊  | 47/60 [03:12<00:48,  3.75s/it]

Epoch - 47 Valid-Loss : 1.1659792106598617 Valid-Accuracy : 0.7
Epoch - 48 Train-Loss : 0.06846977426321246


 80%|████████  | 48/60 [03:16<00:44,  3.75s/it]

Epoch - 48 Valid-Loss : 1.167129964567721 Valid-Accuracy : 0.6875
Epoch - 49 Train-Loss : 0.09237117786251474


 82%|████████▏ | 49/60 [03:20<00:41,  3.75s/it]

Epoch - 49 Valid-Loss : 1.1553374844137578 Valid-Accuracy : 0.6875
Changed learning rate to 2e-09
Epoch - 50 Train-Loss : 0.1015486677849367


 83%|████████▎ | 50/60 [03:23<00:37,  3.75s/it]

Epoch - 50 Valid-Loss : 1.1566354209091514 Valid-Accuracy : 0.675
Epoch - 51 Train-Loss : 0.06454044757265365


 85%|████████▌ | 51/60 [03:27<00:33,  3.75s/it]

Epoch - 51 Valid-Loss : 1.1729874648153782 Valid-Accuracy : 0.6875
Epoch - 52 Train-Loss : 0.07122168903733836


 87%|████████▋ | 52/60 [03:31<00:29,  3.75s/it]

Epoch - 52 Valid-Loss : 1.1601107218069955 Valid-Accuracy : 0.6875
Epoch - 53 Train-Loss : 0.08068023947525944


 88%|████████▊ | 53/60 [03:35<00:26,  3.74s/it]

Epoch - 53 Valid-Loss : 1.1819515753537417 Valid-Accuracy : 0.675
Epoch - 54 Train-Loss : 0.07121343358230661


 90%|█████████ | 54/60 [03:38<00:22,  3.75s/it]

Epoch - 54 Valid-Loss : 1.1816440291702748 Valid-Accuracy : 0.675
Epoch - 55 Train-Loss : 0.0544324862333724


 92%|█████████▏| 55/60 [03:42<00:18,  3.75s/it]

Epoch - 55 Valid-Loss : 1.1682399291254115 Valid-Accuracy : 0.6875
Epoch - 56 Train-Loss : 0.07693879268863384


 93%|█████████▎| 56/60 [03:46<00:15,  3.75s/it]

Epoch - 56 Valid-Loss : 1.1955179994925857 Valid-Accuracy : 0.675
Epoch - 57 Train-Loss : 0.07676352553680772


 95%|█████████▌| 57/60 [03:50<00:11,  3.77s/it]

Epoch - 57 Valid-Loss : 1.2604366876534185 Valid-Accuracy : 0.675
Epoch - 58 Train-Loss : 0.0741199141364632


 97%|█████████▋| 58/60 [03:54<00:07,  3.76s/it]

Epoch - 58 Valid-Loss : 1.195100761204958 Valid-Accuracy : 0.6875
Epoch - 59 Train-Loss : 0.07569937620100972


 98%|█████████▊| 59/60 [03:57<00:03,  3.76s/it]

Epoch - 59 Valid-Loss : 1.1719783302920406 Valid-Accuracy : 0.6875
Changed learning rate to 2e-10
Epoch - 60 Train-Loss : 0.06036309548362624


100%|██████████| 60/60 [04:01<00:00,  4.03s/it]

Epoch - 60 Valid-Loss : 1.194910854101181 Valid-Accuracy : 0.675





In [None]:
# Single clip used for a quick prediction check, converted with the same two
# preprocessing functions that ESC50Data applies to the training clips.
test_path = "/content/drive/MyDrive/NTUCE_AI_worshop_0629/1-18757-A-4.wav"
test_data = spec_to_image(get_melspectrogram_db(test_path))


In [None]:
# Reload the index -> category lookup that was pickled after the datasets were built.
with open('indtocat.pkl', 'rb') as mapping_file:
    indtocat = pickle.load(mapping_file)

In [None]:
# Display the class-index -> category-name mapping.
indtocat

{0: 'cat',
 1: 'cow',
 2: 'crow',
 3: 'dog',
 4: 'frog',
 5: 'hen',
 6: 'insects',
 7: 'pig',
 8: 'rooster',
 9: 'sheep'}

In [None]:
model_.eval()
spec_t = torch.tensor(test_data).to(device, dtype=torch.float32)
# Inference only: disable autograd, and call the module itself rather than
# .forward() so the regular nn.Module call machinery is used.
with torch.no_grad():
    # Add batch and channel axes in front of the 2-D spectrogram image.
    pr = model_(spec_t.reshape(1,1,*spec_t.shape))
print(pr)

# Index of the highest score for the single clip in the batch.
ind = pr.argmax(dim=1).item()
print(indtocat[ind])

tensor([[ -1.9977,  -7.4264,   2.2421,   3.6151,  14.4480,   1.8496, -10.2308,
           1.9156,   0.1999,  -6.0684]], device='cuda:0',
       grad_fn=<AddmmBackward0>)
frog


Use a pretrained torchvision model

In [None]:
from torchvision.models import resnet34

# Start from torchvision's ResNet-34 with pretrained weights requested.
resnet_model = resnet34(pretrained=True)
# Replace the classifier head with a new linear layer producing 10 class scores.
resnet_model.fc = nn.Linear(512,10)
# Replace the first convolution with a new layer that takes 1 input channel, matching
# the single-channel spectrogram images. This layer is newly constructed, so it does
# not carry pretrained weights.
resnet_model.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
resnet_model = resnet_model.to(device)



In [None]:
# Same optimisation settings as the custom CNN run, now bound to the ResNet's
# parameters. These assignments rebind the notebook-level names used earlier.
epochs = 60
learning_rate = 2e-4
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet_model.parameters(), lr=learning_rate)
resnet_train_losses, resnet_valid_losses = [], []

# Note: train() also saves its best-accuracy model to its own demo_best.pth path,
# which is the same file the custom CNN run wrote to.
model_res = train(
    resnet_model, loss_fn, train_loader, valid_loader, epochs, optimizer,
    resnet_train_losses, resnet_valid_losses, lr_decay,
)
torch.save(model_res, "/content/drive/MyDrive/NTUCE_AI_worshop_0629/demo_resnet.pth")

  0%|          | 0/60 [00:00<?, ?it/s]

Epoch - 1 Train-Loss : 1.5688763864338398
Epoch - 1 Valid-Loss : 0.8573809742927552 Valid-Accuracy : 0.7375


  2%|▏         | 1/60 [00:03<03:45,  3.83s/it]

Epoch - 2 Train-Loss : 0.7663656551390886
Epoch - 2 Valid-Loss : 0.526629748288542 Valid-Accuracy : 0.8375


  3%|▎         | 2/60 [00:07<03:37,  3.75s/it]

Epoch - 3 Train-Loss : 0.5045787865296006
Epoch - 3 Valid-Loss : 0.5174542079679668 Valid-Accuracy : 0.8625


  5%|▌         | 3/60 [00:10<03:24,  3.60s/it]

Epoch - 4 Train-Loss : 0.3635936779435724


  7%|▋         | 4/60 [00:14<03:09,  3.39s/it]

Epoch - 4 Valid-Loss : 0.5705091351876035 Valid-Accuracy : 0.8375
Epoch - 5 Train-Loss : 0.3626438620965928


  8%|▊         | 5/60 [00:17<03:00,  3.29s/it]

Epoch - 5 Valid-Loss : 0.4445588522357866 Valid-Accuracy : 0.8625
Epoch - 6 Train-Loss : 0.28120747609063984


 10%|█         | 6/60 [00:20<02:55,  3.25s/it]

Epoch - 6 Valid-Loss : 0.5264101419597864 Valid-Accuracy : 0.825
Epoch - 7 Train-Loss : 0.2875570555916056


 12%|█▏        | 7/60 [00:23<02:49,  3.19s/it]

Epoch - 7 Valid-Loss : 0.6077893340960145 Valid-Accuracy : 0.825
Epoch - 8 Train-Loss : 0.23769542850204745


 13%|█▎        | 8/60 [00:26<02:43,  3.15s/it]

Epoch - 8 Valid-Loss : 0.5865228373149876 Valid-Accuracy : 0.825
Epoch - 9 Train-Loss : 0.12514966373564676


 15%|█▌        | 9/60 [00:29<02:38,  3.11s/it]

Epoch - 9 Valid-Loss : 0.48310006824322044 Valid-Accuracy : 0.8375
Changed learning rate to 2e-05
Epoch - 10 Train-Loss : 0.1684544420102611
Epoch - 10 Valid-Loss : 0.37660761071601884 Valid-Accuracy : 0.875


 17%|█▋        | 10/60 [00:33<02:43,  3.27s/it]

Epoch - 11 Train-Loss : 0.10716986721381545


 18%|█▊        | 11/60 [00:36<02:37,  3.21s/it]

Epoch - 11 Valid-Loss : 0.37581212979275735 Valid-Accuracy : 0.85
Epoch - 12 Train-Loss : 0.10481870602234267


 20%|██        | 12/60 [00:39<02:31,  3.16s/it]

Epoch - 12 Valid-Loss : 0.3298530480940826 Valid-Accuracy : 0.8625
Epoch - 13 Train-Loss : 0.12843201236100868


 22%|██▏       | 13/60 [00:42<02:26,  3.12s/it]

Epoch - 13 Valid-Loss : 0.3641399937681854 Valid-Accuracy : 0.8375
Epoch - 14 Train-Loss : 0.11646812914987095
Epoch - 14 Valid-Loss : 0.3477646114741219 Valid-Accuracy : 0.9


 23%|██▎       | 14/60 [00:45<02:28,  3.23s/it]

Epoch - 15 Train-Loss : 0.09751414356287569


 25%|██▌       | 15/60 [00:48<02:22,  3.17s/it]

Epoch - 15 Valid-Loss : 0.3981098561664112 Valid-Accuracy : 0.85
Epoch - 16 Train-Loss : 0.10164756387239322


 27%|██▋       | 16/60 [00:51<02:17,  3.13s/it]

Epoch - 16 Valid-Loss : 0.368286284385249 Valid-Accuracy : 0.875
Epoch - 17 Train-Loss : 0.08883054063189774


 28%|██▊       | 17/60 [00:54<02:13,  3.10s/it]

Epoch - 17 Valid-Loss : 0.3440846582001541 Valid-Accuracy : 0.9
Epoch - 18 Train-Loss : 0.12639214964001438


 30%|███       | 18/60 [00:57<02:10,  3.10s/it]

Epoch - 18 Valid-Loss : 0.38414780944294763 Valid-Accuracy : 0.875
Epoch - 19 Train-Loss : 0.07188855044951196


 32%|███▏      | 19/60 [01:00<02:06,  3.09s/it]

Epoch - 19 Valid-Loss : 0.2606477632318274 Valid-Accuracy : 0.9
Changed learning rate to 2e-06
Epoch - 20 Train-Loss : 0.057724826934281735
Epoch - 20 Valid-Loss : 0.28650603409041653 Valid-Accuracy : 0.925


 33%|███▎      | 20/60 [01:04<02:05,  3.14s/it]

Epoch - 21 Train-Loss : 0.05613580776262097


 35%|███▌      | 21/60 [01:07<02:01,  3.11s/it]

Epoch - 21 Valid-Loss : 0.28029144610336515 Valid-Accuracy : 0.9125
Epoch - 22 Train-Loss : 0.07104191922408062


 37%|███▋      | 22/60 [01:10<01:57,  3.08s/it]

Epoch - 22 Valid-Loss : 0.26429343660711313 Valid-Accuracy : 0.925
Epoch - 23 Train-Loss : 0.06314043001038953


 38%|███▊      | 23/60 [01:13<01:54,  3.08s/it]

Epoch - 23 Valid-Loss : 0.25129013203841166 Valid-Accuracy : 0.925
Epoch - 24 Train-Loss : 0.0544101741805207


 40%|████      | 24/60 [01:16<01:51,  3.09s/it]

Epoch - 24 Valid-Loss : 0.2813809419050813 Valid-Accuracy : 0.8875
Epoch - 25 Train-Loss : 0.06625151406333316


 42%|████▏     | 25/60 [01:19<01:47,  3.07s/it]

Epoch - 25 Valid-Loss : 0.2760508062900044 Valid-Accuracy : 0.9125
Epoch - 26 Train-Loss : 0.0582008753583068


 43%|████▎     | 26/60 [01:22<01:43,  3.06s/it]

Epoch - 26 Valid-Loss : 0.30684875932201977 Valid-Accuracy : 0.8875
Epoch - 27 Train-Loss : 0.06068164095049724


 45%|████▌     | 27/60 [01:25<01:40,  3.05s/it]

Epoch - 27 Valid-Loss : 0.2703261337359436 Valid-Accuracy : 0.925
Epoch - 28 Train-Loss : 0.06602762141847052


 47%|████▋     | 28/60 [01:28<01:37,  3.05s/it]

Epoch - 28 Valid-Loss : 0.23782231346995103 Valid-Accuracy : 0.925
Epoch - 29 Train-Loss : 0.06734812322829384


 48%|████▊     | 29/60 [01:31<01:34,  3.04s/it]

Epoch - 29 Valid-Loss : 0.3060972036793828 Valid-Accuracy : 0.925
Changed learning rate to 2.0000000000000002e-07
Epoch - 30 Train-Loss : 0.08714995815535076


 50%|█████     | 30/60 [01:34<01:31,  3.04s/it]

Epoch - 30 Valid-Loss : 0.2955343852227088 Valid-Accuracy : 0.9125
Epoch - 31 Train-Loss : 0.06466388176195323


 52%|█████▏    | 31/60 [01:37<01:28,  3.04s/it]

Epoch - 31 Valid-Loss : 0.34177953779581005 Valid-Accuracy : 0.9
Epoch - 32 Train-Loss : 0.06569915185973514


 53%|█████▎    | 32/60 [01:40<01:25,  3.04s/it]

Epoch - 32 Valid-Loss : 0.29442135386634616 Valid-Accuracy : 0.9125
Epoch - 33 Train-Loss : 0.05226858004461974


 55%|█████▌    | 33/60 [01:43<01:22,  3.04s/it]

Epoch - 33 Valid-Loss : 0.2937493132980308 Valid-Accuracy : 0.9125
Epoch - 34 Train-Loss : 0.08805586268135812


 57%|█████▋    | 34/60 [01:46<01:18,  3.04s/it]

Epoch - 34 Valid-Loss : 0.29980594721855597 Valid-Accuracy : 0.9125
Epoch - 35 Train-Loss : 0.05074376208067406


 58%|█████▊    | 35/60 [01:49<01:15,  3.03s/it]

Epoch - 35 Valid-Loss : 0.3047643273603171 Valid-Accuracy : 0.925
Epoch - 36 Train-Loss : 0.08643085185030941


 60%|██████    | 36/60 [01:52<01:12,  3.04s/it]

Epoch - 36 Valid-Loss : 0.2311479528871132 Valid-Accuracy : 0.925
Epoch - 37 Train-Loss : 0.0488860298471991


 62%|██████▏   | 37/60 [01:56<01:15,  3.29s/it]

Epoch - 37 Valid-Loss : 0.3299933301168494 Valid-Accuracy : 0.9
Epoch - 38 Train-Loss : 0.041124470697832295


 63%|██████▎   | 38/60 [01:59<01:10,  3.21s/it]

Epoch - 38 Valid-Loss : 0.31122961940709504 Valid-Accuracy : 0.9125
Epoch - 39 Train-Loss : 0.06918281805992592


 65%|██████▌   | 39/60 [02:02<01:06,  3.16s/it]

Epoch - 39 Valid-Loss : 0.32246233185287565 Valid-Accuracy : 0.9
Changed learning rate to 2e-08
Epoch - 40 Train-Loss : 0.05626256435643882


 67%|██████▋   | 40/60 [02:05<01:02,  3.12s/it]

Epoch - 40 Valid-Loss : 0.2796557418012526 Valid-Accuracy : 0.9125
Epoch - 41 Train-Loss : 0.0683880128141027


 68%|██████▊   | 41/60 [02:08<00:58,  3.10s/it]

Epoch - 41 Valid-Loss : 0.30810016817995345 Valid-Accuracy : 0.9125
Epoch - 42 Train-Loss : 0.07899005315557588


 70%|███████   | 42/60 [02:11<00:55,  3.08s/it]

Epoch - 42 Valid-Loss : 0.2871694725501584 Valid-Accuracy : 0.9125
Epoch - 43 Train-Loss : 0.05274375731532928


 72%|███████▏  | 43/60 [02:14<00:52,  3.06s/it]

Epoch - 43 Valid-Loss : 0.3177440739236772 Valid-Accuracy : 0.9125
Epoch - 44 Train-Loss : 0.06982887983613181


 73%|███████▎  | 44/60 [02:18<00:48,  3.05s/it]

Epoch - 44 Valid-Loss : 0.35552229328313845 Valid-Accuracy : 0.9
Epoch - 45 Train-Loss : 0.0492479201144306


 75%|███████▌  | 45/60 [02:21<00:45,  3.06s/it]

Epoch - 45 Valid-Loss : 0.3076129746623337 Valid-Accuracy : 0.8875
Epoch - 46 Train-Loss : 0.09234648623969406


 77%|███████▋  | 46/60 [02:24<00:42,  3.05s/it]

Epoch - 46 Valid-Loss : 0.31964953341521324 Valid-Accuracy : 0.9
Epoch - 47 Train-Loss : 0.09436033187957946


 78%|███████▊  | 47/60 [02:27<00:39,  3.05s/it]

Epoch - 47 Valid-Loss : 0.2793412616592832 Valid-Accuracy : 0.925
Epoch - 48 Train-Loss : 0.07788603347726167


 80%|████████  | 48/60 [02:30<00:36,  3.04s/it]

Epoch - 48 Valid-Loss : 0.27144864898873494 Valid-Accuracy : 0.925
Epoch - 49 Train-Loss : 0.06341691737179644


 82%|████████▏ | 49/60 [02:33<00:33,  3.04s/it]

Epoch - 49 Valid-Loss : 0.29616908571551903 Valid-Accuracy : 0.9
Changed learning rate to 2e-09
Epoch - 50 Train-Loss : 0.06274819987593219


 83%|████████▎ | 50/60 [02:36<00:30,  3.05s/it]

Epoch - 50 Valid-Loss : 0.2709925149800256 Valid-Accuracy : 0.9
Epoch - 51 Train-Loss : 0.04374350992438849


 85%|████████▌ | 51/60 [02:39<00:27,  3.05s/it]

Epoch - 51 Valid-Loss : 0.30485767592908813 Valid-Accuracy : 0.9125
Epoch - 52 Train-Loss : 0.08009618000069167


 87%|████████▋ | 52/60 [02:42<00:24,  3.04s/it]

Epoch - 52 Valid-Loss : 0.31830754820257423 Valid-Accuracy : 0.9125
Epoch - 53 Train-Loss : 0.05995595224958379


 88%|████████▊ | 53/60 [02:45<00:21,  3.09s/it]

Epoch - 53 Valid-Loss : 0.30217412624042483 Valid-Accuracy : 0.9125
Epoch - 54 Train-Loss : 0.04980100480897818


 90%|█████████ | 54/60 [02:48<00:18,  3.09s/it]

Epoch - 54 Valid-Loss : 0.3324837021064013 Valid-Accuracy : 0.9125
Epoch - 55 Train-Loss : 0.057382458818028684


 92%|█████████▏| 55/60 [02:51<00:15,  3.07s/it]

Epoch - 55 Valid-Loss : 0.27941938327858223 Valid-Accuracy : 0.9125
Epoch - 56 Train-Loss : 0.08015390854561702


 93%|█████████▎| 56/60 [02:54<00:12,  3.06s/it]

Epoch - 56 Valid-Loss : 0.29140656447270885 Valid-Accuracy : 0.9
Epoch - 57 Train-Loss : 0.05225558344973251


 95%|█████████▌| 57/60 [02:57<00:09,  3.06s/it]

Epoch - 57 Valid-Loss : 0.34258092413656416 Valid-Accuracy : 0.9125
Epoch - 58 Train-Loss : 0.09169285573298111


 97%|█████████▋| 58/60 [03:00<00:06,  3.06s/it]

Epoch - 58 Valid-Loss : 0.304550259798998 Valid-Accuracy : 0.9125
Epoch - 59 Train-Loss : 0.10026359476323705


 98%|█████████▊| 59/60 [03:03<00:03,  3.05s/it]

Epoch - 59 Valid-Loss : 0.2731340953207109 Valid-Accuracy : 0.9
Changed learning rate to 2e-10
Epoch - 60 Train-Loss : 0.09968608622439205


100%|██████████| 60/60 [03:06<00:00,  3.12s/it]

Epoch - 60 Valid-Loss : 0.27116550093051045 Valid-Accuracy : 0.925





In [None]:
model_res.eval()
# Inference only: disable autograd, and call the module itself rather than
# .forward() so the regular nn.Module call machinery is used.
with torch.no_grad():
    # spec_t is the test-clip tensor created in the earlier prediction cell.
    pr = model_res(spec_t.reshape(1,1,*spec_t.shape))
print(pr)

# Index of the highest score for the single clip in the batch.
ind = pr.argmax(dim=1).item()
print(indtocat[ind])

tensor([[-1.9101, -2.7312, -0.9272, -1.6028, 10.2817,  0.0989, -2.5776, -1.2754,
         -3.8266, -2.2465]], device='cuda:0', grad_fn=<AddmmBackward0>)
frog


Check the parameter count of both models

In [None]:
# Report total and trainable parameter counts, first for the custom CNN and
# then for the ResNet.
for banner, net in (
    ("-----Our model------", model_),
    ("-----Resnet model------", model_res),
):
    total_params = sum(weights.numel() for weights in net.parameters())
    trainable_params = sum(
        weights.numel() for weights in net.parameters() if weights.requires_grad
    )
    print(banner)
    print("Total parameter number: ", total_params)
    print("Trainable parameter number: ", trainable_params)



-----Our model------
Total parameter number:  109723110
Trainable parameter number:  109723110
-----Resnet model------
Total parameter number:  21283530
Trainable parameter number:  21283530
