<a href="https://colab.research.google.com/github/Ting-Wei-Chang626/Primary_AI/blob/main/demo_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Animal sound classification**

Load the necessary modules

In [None]:
import pandas as pd
import librosa
import numpy as np
import matplotlib.pyplot as plt
import librosa.display
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import os

Transform audio to images

In [None]:
def get_melspectrogram_db(file_path, sr=None, n_fft=2048, hop_length=512, n_mels=128, fmin=20, fmax=8300, top_db=80, duration=5):
    """Load an audio file and return its mel spectrogram on a decibel scale.

    The waveform is forced to exactly ``duration`` seconds before the
    spectrogram is computed: shorter clips are reflect-padded on both sides,
    longer clips are cut after ``duration`` seconds.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        sr: Target sampling rate passed to ``librosa.load``.
        n_fft, hop_length, n_mels, fmin, fmax: Forwarded unchanged to
            ``librosa.feature.melspectrogram``.
        top_db: Forwarded to ``librosa.power_to_db``.
        duration: Clip length in seconds that every waveform is fitted to.

    Returns:
        The array returned by ``librosa.power_to_db`` for the fitted waveform.

    Raises:
        ValueError: If the file contains no samples (nothing to reflect).
    """
    wav, sr = librosa.load(file_path, sr=sr)
    target_len = int(round(duration * sr))
    n_samples = wav.shape[0]

    if n_samples == 0:
        raise ValueError(f"audio file contains no samples: {file_path}")

    if n_samples < target_len:
        # Pad the same amount on both sides. With an odd deficit this
        # overshoots by one sample, which the slice below removes.
        pad = int(np.ceil((target_len - n_samples) / 2))
        wav = np.pad(wav, pad, mode='reflect')

    # Guarantees an exact, identical length for every clip.
    wav = wav[:target_len]

    spec = librosa.feature.melspectrogram(y=wav, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels, fmin=fmin, fmax=fmax)
    spec_db = librosa.power_to_db(spec, top_db=top_db)

    return spec_db


def spec_to_image(spec, eps=1e-6):
    """Convert a spectrogram array into an 8-bit image array.

    The input is standardised (zero mean, unit variance, with ``eps`` added
    to the standard deviation) and then min-max scaled to 0..255 before being
    truncated to ``uint8``.

    Args:
        spec: NumPy array holding the spectrogram values.
        eps: Small constant added to the standard deviation to avoid dividing
            by zero.

    Returns:
        ``uint8`` array with the same shape as ``spec``. A spectrogram whose
        values are all equal maps to an all-zero image.
    """
    mean = spec.mean()
    std = spec.std()
    spec_norm = (spec - mean) / (std + eps)
    spec_min, spec_max = spec_norm.min(), spec_norm.max()

    value_range = spec_max - spec_min
    if value_range == 0:
        # Constant input: min-max scaling would be 0/0 (NaN). Return a flat
        # black image instead of casting NaN to uint8.
        return np.zeros(spec_norm.shape, dtype=np.uint8)

    spec_scaled = 255 * (spec_norm - spec_min) / value_range
    spec_scaled = spec_scaled.astype(np.uint8)

    return spec_scaled

Mount Google Drive

In [None]:
# Colab-only step: mount Google Drive at /content/drive. Later cells read the
# dataset from, and write model checkpoints to, paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Create ESC50 data format

In [None]:
class ESC50Data(Dataset):
    """In-memory dataset of spectrogram images with integer class labels.

    Every audio file listed in ``df`` is converted to an image once, at
    construction time, and kept in memory.

    Attributes:
        df: The metadata frame the dataset was built from.
        data: List of image arrays, each with a leading axis of size 1 added.
        labels: List of integer class indices, aligned with ``data``.
        categories: Sorted unique values of the label column.
        c2i: Mapping from category value to integer index.
        i2c: Mapping from integer index to category value.
    """

    def __init__(self, base, df, in_col, out_col):
        """Build the dataset.

        Args:
            base: Directory that the file names in ``in_col`` are relative to.
            df: pandas DataFrame with one row per audio file.
            in_col: Name of the column holding the audio file names.
            out_col: Name of the column holding the category labels.
        """
        self.df = df
        self.data = []
        self.labels = []
        self.categories = sorted(df[out_col].unique())
        self.c2i = {category: i for i, category in enumerate(self.categories)}
        self.i2c = {i: category for i, category in enumerate(self.categories)}

        for file_name, category in tqdm(zip(df[in_col], df[out_col]), total=len(df)):
            file_path = os.path.join(base, file_name)
            image = spec_to_image(get_melspectrogram_db(file_path))
            self.data.append(image[np.newaxis, ...])
            # Look the label up through out_col (not a hard-coded column name)
            # so the class mapping and the labels always come from the same column.
            self.labels.append(self.c2i[category])

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        return self.data[idx], self.labels[idx]

Split the dataset into training and validation sets

In [None]:
# Root folder of the workshop's ESC-50 copy on the mounted Google Drive.
ESC50_ROOT = '/content/drive/MyDrive/NTUCE_AI_worshop_0629/ESC-50'

df = pd.read_csv(os.path.join(ESC50_ROOT, 'meta', 'esc50_animal.csv'))

# Fold 5 is held out for validation; every other fold is used for training.
train = df[df['fold'] != 5]
valid = df[df['fold'] == 5]

audio_dir = os.path.join(ESC50_ROOT, 'audio')
train_data = ESC50Data(audio_dir, train, 'filename', 'category')
valid_data = ESC50Data(audio_dir, valid, 'filename', 'category')

# Each dataset builds its own category -> index map from its own rows. If a
# category were missing from one split the indices would silently disagree,
# so fail loudly instead of training against mismatched labels.
if train_data.c2i != valid_data.c2i:
    raise ValueError('training and validation splits do not contain the same categories')

train_loader = DataLoader(train_data, batch_size=4, shuffle=True)
# Evaluation does not benefit from shuffling; a fixed order keeps it repeatable.
valid_loader = DataLoader(valid_data, batch_size=4, shuffle=False)


100%|██████████| 320/320 [00:19<00:00, 16.84it/s]
100%|██████████| 80/80 [00:02<00:00, 34.80it/s]


In [None]:
import pickle

# Persist the index -> category lookup built by the training dataset so that
# predictions can be translated back to names later on.
with open('indtocat.pkl', 'wb') as mapping_file:
    pickle.dump(train_data.i2c, mapping_file)

Create the Convolutional Neural Network

In [None]:
import torch.nn as nn
import torch.nn.functional as F

def weights_init(m):
    """Re-initialise a module in place, chosen by its class name.

    Intended to be passed to ``Module.apply``. Modules whose class name
    contains ``'Conv'`` get weights drawn from N(0, 0.02). Modules whose class
    name contains ``'BatchNorm'`` get weights drawn from N(1, 0.02) and a
    zero bias. Every other module is left untouched.

    Args:
        m: The module to initialise.
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
        return
    if 'BatchNorm' in layer_kind:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

class ESC50Model(nn.Module):
    """Eight-layer convolutional classifier for single-channel images.

    The network is four pairs of 3x3 convolutions (32, 64, 128 and 256
    channels), each convolution followed by batch normalisation and ReLU.
    A 2x2 max-pool follows each of the first three pairs. The flattened
    features go through a 500-unit dense layer, dropout (p=0.5) and a final
    linear layer with ``num_cats`` outputs.
    """

    def __init__(self, input_shape, batch_size=16, num_cats=10):
        """Create the layers.

        Args:
            input_shape: ``(channels, height, width)`` of one input sample.
                Only height and width are read, to size the first dense layer.
            batch_size: Not used by the model; kept so existing calls still work.
            num_cats: Number of output classes.
        """
        super().__init__()
        # Channel count entering the network followed by the width of each conv.
        widths = (1, 32, 32, 64, 64, 128, 128, 256, 256)
        for idx in range(1, len(widths)):
            # Registered as conv1/bn1 ... conv8/bn8, in that order.
            setattr(self, f'conv{idx}', nn.Conv2d(widths[idx - 1], widths[idx], kernel_size=3, stride=1, padding=1))
            setattr(self, f'bn{idx}', nn.BatchNorm2d(widths[idx]))

        # Three 2x2 poolings each halve (floor) the spatial size.
        pooled_h = input_shape[1] // 2 // 2 // 2
        pooled_w = input_shape[2] // 2 // 2 // 2
        self.dense1 = nn.Linear(256 * pooled_h * pooled_w, 500)
        self.dropout = nn.Dropout(0.5)
        self.dense2 = nn.Linear(500, num_cats)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch ``x``."""
        for idx in range(1, 9):
            conv = getattr(self, f'conv{idx}')
            norm = getattr(self, f'bn{idx}')
            x = F.relu(norm(conv(x)))
            # Pool after the 1st, 2nd and 3rd conv pair, but not after the last.
            if idx in (2, 4, 6):
                x = F.max_pool2d(x, kernel_size=2)

        flat = x.view(x.size(0), -1)
        hidden = self.dropout(F.relu(self.dense1(flat)))
        return self.dense2(hidden)

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# input_shape is (channels, height, width) of one spectrogram image.
model = ESC50Model(input_shape=(1,128,431), num_cats=10).to(device)
# model.apply(weights_init)


In [None]:
# Training configuration for the custom CNN.
epochs = 60
learning_rate = 2e-4  # also read by lr_decay as the base rate
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Per-epoch lists of batch losses, filled in by train().
train_losses, valid_losses = [], []

Declare training process

In [None]:
def setlr(optimizer, lr):
    """Set the learning rate of every parameter group to ``lr``.

    Args:
        optimizer: Object exposing ``param_groups``, a list of dicts.
        lr: The new learning rate.

    Returns:
        The same ``optimizer`` object, modified in place.
    """
    for group in optimizer.param_groups:
        group.update(lr=lr)
    return optimizer


def lr_decay(optimizer, epoch):
    """Step-decay schedule: divide the base rate by 10 every 10 epochs.

    On epochs that are a multiple of 10 the learning rate is set to the
    module-level ``learning_rate`` divided by ``10 ** (epoch // 10)`` and the
    new value is printed. On all other epochs nothing changes.

    Args:
        optimizer: Optimizer whose learning rate is adjusted.
        epoch: Current epoch number.

    Returns:
        The optimizer that was passed in (as returned by ``setlr``).
    """
    if epoch % 10 != 0:
        return optimizer

    completed_decades = epoch // 10
    new_lr = learning_rate / (10 ** completed_decades)
    optimizer = setlr(optimizer, new_lr)
    print(f'Changed learning rate to {new_lr}')
    return optimizer

def train(model, loss_fn, train_loader, valid_loader, epochs, optimizer, train_losses, valid_losses, change_lr=None,
          accum_steps=5, best_model_path="/content/drive/MyDrive/NTUCE_AI_worshop_0629/demo_best.pth"):
    """Train ``model`` and validate it after every epoch.

    Gradients are accumulated (summed, not averaged) over ``accum_steps``
    consecutive batches before each optimizer step. Any batches left over at
    the end of an epoch are flushed with a final step, so no gradient is
    carried into the next epoch. Whenever the validation accuracy improves,
    the whole model is saved to ``best_model_path``.

    Args:
        model: The network to train. Batches are moved to the device of its
            first parameter (CPU if it has no parameters).
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar.
        train_loader: Iterable of ``(x, y)`` training batches.
        valid_loader: Iterable of ``(x, y)`` validation batches.
        epochs: Number of epochs to run.
        optimizer: Optimizer updating ``model``'s parameters.
        train_losses: List that receives one list of batch losses per epoch.
        valid_losses: List that receives one list of batch losses per epoch.
        change_lr: Optional callable ``change_lr(optimizer, epoch)`` invoked at
            the start of each epoch; its return value replaces ``optimizer``.
        accum_steps: Number of batches whose gradients are accumulated per
            optimizer step.
        best_model_path: Where the best-scoring model is saved; ``None``
            disables saving.

    Returns:
        The trained ``model`` (the same object that was passed in).

    Raises:
        ValueError: If ``accum_steps`` is smaller than 1.
    """
    if accum_steps < 1:
        raise ValueError("accum_steps must be >= 1")

    # Follow the model's own device instead of relying on a global variable.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    accuracy_best = 0
    # Start from clean gradients in case the optimizer was used before.
    optimizer.zero_grad()

    for epoch in tqdm(range(1, epochs+1)):
        model.train()
        batch_losses = []

        if change_lr:
            optimizer = change_lr(optimizer, epoch)

        pending = 0  # batches accumulated since the last optimizer step
        for x, y in train_loader:
            x = x.to(run_device, dtype=torch.float32)
            y = y.to(run_device, dtype=torch.long)
            y_hat = model(x)
            loss = loss_fn(y_hat, y)
            loss.backward()
            batch_losses.append(loss.item())

            pending += 1
            if pending == accum_steps:
                optimizer.step()
                optimizer.zero_grad()
                pending = 0

        if pending:
            # Flush the incomplete group so its gradients are neither dropped
            # nor mixed into the first update of the next epoch.
            optimizer.step()
            optimizer.zero_grad()

        train_losses.append(batch_losses)
        print(f'Epoch - {epoch} Train-Loss : {np.mean(train_losses[-1])}')

        # Validation
        model.eval()
        batch_losses = []
        trace_y = []
        trace_yhat = []

        # No gradients are needed for evaluation.
        with torch.no_grad():
            for x, y in valid_loader:
                x = x.to(run_device, dtype=torch.float32)
                y = y.to(run_device, dtype=torch.long)
                y_hat = model(x)
                loss = loss_fn(y_hat, y)
                trace_y.append(y.cpu().numpy())
                trace_yhat.append(y_hat.cpu().numpy())
                batch_losses.append(loss.item())

        valid_losses.append(batch_losses)
        trace_y = np.concatenate(trace_y)
        trace_yhat = np.concatenate(trace_yhat)
        accuracy = np.mean(trace_yhat.argmax(axis=1) == trace_y)
        print(f'Epoch - {epoch} Valid-Loss : {np.mean(valid_losses[-1])} Valid-Accuracy : {accuracy}')

        if accuracy > accuracy_best:
            accuracy_best = accuracy
            if best_model_path is not None:
                torch.save(model, best_model_path)
    return model

Start training

In [None]:
# Train the custom CNN with the step-decay schedule. train() returns the
# model in its final-epoch state.
model_ = train(
    model,
    loss_fn,
    train_loader,
    valid_loader,
    epochs,
    optimizer,
    train_losses,
    valid_losses,
    change_lr=lr_decay,
)

# Keep the last-epoch model next to the best checkpoint written by train().
torch.save(model_, "/content/drive/MyDrive/NTUCE_AI_worshop_0629/demo_last.pth")

  0%|          | 0/60 [00:00<?, ?it/s]

Epoch - 1 Train-Loss : 8.913888461887836
Epoch - 1 Valid-Loss : 2.925560799241066 Valid-Accuracy : 0.2875


  2%|▏         | 1/60 [00:07<07:32,  7.67s/it]

Epoch - 2 Train-Loss : 1.8189916864037514
Epoch - 2 Valid-Loss : 1.405715494044125 Valid-Accuracy : 0.525


  3%|▎         | 2/60 [00:13<06:09,  6.37s/it]

Epoch - 3 Train-Loss : 1.1731634847819805
Epoch - 3 Valid-Loss : 1.4005602411925793 Valid-Accuracy : 0.5625


  5%|▌         | 3/60 [00:18<05:42,  6.01s/it]

Epoch - 4 Train-Loss : 0.8448737461119891


  7%|▋         | 4/60 [00:22<04:46,  5.11s/it]

Epoch - 4 Valid-Loss : 1.260612753778696 Valid-Accuracy : 0.55
Epoch - 5 Train-Loss : 0.617923581181094
Epoch - 5 Valid-Loss : 1.2438402209430932 Valid-Accuracy : 0.5875


  8%|▊         | 5/60 [00:28<04:57,  5.41s/it]

Epoch - 6 Train-Loss : 0.47493798926007


 10%|█         | 6/60 [00:32<04:22,  4.86s/it]

Epoch - 6 Valid-Loss : 1.2378309920430184 Valid-Accuracy : 0.55
Epoch - 7 Train-Loss : 0.39762091404991223


 12%|█▏        | 7/60 [00:35<03:58,  4.50s/it]

Epoch - 7 Valid-Loss : 1.395635821740143 Valid-Accuracy : 0.575
Epoch - 8 Train-Loss : 0.3857440281077288
Epoch - 8 Valid-Loss : 1.1713423946872354 Valid-Accuracy : 0.625


 13%|█▎        | 8/60 [00:41<04:14,  4.89s/it]

Epoch - 9 Train-Loss : 0.3253842734498903


 15%|█▌        | 9/60 [00:45<03:52,  4.55s/it]

Epoch - 9 Valid-Loss : 1.4408689633011817 Valid-Accuracy : 0.6
Changed learning rate to 2e-05
Epoch - 10 Train-Loss : 0.24134346750797703
Epoch - 10 Valid-Loss : 1.3471949830651284 Valid-Accuracy : 0.6625


 17%|█▋        | 10/60 [00:51<04:02,  4.86s/it]

Epoch - 11 Train-Loss : 0.16414440690132323


 18%|█▊        | 11/60 [00:54<03:41,  4.53s/it]

Epoch - 11 Valid-Loss : 1.4142618850804864 Valid-Accuracy : 0.625
Epoch - 12 Train-Loss : 0.17071544993232238


 20%|██        | 12/60 [00:58<03:26,  4.31s/it]

Epoch - 12 Valid-Loss : 1.3343504277989269 Valid-Accuracy : 0.65
Epoch - 13 Train-Loss : 0.11716849663862376


 22%|██▏       | 13/60 [01:02<03:15,  4.15s/it]

Epoch - 13 Valid-Loss : 1.3801248325034976 Valid-Accuracy : 0.6375
Epoch - 14 Train-Loss : 0.1605357376180109


 23%|██▎       | 14/60 [01:06<03:05,  4.03s/it]

Epoch - 14 Valid-Loss : 1.2862490174826235 Valid-Accuracy : 0.6375
Epoch - 15 Train-Loss : 0.14186649474722798


 25%|██▌       | 15/60 [01:09<02:58,  3.96s/it]

Epoch - 15 Valid-Loss : 1.2697971256216989 Valid-Accuracy : 0.65
Epoch - 16 Train-Loss : 0.13439152036153246


 27%|██▋       | 16/60 [01:14<02:58,  4.05s/it]

Epoch - 16 Valid-Loss : 1.2704366724938154 Valid-Accuracy : 0.6375
Epoch - 17 Train-Loss : 0.11713009183604299


 28%|██▊       | 17/60 [01:17<02:50,  3.96s/it]

Epoch - 17 Valid-Loss : 1.2630925248377025 Valid-Accuracy : 0.6375
Epoch - 18 Train-Loss : 0.10188874868617859
Epoch - 18 Valid-Loss : 1.1844169716117903 Valid-Accuracy : 0.6875


 30%|███       | 18/60 [01:23<03:10,  4.54s/it]

Epoch - 19 Train-Loss : 0.0921581621747464


 32%|███▏      | 19/60 [01:27<02:56,  4.31s/it]

Epoch - 19 Valid-Loss : 1.2612009037286043 Valid-Accuracy : 0.6875
Changed learning rate to 2e-06
Epoch - 20 Train-Loss : 0.09735288989395485


 33%|███▎      | 20/60 [01:31<02:45,  4.13s/it]

Epoch - 20 Valid-Loss : 1.237943247752264 Valid-Accuracy : 0.6875
Epoch - 21 Train-Loss : 0.08392649361194345


 35%|███▌      | 21/60 [01:35<02:36,  4.02s/it]

Epoch - 21 Valid-Loss : 1.2651945915305987 Valid-Accuracy : 0.675
Epoch - 22 Train-Loss : 0.10746219030952489


 37%|███▋      | 22/60 [01:38<02:29,  3.94s/it]

Epoch - 22 Valid-Loss : 1.2802061961847357 Valid-Accuracy : 0.675
Epoch - 23 Train-Loss : 0.10126532939884783


 38%|███▊      | 23/60 [01:42<02:23,  3.87s/it]

Epoch - 23 Valid-Loss : 1.2492336057126523 Valid-Accuracy : 0.6625
Epoch - 24 Train-Loss : 0.10155344534305186


 40%|████      | 24/60 [01:46<02:17,  3.83s/it]

Epoch - 24 Valid-Loss : 1.2179969573393463 Valid-Accuracy : 0.6875
Epoch - 25 Train-Loss : 0.1128263147038524


 42%|████▏     | 25/60 [01:50<02:14,  3.85s/it]

Epoch - 25 Valid-Loss : 1.2417567896656692 Valid-Accuracy : 0.6875
Epoch - 26 Train-Loss : 0.0886030783130991


 43%|████▎     | 26/60 [01:54<02:14,  3.97s/it]

Epoch - 26 Valid-Loss : 1.2580516598420217 Valid-Accuracy : 0.6875
Epoch - 27 Train-Loss : 0.09506514252261695


 45%|████▌     | 27/60 [01:58<02:09,  3.92s/it]

Epoch - 27 Valid-Loss : 1.2236876418348401 Valid-Accuracy : 0.675
Epoch - 28 Train-Loss : 0.11755643726501148


 47%|████▋     | 28/60 [02:01<02:03,  3.87s/it]

Epoch - 28 Valid-Loss : 1.2675951136276127 Valid-Accuracy : 0.65
Epoch - 29 Train-Loss : 0.09702894371666844


 48%|████▊     | 29/60 [02:05<01:58,  3.83s/it]

Epoch - 29 Valid-Loss : 1.2436872269317973 Valid-Accuracy : 0.6875
Changed learning rate to 2.0000000000000002e-07
Epoch - 30 Train-Loss : 0.12937883814829548


 50%|█████     | 30/60 [02:09<01:54,  3.80s/it]

Epoch - 30 Valid-Loss : 1.220684336865088 Valid-Accuracy : 0.675
Epoch - 31 Train-Loss : 0.1019985035454738


 52%|█████▏    | 31/60 [02:13<01:49,  3.78s/it]

Epoch - 31 Valid-Loss : 1.2675746218301356 Valid-Accuracy : 0.675
Epoch - 32 Train-Loss : 0.08715954778999731


 53%|█████▎    | 32/60 [02:16<01:45,  3.77s/it]

Epoch - 32 Valid-Loss : 1.2429671249352396 Valid-Accuracy : 0.6875
Epoch - 33 Train-Loss : 0.0752155511861929
Epoch - 33 Valid-Loss : 1.2523170834407211 Valid-Accuracy : 0.7125


 55%|█████▌    | 33/60 [02:22<01:55,  4.29s/it]

Epoch - 34 Train-Loss : 0.11203140784512584


 57%|█████▋    | 34/60 [02:26<01:47,  4.13s/it]

Epoch - 34 Valid-Loss : 1.2497739258687943 Valid-Accuracy : 0.6625
Epoch - 35 Train-Loss : 0.08172593439285265


 58%|█████▊    | 35/60 [02:29<01:40,  4.01s/it]

Epoch - 35 Valid-Loss : 1.2564042442361825 Valid-Accuracy : 0.65
Epoch - 36 Train-Loss : 0.1122526874212781


 60%|██████    | 36/60 [02:33<01:34,  3.95s/it]

Epoch - 36 Valid-Loss : 1.2274443888105453 Valid-Accuracy : 0.6875
Epoch - 37 Train-Loss : 0.08579954199685744


 62%|██████▏   | 37/60 [02:37<01:29,  3.89s/it]

Epoch - 37 Valid-Loss : 1.2424913845956325 Valid-Accuracy : 0.675
Epoch - 38 Train-Loss : 0.105224884639847


 63%|██████▎   | 38/60 [02:41<01:24,  3.85s/it]

Epoch - 38 Valid-Loss : 1.2297383228316903 Valid-Accuracy : 0.6875
Epoch - 39 Train-Loss : 0.10165015010352363


 65%|██████▌   | 39/60 [02:44<01:20,  3.81s/it]

Epoch - 39 Valid-Loss : 1.2238353570573963 Valid-Accuracy : 0.675
Changed learning rate to 2e-08
Epoch - 40 Train-Loss : 0.10407396770369814


 67%|██████▋   | 40/60 [02:48<01:15,  3.80s/it]

Epoch - 40 Valid-Loss : 1.294704258418642 Valid-Accuracy : 0.6375
Epoch - 41 Train-Loss : 0.08591763498952787


 68%|██████▊   | 41/60 [02:52<01:11,  3.78s/it]

Epoch - 41 Valid-Loss : 1.2281554291024803 Valid-Accuracy : 0.7125
Epoch - 42 Train-Loss : 0.09820063206179838


 70%|███████   | 42/60 [02:56<01:07,  3.77s/it]

Epoch - 42 Valid-Loss : 1.2986797161400319 Valid-Accuracy : 0.65
Epoch - 43 Train-Loss : 0.08620582155053853


 72%|███████▏  | 43/60 [02:59<01:03,  3.76s/it]

Epoch - 43 Valid-Loss : 1.2897395312786102 Valid-Accuracy : 0.6625
Epoch - 44 Train-Loss : 0.11911601059655368


 73%|███████▎  | 44/60 [03:03<01:00,  3.76s/it]

Epoch - 44 Valid-Loss : 1.2446679971530101 Valid-Accuracy : 0.675
Epoch - 45 Train-Loss : 0.08115014284521749


 75%|███████▌  | 45/60 [03:07<00:56,  3.76s/it]

Epoch - 45 Valid-Loss : 1.2737580596702174 Valid-Accuracy : 0.7
Epoch - 46 Train-Loss : 0.09187787936898531


 77%|███████▋  | 46/60 [03:11<00:52,  3.78s/it]

Epoch - 46 Valid-Loss : 1.2856765780656132 Valid-Accuracy : 0.65
Epoch - 47 Train-Loss : 0.08119058845222753


 78%|███████▊  | 47/60 [03:15<00:49,  3.77s/it]

Epoch - 47 Valid-Loss : 1.2417496261536143 Valid-Accuracy : 0.675
Epoch - 48 Train-Loss : 0.0846800530613109


 80%|████████  | 48/60 [03:18<00:45,  3.77s/it]

Epoch - 48 Valid-Loss : 1.2545381878502666 Valid-Accuracy : 0.675
Epoch - 49 Train-Loss : 0.08351745307991223


 82%|████████▏ | 49/60 [03:22<00:41,  3.76s/it]

Epoch - 49 Valid-Loss : 1.232508867699653 Valid-Accuracy : 0.6625
Changed learning rate to 2e-09
Epoch - 50 Train-Loss : 0.10956481629546033


 83%|████████▎ | 50/60 [03:26<00:37,  3.75s/it]

Epoch - 50 Valid-Loss : 1.2445404664525994 Valid-Accuracy : 0.6875
Epoch - 51 Train-Loss : 0.08959176433490938


 85%|████████▌ | 51/60 [03:30<00:33,  3.75s/it]

Epoch - 51 Valid-Loss : 1.2702430821955204 Valid-Accuracy : 0.675
Epoch - 52 Train-Loss : 0.11229250455626243


 87%|████████▋ | 52/60 [03:33<00:29,  3.75s/it]

Epoch - 52 Valid-Loss : 1.246043173642829 Valid-Accuracy : 0.6875
Epoch - 53 Train-Loss : 0.08157351430054405


 88%|████████▊ | 53/60 [03:37<00:26,  3.75s/it]

Epoch - 53 Valid-Loss : 1.2618023604154587 Valid-Accuracy : 0.65
Epoch - 54 Train-Loss : 0.0956644775284076


 90%|█████████ | 54/60 [03:41<00:22,  3.75s/it]

Epoch - 54 Valid-Loss : 1.241628707142081 Valid-Accuracy : 0.675
Epoch - 55 Train-Loss : 0.10125426385457104


 92%|█████████▏| 55/60 [03:45<00:18,  3.75s/it]

Epoch - 55 Valid-Loss : 1.2346230797702447 Valid-Accuracy : 0.6875
Epoch - 56 Train-Loss : 0.08721341224227216


 93%|█████████▎| 56/60 [03:48<00:14,  3.75s/it]

Epoch - 56 Valid-Loss : 1.2583941501798108 Valid-Accuracy : 0.675
Epoch - 57 Train-Loss : 0.0801999108709424


 95%|█████████▌| 57/60 [03:52<00:11,  3.74s/it]

Epoch - 57 Valid-Loss : 1.295427887700498 Valid-Accuracy : 0.6625
Epoch - 58 Train-Loss : 0.13416284749910118


 97%|█████████▋| 58/60 [03:56<00:07,  3.74s/it]

Epoch - 58 Valid-Loss : 1.2285472210496664 Valid-Accuracy : 0.675
Epoch - 59 Train-Loss : 0.0779355357399254


 98%|█████████▊| 59/60 [03:59<00:03,  3.74s/it]

Epoch - 59 Valid-Loss : 1.2555963883176446 Valid-Accuracy : 0.6625
Changed learning rate to 2e-10
Epoch - 60 Train-Loss : 0.06893853294204746


100%|██████████| 60/60 [04:03<00:00,  4.06s/it]

Epoch - 60 Valid-Loss : 1.2546275392276585 Valid-Accuracy : 0.675





In [None]:
# Single audio clip used to try out the trained models; it goes through the
# same audio -> spectrogram -> image pipeline as the training data.
test_path = "/content/drive/MyDrive/NTUCE_AI_worshop_0629/1-18757-A-4.wav"
test_data = spec_to_image(
    get_melspectrogram_db(test_path)
)


In [None]:
# Reload the index -> category lookup written earlier in this notebook.
# pickle.load can execute arbitrary code, so only open files you created.
with open('indtocat.pkl', 'rb') as mapping_file:
    indtocat = pickle.load(mapping_file)

In [None]:
# Display the index -> category mapping that was just loaded.
indtocat

{0: 'cat',
 1: 'cow',
 2: 'crow',
 3: 'dog',
 4: 'frog',
 5: 'hen',
 6: 'insects',
 7: 'pig',
 8: 'rooster',
 9: 'sheep'}

In [None]:
# Classify the test clip with the custom CNN.
model_.eval()
spec_t = torch.tensor(test_data).to(device, dtype=torch.float32)

# Inference only: disable autograd so no graph is built, and call the module
# itself (not .forward) so any registered hooks still run.
with torch.no_grad():
    # Add the batch and channel axes the model expects: (1, 1, H, W).
    pr = model_(spec_t.reshape(1, 1, *spec_t.shape))
print(pr)

# Index of the highest score, translated back to its category name.
ind = pr.argmax(dim=1).item()
print(indtocat[ind])

tensor([[ 1.5491, -4.9679,  2.8326,  3.6070, 14.2429,  0.8141, -4.5870,  5.9992,
          4.8902, -1.6729]], device='cuda:0', grad_fn=<AddmmBackward0>)
frog


Use the pretrained torchvision model

In [None]:
from torchvision.models import resnet34

# Start from torchvision's pretrained ResNet-34.
resnet_model = resnet34(pretrained=True)

# Replace the classifier head so it outputs 10 classes.
resnet_model.fc = nn.Linear(in_features=512, out_features=10)

# Replace the stem so it accepts single-channel input instead of RGB. This
# new layer is freshly initialised, so the pretrained first-layer weights
# are not reused.
resnet_model.conv1 = nn.Conv2d(
    in_channels=1,
    out_channels=64,
    kernel_size=7,
    stride=2,
    padding=3,
    bias=False,
)
resnet_model = resnet_model.to(device)

Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
100%|██████████| 83.3M/83.3M [00:00<00:00, 96.9MB/s]


In [None]:
# Same training configuration as the custom CNN, now bound to the ResNet.
# These assignments overwrite the notebook-level learning_rate, optimizer,
# epochs and loss_fn used earlier.
epochs = 60
learning_rate = 2e-4
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet_model.parameters(), lr=learning_rate)
resnet_train_losses, resnet_valid_losses = [], []

# NOTE(review): train() writes its best checkpoint to a fixed demo_best.pth
# path, so this run overwrites the file saved while training the custom CNN.
model_res = train(
    resnet_model,
    loss_fn,
    train_loader,
    valid_loader,
    epochs,
    optimizer,
    resnet_train_losses,
    resnet_valid_losses,
    change_lr=lr_decay,
)
torch.save(model_res, "/content/drive/MyDrive/NTUCE_AI_worshop_0629/demo_resnet.pth")

  0%|          | 0/60 [00:00<?, ?it/s]

Epoch - 1 Train-Loss : 1.722518005222082
Epoch - 1 Valid-Loss : 0.9410615712404251 Valid-Accuracy : 0.6625


  2%|▏         | 1/60 [00:03<03:19,  3.39s/it]

Epoch - 2 Train-Loss : 0.788768338225782
Epoch - 2 Valid-Loss : 0.6503754971548915 Valid-Accuracy : 0.75


  3%|▎         | 2/60 [00:06<03:15,  3.37s/it]

Epoch - 3 Train-Loss : 0.5163225864991545


  5%|▌         | 3/60 [00:10<03:12,  3.39s/it]

Epoch - 3 Valid-Loss : 0.6826194947585463 Valid-Accuracy : 0.7375
Epoch - 4 Train-Loss : 0.3794070062227547
Epoch - 4 Valid-Loss : 0.44378172839060426 Valid-Accuracy : 0.825


  7%|▋         | 4/60 [00:13<03:10,  3.40s/it]

Epoch - 5 Train-Loss : 0.2590615545166656


  8%|▊         | 5/60 [00:16<03:00,  3.28s/it]

Epoch - 5 Valid-Loss : 0.3920005956082605 Valid-Accuracy : 0.825
Epoch - 6 Train-Loss : 0.27294360344531016
Epoch - 6 Valid-Loss : 0.4021492123603821 Valid-Accuracy : 0.85


 10%|█         | 6/60 [00:19<02:57,  3.29s/it]

Epoch - 7 Train-Loss : 0.2261541804531589
Epoch - 7 Valid-Loss : 0.3619231500546448 Valid-Accuracy : 0.875


 12%|█▏        | 7/60 [00:23<02:57,  3.36s/it]

Epoch - 8 Train-Loss : 0.2592962355236523


 13%|█▎        | 8/60 [00:26<02:51,  3.29s/it]

Epoch - 8 Valid-Loss : 0.42318918416276574 Valid-Accuracy : 0.875
Epoch - 9 Train-Loss : 0.2688062356202863
Epoch - 9 Valid-Loss : 0.44938104022294284 Valid-Accuracy : 0.9


 15%|█▌        | 9/60 [00:29<02:48,  3.30s/it]

Changed learning rate to 2e-05
Epoch - 10 Train-Loss : 0.1430533092294354


 17%|█▋        | 10/60 [00:33<02:41,  3.23s/it]

Epoch - 10 Valid-Loss : 0.47132340259850025 Valid-Accuracy : 0.9
Epoch - 11 Train-Loss : 0.1524590419139713
Epoch - 11 Valid-Loss : 0.3597653173143044 Valid-Accuracy : 0.9125


 18%|█▊        | 11/60 [00:36<02:41,  3.29s/it]

Epoch - 12 Train-Loss : 0.07850270945054945


 20%|██        | 12/60 [00:39<02:34,  3.23s/it]

Epoch - 12 Valid-Loss : 0.5073887185601051 Valid-Accuracy : 0.8875
Epoch - 13 Train-Loss : 0.1357509621535428


 22%|██▏       | 13/60 [00:42<02:29,  3.17s/it]

Epoch - 13 Valid-Loss : 0.4403694957261905 Valid-Accuracy : 0.9
Epoch - 14 Train-Loss : 0.09209277614427265


 23%|██▎       | 14/60 [00:45<02:24,  3.14s/it]

Epoch - 14 Valid-Loss : 0.4356614749878645 Valid-Accuracy : 0.9125
Epoch - 15 Train-Loss : 0.1026498535531573


 25%|██▌       | 15/60 [00:48<02:20,  3.12s/it]

Epoch - 15 Valid-Loss : 0.405149401089875 Valid-Accuracy : 0.9
Epoch - 16 Train-Loss : 0.11481254351092503


 27%|██▋       | 16/60 [00:51<02:18,  3.14s/it]

Epoch - 16 Valid-Loss : 0.41779789350694047 Valid-Accuracy : 0.9125
Epoch - 17 Train-Loss : 0.0600513655343093


 28%|██▊       | 17/60 [00:54<02:13,  3.11s/it]

Epoch - 17 Valid-Loss : 0.3451330568408594 Valid-Accuracy : 0.9125
Epoch - 18 Train-Loss : 0.07624574157234747


 30%|███       | 18/60 [00:57<02:09,  3.09s/it]

Epoch - 18 Valid-Loss : 0.32887700599458186 Valid-Accuracy : 0.9
Epoch - 19 Train-Loss : 0.08490346874459646


 32%|███▏      | 19/60 [01:01<02:06,  3.07s/it]

Epoch - 19 Valid-Loss : 0.33559043573914094 Valid-Accuracy : 0.9125
Changed learning rate to 2e-06
Epoch - 20 Train-Loss : 0.053961768513545394


 33%|███▎      | 20/60 [01:04<02:02,  3.07s/it]

Epoch - 20 Valid-Loss : 0.28461153064854444 Valid-Accuracy : 0.9125
Epoch - 21 Train-Loss : 0.07277304080198518


 35%|███▌      | 21/60 [01:07<01:59,  3.06s/it]

Epoch - 21 Valid-Loss : 0.3327546211890876 Valid-Accuracy : 0.9
Epoch - 22 Train-Loss : 0.0784123781195376


 37%|███▋      | 22/60 [01:10<01:55,  3.05s/it]

Epoch - 22 Valid-Loss : 0.30774451846518786 Valid-Accuracy : 0.9125
Epoch - 23 Train-Loss : 0.08270317055867053


 38%|███▊      | 23/60 [01:13<01:52,  3.04s/it]

Epoch - 23 Valid-Loss : 0.31771729511528973 Valid-Accuracy : 0.8875
Epoch - 24 Train-Loss : 0.05186661182378884


 40%|████      | 24/60 [01:16<01:49,  3.04s/it]

Epoch - 24 Valid-Loss : 0.32917273706407285 Valid-Accuracy : 0.8875
Epoch - 25 Train-Loss : 0.09896168794948608


 42%|████▏     | 25/60 [01:19<01:46,  3.05s/it]

Epoch - 25 Valid-Loss : 0.32524594184942546 Valid-Accuracy : 0.9
Epoch - 26 Train-Loss : 0.08201315251353662


 43%|████▎     | 26/60 [01:22<01:43,  3.04s/it]

Epoch - 26 Valid-Loss : 0.3141063135350123 Valid-Accuracy : 0.9
Epoch - 27 Train-Loss : 0.06821502587699797


 45%|████▌     | 27/60 [01:25<01:40,  3.04s/it]

Epoch - 27 Valid-Loss : 0.33006327917100864 Valid-Accuracy : 0.9125
Epoch - 28 Train-Loss : 0.08890703464276158


 47%|████▋     | 28/60 [01:28<01:37,  3.04s/it]

Epoch - 28 Valid-Loss : 0.32315858504152856 Valid-Accuracy : 0.9125
Epoch - 29 Train-Loss : 0.0529734386887867


 48%|████▊     | 29/60 [01:31<01:34,  3.05s/it]

Epoch - 29 Valid-Loss : 0.27985365423373876 Valid-Accuracy : 0.9125
Changed learning rate to 2.0000000000000002e-07
Epoch - 30 Train-Loss : 0.05916313614870887


 50%|█████     | 30/60 [01:34<01:31,  3.05s/it]

Epoch - 30 Valid-Loss : 0.3153297195211053 Valid-Accuracy : 0.9125
Epoch - 31 Train-Loss : 0.050341729997307995


 52%|█████▏    | 31/60 [01:37<01:28,  3.05s/it]

Epoch - 31 Valid-Loss : 0.3058010423381347 Valid-Accuracy : 0.9125
Epoch - 32 Train-Loss : 0.07175994095450733


 53%|█████▎    | 32/60 [01:40<01:25,  3.04s/it]

Epoch - 32 Valid-Loss : 0.30776514944300287 Valid-Accuracy : 0.9125
Epoch - 33 Train-Loss : 0.07431886568956543


 55%|█████▌    | 33/60 [01:43<01:22,  3.05s/it]

Epoch - 33 Valid-Loss : 0.34609628370963036 Valid-Accuracy : 0.9125
Epoch - 34 Train-Loss : 0.11291970688616856


 57%|█████▋    | 34/60 [01:46<01:19,  3.05s/it]

Epoch - 34 Valid-Loss : 0.3039404526003636 Valid-Accuracy : 0.9125
Epoch - 35 Train-Loss : 0.0645854647824308


 58%|█████▊    | 35/60 [01:49<01:16,  3.05s/it]

Epoch - 35 Valid-Loss : 0.2826264690142125 Valid-Accuracy : 0.9
Epoch - 36 Train-Loss : 0.09746244681300595
Epoch - 36 Valid-Loss : 0.26110253180377185 Valid-Accuracy : 0.925


 60%|██████    | 36/60 [01:53<01:14,  3.12s/it]

Epoch - 37 Train-Loss : 0.09614194503810722


 62%|██████▏   | 37/60 [01:56<01:11,  3.10s/it]

Epoch - 37 Valid-Loss : 0.2705773341527674 Valid-Accuracy : 0.9125
Epoch - 38 Train-Loss : 0.04072977328032721


 63%|██████▎   | 38/60 [01:59<01:08,  3.09s/it]

Epoch - 38 Valid-Loss : 0.2966525930329226 Valid-Accuracy : 0.9125
Epoch - 39 Train-Loss : 0.10065074184676633


 65%|██████▌   | 39/60 [02:02<01:04,  3.08s/it]

Epoch - 39 Valid-Loss : 0.28835026590386403 Valid-Accuracy : 0.9
Changed learning rate to 2e-08
Epoch - 40 Train-Loss : 0.08898830090183765


 67%|██████▋   | 40/60 [02:05<01:01,  3.08s/it]

Epoch - 40 Valid-Loss : 0.3203112410148606 Valid-Accuracy : 0.9
Epoch - 41 Train-Loss : 0.12941490746452472


 68%|██████▊   | 41/60 [02:08<00:58,  3.07s/it]

Epoch - 41 Valid-Loss : 0.2918119803071022 Valid-Accuracy : 0.9125
Epoch - 42 Train-Loss : 0.0673064446717035


 70%|███████   | 42/60 [02:11<00:55,  3.07s/it]

Epoch - 42 Valid-Loss : 0.30066843174863606 Valid-Accuracy : 0.9
Epoch - 43 Train-Loss : 0.0373662521596998


 72%|███████▏  | 43/60 [02:14<00:52,  3.06s/it]

Epoch - 43 Valid-Loss : 0.2971767030787305 Valid-Accuracy : 0.9125
Epoch - 44 Train-Loss : 0.06643206563894637


 73%|███████▎  | 44/60 [02:17<00:48,  3.06s/it]

Epoch - 44 Valid-Loss : 0.28650984015548603 Valid-Accuracy : 0.9125
Epoch - 45 Train-Loss : 0.06529811677755788


 75%|███████▌  | 45/60 [02:20<00:45,  3.05s/it]

Epoch - 45 Valid-Loss : 0.3478465311927721 Valid-Accuracy : 0.9125
Epoch - 46 Train-Loss : 0.05304760469007306


 77%|███████▋  | 46/60 [02:23<00:42,  3.06s/it]

Epoch - 46 Valid-Loss : 0.27670041177188975 Valid-Accuracy : 0.9125
Epoch - 47 Train-Loss : 0.07640385163249448


 78%|███████▊  | 47/60 [02:26<00:39,  3.06s/it]

Epoch - 47 Valid-Loss : 0.2982303741155192 Valid-Accuracy : 0.9125
Epoch - 48 Train-Loss : 0.0497146779904142


 80%|████████  | 48/60 [02:29<00:36,  3.06s/it]

Epoch - 48 Valid-Loss : 0.3052887156722136 Valid-Accuracy : 0.9125
Epoch - 49 Train-Loss : 0.06614607619412709


 82%|████████▏ | 49/60 [02:32<00:33,  3.05s/it]

Epoch - 49 Valid-Loss : 0.3478330384590663 Valid-Accuracy : 0.9
Changed learning rate to 2e-09
Epoch - 50 Train-Loss : 0.11548753563256468


 83%|████████▎ | 50/60 [02:35<00:30,  3.05s/it]

Epoch - 50 Valid-Loss : 0.27282902158331124 Valid-Accuracy : 0.925
Epoch - 51 Train-Loss : 0.0683144820126472


 85%|████████▌ | 51/60 [02:38<00:27,  3.05s/it]

Epoch - 51 Valid-Loss : 0.27840488476213066 Valid-Accuracy : 0.925
Epoch - 52 Train-Loss : 0.04637540691473987


 87%|████████▋ | 52/60 [02:41<00:24,  3.05s/it]

Epoch - 52 Valid-Loss : 0.30498740714974704 Valid-Accuracy : 0.9
Epoch - 53 Train-Loss : 0.06113415951258503


 88%|████████▊ | 53/60 [02:44<00:21,  3.05s/it]

Epoch - 53 Valid-Loss : 0.3212277528014965 Valid-Accuracy : 0.9
Epoch - 54 Train-Loss : 0.04491859636618756


 90%|█████████ | 54/60 [02:47<00:18,  3.04s/it]

Epoch - 54 Valid-Loss : 0.3705269823083654 Valid-Accuracy : 0.9125
Epoch - 55 Train-Loss : 0.06845070022973232


 92%|█████████▏| 55/60 [02:51<00:15,  3.05s/it]

Epoch - 55 Valid-Loss : 0.27481006209709447 Valid-Accuracy : 0.9
Epoch - 56 Train-Loss : 0.05706266206689179


 93%|█████████▎| 56/60 [02:54<00:12,  3.05s/it]

Epoch - 56 Valid-Loss : 0.35583885335363447 Valid-Accuracy : 0.8875
Epoch - 57 Train-Loss : 0.07904190128610936


 95%|█████████▌| 57/60 [02:57<00:09,  3.05s/it]

Epoch - 57 Valid-Loss : 0.3119036343035987 Valid-Accuracy : 0.9125
Epoch - 58 Train-Loss : 0.03683344910095911


 97%|█████████▋| 58/60 [03:00<00:06,  3.04s/it]

Epoch - 58 Valid-Loss : 0.31239161122357473 Valid-Accuracy : 0.9
Epoch - 59 Train-Loss : 0.07441552465897985


 98%|█████████▊| 59/60 [03:03<00:03,  3.04s/it]

Epoch - 59 Valid-Loss : 0.35749408696137835 Valid-Accuracy : 0.8875
Changed learning rate to 2e-10
Epoch - 60 Train-Loss : 0.055019253664067944


100%|██████████| 60/60 [03:06<00:00,  3.10s/it]

Epoch - 60 Valid-Loss : 0.32450849846936763 Valid-Accuracy : 0.9





In [None]:
# Classify the same test clip (spec_t, prepared in the earlier inference cell)
# with the fine-tuned ResNet.
model_res.eval()

# Inference only: disable autograd so no graph is built, and call the module
# itself (not .forward) so any registered hooks still run.
with torch.no_grad():
    pr = model_res(spec_t.reshape(1, 1, *spec_t.shape))
print(pr)

# Index of the highest score, translated back to its category name.
ind = pr.argmax(dim=1).item()
print(indtocat[ind])

tensor([[-0.9925, -2.2728, -1.4880,  0.4746, 11.7686, -0.2666, -2.7350, -1.9204,
         -2.0594, -0.2178]], device='cuda:0', grad_fn=<AddmmBackward0>)
frog


Check the parameter count of both models

In [None]:
# Report the total and trainable parameter counts, first for the custom
# model and then for the ResNet model.
for banner, net in (
    ("-----Our model------", model_),
    ("-----Resnet model------", model_res),
):
    total_params = sum(weights.numel() for weights in net.parameters())
    trainable_params = sum(
        weights.numel() for weights in net.parameters() if weights.requires_grad
    )
    print(banner)
    print("Total parameter number: ", total_params)
    print("Trainable parameter number: ", trainable_params)



-----Our model------
Total parameter number:  109723110
Trainable parameter number:  109723110
-----Resnet model------
Total parameter number:  21283530
Trainable parameter number:  21283530
