In [15]:
!pip install librosa torch torchaudio torchsummary einops

Collecting torchsummary
  Downloading torchsummary-1.5.1-py3-none-any.whl.metadata (296 bytes)
Downloading torchsummary-1.5.1-py3-none-any.whl (2.8 kB)
Installing collected packages: torchsummary
Successfully installed torchsummary-1.5.1


In [1]:
import json
import os
import math
import librosa


import torch
import torch.nn as nn
from torch.utils.data import random_split
import torchaudio

from einops import rearrange, pack

## Data Preprocessing (Run only once)
The code below does the following:
1. Iterate through the dataset directory to capture the names of genres
2. Load the audio files from each genre's directory and split it into segments
3. Compute MFCCs for each segment
4. Save the final dataset in a JSON file

Code taken from [tutorial code.](https://github.com/musikalkemist/DeepLearningForAudioWithPython/blob/master/12-%20Music%20genre%20classification%3A%20Preparing%20the%20dataset/code/extract_data.py)

In [2]:
DATASET_PATH = "DATA/genres_original"
JSON_PATH = "data_10.json"
SAMPLE_RATE = 22050
TRACK_DURATION = 30 # measured in seconds
SAMPLES_PER_TRACK = SAMPLE_RATE * TRACK_DURATION

In [3]:
def save_mfcc_torch(dataset_path, num_mfcc=13, n_fft=2048, hop_length=512, num_segments=5):
    """Extracts MFCCs from music dataset and saves them into a json file along witgh genre labels.

        :param dataset_path (str): Path to dataset
        :param num_mfcc (int): Number of coefficients to extract
        :param n_fft (int): Interval we consider to apply FFT. Measured in # of samples
        :param hop_length (int): Sliding window for FFT. Measured in # of samples
        :param: num_segments (int): Number of segments we want to divide sample tracks into
        :return:
        """

    # dictionary to store mapping, labels, and MFCCs
    data = {
        "mapping": [],
        "labels": [],
        "mfcc": []
    }

    if os.path.exists('mfcc.pt') and os.path.exists('labels.pt') and os.path.exists('mapping.json'):
        data['mfcc'] = torch.load('mfcc.pt')
        data['labels'] = torch.load('labels.pt')
        with open('mapping.json', 'r') as f:
            data['mapping'] = json.load(f)
        return data['mfcc'], data['labels'], data['mapping']
    

    samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
    num_mfcc_vectors_per_segment = math.ceil(samples_per_segment / hop_length)
    errors = []
    # loop through all genre sub-folder
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):

        # ensure we're processing a genre sub-folder level
        if dirpath is not dataset_path:

            # save genre label (i.e., sub-folder name) in the mapping
            semantic_label = dirpath.split("/")[-1]
            data["mapping"].append(semantic_label)
            print("\nProcessing: {}".format(semantic_label))

            # process all audio files in genre sub-dir
            for f in filenames:

		        # load audio file
                file_path = os.path.join(dirpath, f)
                
                try:
                    
                    raw_signal, sample_rate = torchaudio.load(file_path) # signal dimensions: 1, time
                    
                    #offset = min(signals.shape[-1] - (signals.shape[-1] % num_segments), num_segments * samples_per_segment)
                    offset = (num_segments - 1) * samples_per_segment
                    first_segments = rearrange(             # signal dimensions: segment, samples_per_segment
                        raw_signal[0,:offset], 
                        "(segments samples_per_segment) -> segments samples_per_segment",
                        segments=num_segments - 1
                    )
                    
                    mfcc_transform = torchaudio.transforms.MFCC(
                        sample_rate=sample_rate,
                        n_mfcc=num_mfcc,
                        melkwargs={
                            'n_fft': n_fft,
                            'hop_length': hop_length
                        }
                    )

                    first_segments = mfcc_transform(first_segments)
                    last_segment = mfcc_transform(raw_signal[:,offset:]) # last_signal: 1, num_mfcc, num_hops

                    print(f'Processing:{f}, SR={sample_rate}, FirstSegLen={first_segments.shape}, LastSegLen={last_segment.shape}')

                    data['mfcc'].append(first_segments) # signal dimensions: file, segment, num_mfcc, num_hops
                    data['labels'].extend([i - 1] * (num_segments-1)) 
                    if last_segment.shape[-1] >= num_mfcc_vectors_per_segment:
                        data['mfcc'].append(last_segment[:,:,:num_mfcc_vectors_per_segment])
                        data['labels'].extend([i - 1])
                    
                except Exception as e:
                    errors.append({
                        'File':file_path,
                        'Error': e
                    })
                    continue
    
    data['mfcc'], ps = pack(data['mfcc'], "* num_mfcc num_hops") # merge all files into 1 tensor
    
    data['labels'] = torch.tensor(data['labels'])

    torch.save(data['mfcc'], 'mfcc.pt')
    torch.save(data['labels'], 'labels.pt')
    with open('mapping.json','w') as file:
        json.dump(data['mapping'], file)
    
    print('\n\n\n\n')
    print('-'* 30)
    print('Following errors occured')
    for e in errors:
        print(e)
    
    return data['mfcc'], data['labels'], data['mapping']

data_torch = save_mfcc_torch(DATASET_PATH, num_segments=10)

  data['mfcc'] = torch.load('mfcc.pt')
  data['labels'] = torch.load('labels.pt')


In [4]:
data_torch[0].shape, data_torch[1].shape, data_torch[2]

(torch.Size([9986, 13, 130]),
 torch.Size([9986]),
 ['genres_original\\blues',
  'genres_original\\classical',
  'genres_original\\country',
  'genres_original\\disco',
  'genres_original\\hiphop',
  'genres_original\\jazz',
  'genres_original\\metal',
  'genres_original\\pop',
  'genres_original\\reggae',
  'genres_original\\rock'])

### Loading data in numpy and lists (from reference code)

In [36]:
def save_mfcc(dataset_path, json_path, num_mfcc=13, n_fft=2048, hop_length=512, num_segments=5):
    """Extracts MFCCs from music dataset and saves them into a json file along witgh genre labels.

        :param dataset_path (str): Path to dataset
        :param json_path (str): Path to json file used to save MFCCs
        :param num_mfcc (int): Number of coefficients to extract
        :param n_fft (int): Interval we consider to apply FFT. Measured in # of samples
        :param hop_length (int): Sliding window for FFT. Measured in # of samples
        :param: num_segments (int): Number of segments we want to divide sample tracks into
        :return:
        """

    # dictionary to store mapping, labels, and MFCCs
    data = {
        "mapping": [],
        "labels": [],
        "mfcc": []
    }

    samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
    num_mfcc_vectors_per_segment = math.ceil(samples_per_segment / hop_length)
    errors = []
    # loop through all genre sub-folder
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):

        # ensure we're processing a genre sub-folder level
        if dirpath is not dataset_path:

            # save genre label (i.e., sub-folder name) in the mapping
            semantic_label = dirpath.split("/")[-1]
            data["mapping"].append(semantic_label)
            print("\nProcessing: {}".format(semantic_label))

            # process all audio files in genre sub-dir
            for f in filenames:

		        # load audio file
                file_path = os.path.join(dirpath, f)
                
                try:
                    signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE)
                except Exception as e:
                    errors.append({
                        'File':file_path,
                        'Error': e
                    })
                    continue
                print(f'Processing:{file_path}, SR={sample_rate}, SigLen={len(signal)}')
                # process all segments of audio file
                for d in range(num_segments):

                    # calculate start and finish sample for current segment
                    start = samples_per_segment * d
                    finish = start + samples_per_segment

                    # extract mfcc
                    mfcc = librosa.feature.mfcc(y=signal[start:finish], sr=sample_rate, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
                    mfcc = mfcc.T

                    # store only mfcc feature with expected number of vectors
                    if len(mfcc) == num_mfcc_vectors_per_segment:
                        data["mfcc"].append(mfcc.tolist())
                        data["labels"].append(i-1)
                        #print("{}, segment:{}".format(file_path, d+1))

    # save MFCCs to json file
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
    print('\n\n\n\n')
    print('-'* 30)
    print('Following errors occured')
    for e in errors:
        print(e)


save_mfcc(DATASET_PATH, JSON_PATH, num_segments=10)


Processing: genres_original\blues
Processing:DATA/genres_original\blues\blues.00000.wav, SR=22050, SigLen=661794
Processing:DATA/genres_original\blues\blues.00001.wav, SR=22050, SigLen=661794
Processing:DATA/genres_original\blues\blues.00002.wav, SR=22050, SigLen=661794
Processing:DATA/genres_original\blues\blues.00003.wav, SR=22050, SigLen=661794
Processing:DATA/genres_original\blues\blues.00004.wav, SR=22050, SigLen=661794
Processing:DATA/genres_original\blues\blues.00005.wav, SR=22050, SigLen=661794
Processing:DATA/genres_original\blues\blues.00006.wav, SR=22050, SigLen=661794
Processing:DATA/genres_original\blues\blues.00007.wav, SR=22050, SigLen=661794
Processing:DATA/genres_original\blues\blues.00008.wav, SR=22050, SigLen=661794
Processing:DATA/genres_original\blues\blues.00009.wav, SR=22050, SigLen=661794
Processing:DATA/genres_original\blues\blues.00010.wav, SR=22050, SigLen=661794
Processing:DATA/genres_original\blues\blues.00011.wav, SR=22050, SigLen=661794
Processing:DATA/g

  signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Processing:DATA/genres_original\jazz\jazz.00055.wav, SR=22050, SigLen=661794
Processing:DATA/genres_original\jazz\jazz.00056.wav, SR=22050, SigLen=665280
Processing:DATA/genres_original\jazz\jazz.00057.wav, SR=22050, SigLen=661794
Processing:DATA/genres_original\jazz\jazz.00058.wav, SR=22050, SigLen=661794
Processing:DATA/genres_original\jazz\jazz.00059.wav, SR=22050, SigLen=672100
Processing:DATA/genres_original\jazz\jazz.00060.wav, SR=22050, SigLen=661794
Processing:DATA/genres_original\jazz\jazz.00061.wav, SR=22050, SigLen=666820
Processing:DATA/genres_original\jazz\jazz.00062.wav, SR=22050, SigLen=661794
Processing:DATA/genres_original\jazz\jazz.00063.wav, SR=22050, SigLen=669240
Processing:DATA/genres_original\jazz\jazz.00064.wav, SR=22050, SigLen=661980
Processing:DATA/genres_original\jazz\jazz.00065.wav, SR=22050, SigLen=661794
Processing:DATA/genres_original\jazz\jazz.00066.wav, SR=22050, SigLen=667480
Processing:DATA/genres_original\jazz\jazz.00067.wav, SR=22050, SigLen=661794

## Dataset and Dataloader Wrappers

In [5]:
from torch.utils.data import Dataset, DataLoader
device = 'cuda' if torch.cuda.is_available() else 'cpu'

class AudioDataset(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
        self.Y = self.Y.to(torch.float)
        
        self.X = self.X.to(device)
        self.Y = self.Y.to(device)

    def __len__(self):
        return self.X.shape[0]
    
    def __getitem__(self, idx):
        return self.X[idx,:,:], self.Y[idx,:]


audio_mfccs, labels, mapping = save_mfcc_torch(DATASET_PATH, num_segments=10)

labels = torch.nn.functional.one_hot(labels)

split = (0.75, 0.15, 0.1)
train_split = int(audio_mfccs.shape[0] * split[0])
val_split = int(audio_mfccs.shape[0] * split[1])
test_split = audio_mfccs.shape[0] - train_split - val_split

dataset = AudioDataset(audio_mfccs, labels)
train_set, val_set, test_set = random_split(dataset, [train_split, val_split, test_split])

train_loader = DataLoader(train_set, batch_size=128, shuffle=True)
val_loader = DataLoader(val_set, batch_size=128, shuffle=True)
test_loader = DataLoader(test_set, batch_size=128, shuffle=True)

  data['mfcc'] = torch.load('mfcc.pt')
  data['labels'] = torch.load('labels.pt')


## Training

In [11]:
!pip install wandb

Collecting wandb
  Downloading wandb-0.18.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl.metadata (1.8 kB)
Collecting sentry-sdk>=2.0.0 (from wandb)
  Downloading sentry_sdk-2.18.0-py2.py3-none-any.whl.metadata (9.9 kB)
Collecting setproctitle (from wandb)
  Downloading setproctitle-1.3.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Downloading wandb-0.18.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.1/16.1 MB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Downloading sentry_sdk-2.18.0-py2.py3-none-any.whl (317 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m317.5/317.5 kB[0m [31m10.9 MB/s[0m eta [3

In [6]:
import wandb
import logging
logger = logging.getLogger("wandb")
logger.setLevel(logging.ERROR)

os.environ["WANDB_SILENT"]="true"

def calc_accuracy(output, labels):
    with torch.no_grad():
        predictions = torch.argmax(output, dim = 1)
        actual = torch.argmax(labels, dim = 1)
        correct = (predictions == actual).to(int).sum().item()
        return correct / output.shape[0]


def calc_val_acc(model, val_loader, loss_criterion):
    model.eval()
    total_loss = 0
    total_acc = 0
    with torch.no_grad():
        for audio_mfcc, labels in val_loader:
            output = model(audio_mfcc)
            total_loss += loss_criterion(output, labels)
            total_acc += calc_accuracy(output, labels)
    return total_loss / len(val_loader), total_acc / len(val_loader)


def train(model, optimizer, loss_criterion, train_loader, val_loader, num_epochs = 50,
           log_wandb = True, run_name = 'run1',traininig_metadata = None, logging_freq = 5, # logging
           regularization = None, reg_lambda = 0.0): # regularization
    run = None
    if(log_wandb):
        if traininig_metadata is None:
            traininig_metadata = {
                "epochs": 50,
                "optimizer": type(optimizer).__name__
            }
        run = wandb.init(
            name = run_name,
            project='audio-models',
            config=traininig_metadata
        )

    train_loss, train_acc, val_loss, val_acc = [], [], [], []

    for epoch in range(num_epochs):
        for i, batch in enumerate(train_loader):
            model.train()
            audio_mfcc, labels = batch
            optimizer.zero_grad()
            # print(audio_mfcc.shape)
            output = model(audio_mfcc)
            loss = loss_criterion(output, labels)

            if regularization == 'L1':
                loss += reg_lambda * sum(p.abs().sum() for p in model.parameters())
            elif regularization == 'L2':
                loss += reg_lambda * sum(p.pow(2).sum() for p in model.parameters())


            loss.backward()
            optimizer.step()

            if i % logging_freq == 0:
                t_loss = loss.item()
                t_acc = calc_accuracy(output, labels)
                v_loss, v_acc = calc_val_acc(model, val_loader, loss_criterion)
                print(f'Epoch[{epoch}/{num_epochs}]_Batch[{i}/{len(train_loader)}]: \
                      Train Loss = {t_loss:.3f}, Train Accuracy = {t_acc:.3f} \
                      Val Loss = {v_loss:.3f}, Val Accuracy = {v_acc:.3f}')
                train_loss.append(t_loss)
                val_loss.append(v_loss)
                train_acc.append(t_acc)
                val_acc.append(v_acc)
                
                
                if log_wandb:
                     run.log({
                                'train/accuracy':t_acc,
                                'train/loss':t_loss,
                                'val/accuracy':v_acc,
                                'val/loss':v_loss,
                                'batch':epoch * len(train_loader) + i,
                                'epoch':epoch
                            })
    run.finish()
    return model, train_loss, train_acc, val_loss, val_acc

## Simple ANN

In [178]:
ann_model_1 = nn.Sequential(
    nn.Flatten(),
    nn.Linear(1690, 512),
    nn.ReLU(),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Linear(256, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
    nn.Softmax()
)

loss_criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(ann_model_1.parameters(), lr=0.0001)

In [None]:
ann_model_1, train_loss, train_acc, val_loss, val_acc = train(
        ann_model_1,
        optimizer,
        loss_criterion, 
        train_loader, 
        val_loader, 
        num_epochs=100, 
        log_wandb=True, 
        run_name="100-Epochs-SimpleANN",
        traininig_metadata={
            "Optimizer":"Adam",
            "Epochs":100,
            "Model":"ANN-512-256-64-10"
        }
)

  return self._call_impl(*args, **kwargs)


Epoch[0/100]_Batch[0/59]:                       Train Loss = 2.305, Train Accuracy = 0.164                       Val Loss = 2.271, Val Accuracy = 0.171
Epoch[0/100]_Batch[5/59]:                       Train Loss = 2.151, Train Accuracy = 0.297                       Val Loss = 2.185, Val Accuracy = 0.262
Epoch[0/100]_Batch[10/59]:                       Train Loss = 2.153, Train Accuracy = 0.305                       Val Loss = 2.185, Val Accuracy = 0.261
Epoch[0/100]_Batch[15/59]:                       Train Loss = 2.147, Train Accuracy = 0.312                       Val Loss = 2.173, Val Accuracy = 0.271
Epoch[0/100]_Batch[20/59]:                       Train Loss = 2.136, Train Accuracy = 0.320                       Val Loss = 2.151, Val Accuracy = 0.309
Epoch[0/100]_Batch[25/59]:                       Train Loss = 2.153, Train Accuracy = 0.289                       Val Loss = 2.131, Val Accuracy = 0.324
Epoch[0/100]_Batch[30/59]:                       Train Loss = 2.051, Train Accuracy 

0,1
batch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇█████
epoch,▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇███
train/accuracy,▁▃▃▄▃▄▄▅▅▄▆▅▆▅▆▆▅▅▆▅▆█▆█▇▇███▇▇▇▇█▇▇▇▇██
train/loss,█▇▆▅▅▅▄▃▃▄▄▃▃▃▂▃▃▃▃▃▃▃▃▂▃▂▂▁▂▂▁▂▂▁▁▁▁▁▁▁
val/accuracy,▁▁▂▂▃▃▃▃▄▄▄▄▄▄▄▄▄▃▄▅▄▅▆▇▇▇▇▇▇▇▇▇▇▇▇▇████
val/loss,██▇▆▅▅▅▅▅▅▅▄▄▄▅▄▄▄▄▄▄▂▂▂▂▁▂▂▂▂▂▂▂▂▂▁▁▁▁▁

0,1
batch,5896.0
epoch,99.0
train/accuracy,0.96094
train/loss,1.50141
val/accuracy,0.69526
val/loss,1.76514


In [181]:
ann_model_2 = nn.Sequential(
    nn.Flatten(),
    nn.Linear(1690, 256),
    nn.ReLU(),
    nn.Linear(256, 256),
    nn.ReLU(),
    nn.Linear(256, 256),
    nn.ReLU(),
    nn.Linear(256, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
    nn.Softmax()
)

loss_criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(ann_model_2.parameters(), lr=0.0001)

ann_model_2, train_loss_2, train_acc_2, val_loss_2, val_acc_2 = train(
        ann_model_2,
        optimizer,
        loss_criterion, 
        train_loader, 
        val_loader, 
        num_epochs=100, 
        log_wandb=True, 
        run_name="100-Epochs-DeepANN",
        traininig_metadata={
            "Optimizer":"Adam",
            "Epochs":100,
            "Model":"ANN-256-256-256-64-10"
        }
)

  return self._call_impl(*args, **kwargs)


Epoch[0/100]_Batch[0/59]:                       Train Loss = 2.292, Train Accuracy = 0.141                       Val Loss = 2.266, Val Accuracy = 0.181
Epoch[0/100]_Batch[5/59]:                       Train Loss = 2.207, Train Accuracy = 0.203                       Val Loss = 2.207, Val Accuracy = 0.203
Epoch[0/100]_Batch[10/59]:                       Train Loss = 2.215, Train Accuracy = 0.242                       Val Loss = 2.159, Val Accuracy = 0.309
Epoch[0/100]_Batch[15/59]:                       Train Loss = 2.096, Train Accuracy = 0.383                       Val Loss = 2.131, Val Accuracy = 0.316
Epoch[0/100]_Batch[20/59]:                       Train Loss = 2.079, Train Accuracy = 0.383                       Val Loss = 2.113, Val Accuracy = 0.349
Epoch[0/100]_Batch[25/59]:                       Train Loss = 2.097, Train Accuracy = 0.344                       Val Loss = 2.096, Val Accuracy = 0.363
Epoch[0/100]_Batch[30/59]:                       Train Loss = 2.086, Train Accuracy 

0,1
batch,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇██
epoch,▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇▇████
train/accuracy,▁▂▄▄▄▅▅▆▆▆▇▆▇▇▇▇▆▇█▇█▇▇▆▇▇█▇▇██▇█▇█▇█▇█▇
train/loss,██▇█▇▅▅▅▆▄▃▂▅▄▂▂▂▁▂▂▃▂▃▃▂▂▂▁▂▁▁▂▂▂▂▂▂▁▁▂
val/accuracy,▁▂▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█▇▇███▇█████████▇█████
val/loss,███▇▇▅▄▅▅▆▄▄▃▃▄▃▄▃▂▃▂▄▃▂▂▂▃▃▂▃▂▂▂▁▄▂▁▂▁▅

0,1
batch,5896.0
epoch,99.0
train/accuracy,0.92188
train/loss,1.53977
val/accuracy,0.66852
val/loss,1.79173


In [182]:
ann_model_3 = nn.Sequential(
    nn.Flatten(),
    nn.Linear(1690, 1024),
    nn.ReLU(),
    nn.Linear(1024, 512),
    nn.ReLU(),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
    nn.Softmax()
)

loss_criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(ann_model_3.parameters(), lr=0.0001)

ann_model_3, train_loss_3, train_acc_3, val_loss_3, val_acc_3 = train(
        ann_model_3,
        optimizer,
        loss_criterion, 
        train_loader, 
        val_loader, 
        num_epochs=100, 
        log_wandb=True, 
        run_name="100-Epochs-WideANN",
        traininig_metadata={
            "Optimizer":"Adam",
            "Epochs":100,
            "Model":"ANN-1024-512-256-10"
        }
)

  return self._call_impl(*args, **kwargs)


Epoch[0/100]_Batch[0/59]:                       Train Loss = 2.294, Train Accuracy = 0.164                       Val Loss = 2.295, Val Accuracy = 0.152
Epoch[0/100]_Batch[5/59]:                       Train Loss = 2.184, Train Accuracy = 0.273                       Val Loss = 2.246, Val Accuracy = 0.210
Epoch[0/100]_Batch[10/59]:                       Train Loss = 2.213, Train Accuracy = 0.234                       Val Loss = 2.237, Val Accuracy = 0.217
Epoch[0/100]_Batch[15/59]:                       Train Loss = 2.226, Train Accuracy = 0.234                       Val Loss = 2.214, Val Accuracy = 0.249
Epoch[0/100]_Batch[20/59]:                       Train Loss = 2.250, Train Accuracy = 0.203                       Val Loss = 2.231, Val Accuracy = 0.226
Epoch[0/100]_Batch[25/59]:                       Train Loss = 2.201, Train Accuracy = 0.250                       Val Loss = 2.196, Val Accuracy = 0.261
Epoch[0/100]_Batch[30/59]:                       Train Loss = 2.150, Train Accuracy 

0,1
batch,▁▁▂▂▂▂▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇████
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇██
train/accuracy,▁▃▃▃▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇████▇█████
train/loss,█▆▅▆▅▄▅▅▅▄▃▄▄▄▄▄▄▃▃▃▃▃▃▃▃▃▃▁▂▂▁▁▂▁▁▁▁▁▁▁
val/accuracy,▁▂▂▂▃▃▄▄▄▄▃▄▄▃▄▄▄▄▄▄▄▄▄▅▄▇▇▇▇▇█▇█▇▇▇█▇██
val/loss,██▅▅▅▅▅▄▄▄▄▄▄▄▃▄▃▃▃▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
batch,5896.0
epoch,99.0
train/accuracy,0.96875
train/loss,1.49369
val/accuracy,0.71117
val/loss,1.74979


## ANN with Dropout and L2 Regularization

In [12]:
ann_dropout_l2_1 = nn.Sequential(
    nn.Flatten(),
    nn.Linear(1690, 512),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(256, 64),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(64, 10),
    nn.Softmax()
)

loss_criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(ann_dropout_l2_1.parameters(), lr=0.0001)

ann_dropout_l2_1, train_loss_4, train_acc_4, val_loss_4, val_acc_4 = train(
        ann_dropout_l2_1,
        optimizer,
        loss_criterion, 
        train_loader, 
        val_loader, 
        num_epochs=100, 
        log_wandb=True, 
        run_name="100-Epochs-ANN-Dropout-L2",
        traininig_metadata={
            "Optimizer":"Adam",
            "Epochs":100,
            "Model":"ANN-512-256-64-10-Dropout-L2"
        }
)

  from .autonotebook import tqdm as notebook_tqdm
  return self._call_impl(*args, **kwargs)


Epoch[0/100]_Batch[0/59]:                       Train Loss = 2.293, Train Accuracy = 0.141                       Val Loss = 2.231, Val Accuracy = 0.231


  return self._call_impl(*args, **kwargs)


Epoch[0/100]_Batch[5/59]:                       Train Loss = 2.260, Train Accuracy = 0.188                       Val Loss = 2.189, Val Accuracy = 0.252
Epoch[0/100]_Batch[10/59]:                       Train Loss = 2.226, Train Accuracy = 0.219                       Val Loss = 2.172, Val Accuracy = 0.275
Epoch[0/100]_Batch[15/59]:                       Train Loss = 2.230, Train Accuracy = 0.195                       Val Loss = 2.142, Val Accuracy = 0.315
Epoch[0/100]_Batch[20/59]:                       Train Loss = 2.142, Train Accuracy = 0.297                       Val Loss = 2.129, Val Accuracy = 0.325
Epoch[0/100]_Batch[25/59]:                       Train Loss = 2.162, Train Accuracy = 0.320                       Val Loss = 2.124, Val Accuracy = 0.335
Epoch[0/100]_Batch[30/59]:                       Train Loss = 2.167, Train Accuracy = 0.273                       Val Loss = 2.123, Val Accuracy = 0.331
Epoch[0/100]_Batch[35/59]:                       Train Loss = 2.140, Train Accuracy

## CNN Architecture

In [12]:
from einops.layers.torch import Rearrange
from torchsummary import summary

In [13]:
# height_out = ((height_in + 2*padding - kernel_height) / stride_height) + 1
# width_out = ((width_in + 2*padding - kernel_width) / stride_width) + 1
def get_simple_cnn():
    cnn_3L_64_batchnorm_dropout = nn.Sequential(
        # b 13 130 -> b 1 13 130
        Rearrange("batch hops mfcc_coeffs -> batch 1 hops mfcc_coeffs"), # add channel dimension
        
        # b 1 13 130 -> b 32 11 128
        nn.Conv2d(kernel_size=(3,3), in_channels=1, out_channels=32, ),
        nn.ReLU(),
        # b 32 11 128 -> b 32 6 64
        nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(1,1)),
        nn.BatchNorm2d(num_features=32),
    
        # b 32 6 64 -> b 32 4 62
        nn.Conv2d(kernel_size=(3,3), in_channels=32, out_channels=32),
        nn.ReLU(),
    
        # b 32 4 62 -> b 32 2 31
        nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(1,1)),
        nn.BatchNorm2d(num_features=32),
    
        # b 32 2 31 -> b 32 1 30
        nn.Conv2d(kernel_size=(2,2), in_channels=32, out_channels=32),
        nn.ReLU(),
    
        # b 32 1 30 -> b 32 1 15
        nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=(1,0)),
        nn.BatchNorm2d(num_features=32),
    
        # b 32 1 15 -> b 480
        nn.Flatten(),
        nn.Linear(480, 64),
        nn.ReLU(),
        nn.Dropout(0.3),
    
        nn.Linear(64, 10),
        nn.Softmax()
    )
    return cnn_3L_64_batchnorm_dropout

cnn_3L_64_batchnorm_dropout = get_simple_cnn()
summary(cnn_3L_64_batchnorm_dropout, input_size=(13, 130))

RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

In [8]:
loss_criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn_3L_64_batchnorm_dropout.parameters(), lr=0.0001)

cnn_3L_64_batchnorm_dropout, train_loss_cnn1, train_acc_cnn1, val_loss_cnn1, val_acc_cnn1 = train(
        cnn_3L_64_batchnorm_dropout,
        optimizer,
        loss_criterion, 
        train_loader, 
        val_loader, 
        num_epochs=100, 
        log_wandb=True, 
        run_name="100-Epochs-CNN-BatchNorm-Dropout",
        traininig_metadata={
            "Optimizer":"Adam",
            "Epochs":100,
            "Model":"CNN-Basic"
        }
)

  return self._call_impl(*args, **kwargs)


Epoch[0/100]_Batch[0/59]:                       Train Loss = 2.297, Train Accuracy = 0.156                       Val Loss = 2.302, Val Accuracy = 0.115


  return self._call_impl(*args, **kwargs)


Epoch[0/100]_Batch[5/59]:                       Train Loss = 2.296, Train Accuracy = 0.219                       Val Loss = 2.301, Val Accuracy = 0.188
Epoch[0/100]_Batch[10/59]:                       Train Loss = 2.289, Train Accuracy = 0.180                       Val Loss = 2.298, Val Accuracy = 0.205
Epoch[0/100]_Batch[15/59]:                       Train Loss = 2.291, Train Accuracy = 0.141                       Val Loss = 2.292, Val Accuracy = 0.224
Epoch[0/100]_Batch[20/59]:                       Train Loss = 2.271, Train Accuracy = 0.242                       Val Loss = 2.282, Val Accuracy = 0.232
Epoch[0/100]_Batch[25/59]:                       Train Loss = 2.249, Train Accuracy = 0.281                       Val Loss = 2.268, Val Accuracy = 0.247
Epoch[0/100]_Batch[30/59]:                       Train Loss = 2.250, Train Accuracy = 0.289                       Val Loss = 2.251, Val Accuracy = 0.272
Epoch[0/100]_Batch[35/59]:                       Train Loss = 2.244, Train Accuracy

### Reference Code (For Architecture Comparison)

In [11]:
import tensorflow.keras as keras

def print_reference_model():
    model = keras.Sequential()

    input_shape = (130, 13, 1)

    # 1st conv layer
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(keras.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    # 2nd conv layer
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu'))
    model.add(keras.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    # 3rd conv layer
    model.add(keras.layers.Conv2D(32, (2, 2), activation='relu'))
    model.add(keras.layers.MaxPooling2D((2, 2), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    # flatten output and feed it into dense layer
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dropout(0.3))

    # output layer
    model.add(keras.layers.Dense(10, activation='softmax'))

    print(model.summary())

print_reference_model()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 128, 11, 32)       320       
                                                                 
 max_pooling2d (MaxPooling2  (None, 64, 6, 32)         0         
 D)                                                              
                                                                 
 batch_normalization (Batch  (None, 64, 6, 32)         128       
 Normalization)                                                  
                                                                 
 conv2d_1 (Conv2D)           (None, 62, 4, 32)         9248      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 31, 2, 32)         0         
 g2D)                                                            
                                                        

## CNN with Skip Connections

In [9]:
from einops import pack
from einops.layers.torch import Rearrange

class ResNet(nn.Module):
    def __init__(self):
        super(ResNet, self).__init__()
        # b 13 130 -> b 1 13 130
        self.rearrange = Rearrange("batch hops mfcc_coeffs -> batch 1 hops mfcc_coeffs") # add channel dimension
        # b 1 13 130 -> b 32 11 128
        self.cnn1 = nn.Conv2d(kernel_size=(3,3), in_channels=1, out_channels=32)
        self.relu = nn.ReLU()
        # b 32 11 128 -> b 32 6 64
        self.mp1 = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(1,1))
        # b 32 6 64 -> b 32 1 16
        self.mp12 = nn.MaxPool2d(kernel_size=(4,4), stride=(4, 4))


        self.bn1 = nn.BatchNorm2d(num_features=32)
        # b 32 6 64 -> b 32 4 62
        self.cnn2 = nn.Conv2d(kernel_size=(3,3), in_channels=32, out_channels=32)
        # b 32 4 62 -> b 32 2 31
        self.mp2 = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(1,1))
        
        # b 32 2 31 -> b 32 1 15
        self.mp22 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        
        self.bn2 = nn.BatchNorm2d(num_features=32)
        # b 32 2 31 -> b 32 1 30
        self.cnn3 = nn.Conv2d(kernel_size=(2,2), in_channels=32, out_channels=32)
        # b 32 1 30 -> b 32 1 15
        self.mp3 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=(1,0))
        self.bn3 = nn.BatchNorm2d(num_features=32)
        
        # flatten o/p of mp3: b 32 1 15 -> b 480 
        # flatten o/p of mp22: b 32 1 15 -> b 480 
        # flatten o/p of mp12: b 32 1 15 -> b 512
        self.flat = nn.Flatten()

        # b (480 + 480 + 512) -> b 64
        self.mlp1 = nn.Linear(1472, 64)
        self.dropout = nn.Dropout(0.3)
        self.mlp2 = nn.Linear(64, 10)
        self.soft = nn.Softmax()
    
    def forward(self, x):
        x = self.rearrange(x)

        # convolution blocks
        l1 = self.bn1(self.mp1(self.relu(self.cnn1(x))))
        l2 = self.bn2(self.mp2(self.relu(self.cnn2(l1))))
        l3 = self.bn3(self.mp3(self.relu(self.cnn3(l2))))

        # appending all blocks into a single vector
        # [(batch, x), (batch, y), (batch, z)] -> (batch, x+y+z)
        a, b, c = self.flat(self.mp12(l1)), self.flat(self.mp22(l2)), self.flat(l3)
        #print(a.shape,b.shape, c.shape)
        l4, ps = pack([a,b,c], "batch *")
        
        # print(l4.shape)
        l4 = self.mlp1(l4)
        l4 = self.dropout(l4)
        l4 = self.mlp2(l4)
        return self.soft(l4)

resnet_3L_64_batchnorm_dropout = ResNet()
# summary(resnet_3L_64_batchnorm_dropout, input_size=(13, 130))

In [10]:
resnet_3L_64_batchnorm_dropout = ResNet()
resnet_3L_64_batchnorm_dropout.to(device)
# resnet_3L_64_batchnorm_dropout.compile()
loss_criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet_3L_64_batchnorm_dropout.parameters(), lr=0.0001)

resnet_3L_64_batchnorm_dropout, train_loss_cnn2, train_acc_cnn2, val_loss_cnn2, val_acc_cnn2 = train(
        resnet_3L_64_batchnorm_dropout,
        optimizer,
        loss_criterion, 
        train_loader, 
        val_loader, 
        num_epochs=100, 
        log_wandb=True, 
        run_name="100-Epochs-Custom-Resnet",
        traininig_metadata={
            "Optimizer":"Adam",
            "Epochs":100,
            "Model":"CNN-Resnet"
        }
)

  return self._call_impl(*args, **kwargs)


Epoch[0/100]_Batch[0/59]:                       Train Loss = 2.319, Train Accuracy = 0.023                       Val Loss = 2.318, Val Accuracy = 0.084


  return self._call_impl(*args, **kwargs)


Epoch[0/100]_Batch[5/59]:                       Train Loss = 2.277, Train Accuracy = 0.195                       Val Loss = 2.286, Val Accuracy = 0.146
Epoch[0/100]_Batch[10/59]:                       Train Loss = 2.233, Train Accuracy = 0.305                       Val Loss = 2.247, Val Accuracy = 0.262
Epoch[0/100]_Batch[15/59]:                       Train Loss = 2.183, Train Accuracy = 0.289                       Val Loss = 2.204, Val Accuracy = 0.284
Epoch[0/100]_Batch[20/59]:                       Train Loss = 2.131, Train Accuracy = 0.375                       Val Loss = 2.177, Val Accuracy = 0.284
Epoch[0/100]_Batch[25/59]:                       Train Loss = 2.160, Train Accuracy = 0.289                       Val Loss = 2.163, Val Accuracy = 0.284
Epoch[0/100]_Batch[30/59]:                       Train Loss = 2.127, Train Accuracy = 0.344                       Val Loss = 2.147, Val Accuracy = 0.303
Epoch[0/100]_Batch[35/59]:                       Train Loss = 2.117, Train Accuracy

## More Epochs: CNN vs Custom Resnet

In [15]:
cnn_3L_64_batchnorm_dropout_200 = get_simple_cnn()
cnn_3L_64_batchnorm_dropout_200.to(device)
loss_criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn_3L_64_batchnorm_dropout_200.parameters(), lr=0.0001)

cnn_3L_64_batchnorm_dropout_200, train_loss_cnn3, train_acc_cnn3, val_loss_cnn3, val_acc_cnn3 = train(
        cnn_3L_64_batchnorm_dropout_200,
        optimizer,
        loss_criterion, 
        train_loader, 
        val_loader, 
        num_epochs=200, 
        log_wandb=True, 
        run_name="200-Epochs-CNN-BatchNorm-Dropout",
        traininig_metadata={
            "Optimizer":"Adam",
            "Epochs":200,
            "Model":"CNN-Basic-Longer-Training"
        }
)

Epoch[0/200]_Batch[0/59]:                       Train Loss = 2.302, Train Accuracy = 0.125                       Val Loss = 2.303, Val Accuracy = 0.084
Epoch[0/200]_Batch[5/59]:                       Train Loss = 2.300, Train Accuracy = 0.125                       Val Loss = 2.302, Val Accuracy = 0.102
Epoch[0/200]_Batch[10/59]:                       Train Loss = 2.294, Train Accuracy = 0.203                       Val Loss = 2.300, Val Accuracy = 0.145
Epoch[0/200]_Batch[15/59]:                       Train Loss = 2.287, Train Accuracy = 0.219                       Val Loss = 2.295, Val Accuracy = 0.169
Epoch[0/200]_Batch[20/59]:                       Train Loss = 2.283, Train Accuracy = 0.164                       Val Loss = 2.288, Val Accuracy = 0.197
Epoch[0/200]_Batch[25/59]:                       Train Loss = 2.261, Train Accuracy = 0.258                       Val Loss = 2.279, Val Accuracy = 0.212
Epoch[0/200]_Batch[30/59]:                       Train Loss = 2.254, Train Accuracy 

In [16]:
resnet_3L_64_batchnorm_dropout_200 = ResNet()
resnet_3L_64_batchnorm_dropout_200.to(device)
# resnet_3L_64_batchnorm_dropout.compile()
loss_criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet_3L_64_batchnorm_dropout_200.parameters(), lr=0.0001)

resnet_3L_64_batchnorm_dropout_200, train_loss_cnn4, train_acc_cnn4, val_loss_cnn4, val_acc_cnn4 = train(
        resnet_3L_64_batchnorm_dropout_200,
        optimizer,
        loss_criterion, 
        train_loader, 
        val_loader, 
        num_epochs=200, 
        log_wandb=True, 
        run_name="200-Epochs-Custom-Resnet",
        traininig_metadata={
            "Optimizer":"Adam",
            "Epochs":200,
            "Model":"CNN-Resnet-Longer-Training"
        }
)

Epoch[0/200]_Batch[0/59]:                       Train Loss = 2.302, Train Accuracy = 0.086                       Val Loss = 2.324, Val Accuracy = 0.082
Epoch[0/200]_Batch[5/59]:                       Train Loss = 2.264, Train Accuracy = 0.227                       Val Loss = 2.258, Val Accuracy = 0.164
Epoch[0/200]_Batch[10/59]:                       Train Loss = 2.224, Train Accuracy = 0.219                       Val Loss = 2.229, Val Accuracy = 0.225
Epoch[0/200]_Batch[15/59]:                       Train Loss = 2.202, Train Accuracy = 0.266                       Val Loss = 2.193, Val Accuracy = 0.271
Epoch[0/200]_Batch[20/59]:                       Train Loss = 2.167, Train Accuracy = 0.281                       Val Loss = 2.173, Val Accuracy = 0.278
Epoch[0/200]_Batch[25/59]:                       Train Loss = 2.157, Train Accuracy = 0.289                       Val Loss = 2.152, Val Accuracy = 0.293
Epoch[0/200]_Batch[30/59]:                       Train Loss = 2.121, Train Accuracy 

## Adding Batchnorm, Separating Linear Projections for Different Residual Layers

In [19]:
from einops import pack
from einops.layers.torch import Rearrange

class ResNet_Sep(nn.Module):
    def __init__(self):
        super(ResNet_Sep, self).__init__()
        # b 13 130 -> b 1 13 130
        self.rearrange = Rearrange("batch hops mfcc_coeffs -> batch 1 hops mfcc_coeffs") # add channel dimension
        # b 1 13 130 -> b 32 11 128
        self.cnn1 = nn.Conv2d(kernel_size=(3,3), in_channels=1, out_channels=32)
        self.relu = nn.ReLU()
        # b 32 11 128 -> b 32 6 64
        self.mp1 = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(1,1))
        


        self.bn1 = nn.BatchNorm2d(num_features=32)
        # b 32 6 64 -> b 32 4 62
        self.cnn2 = nn.Conv2d(kernel_size=(3,3), in_channels=32, out_channels=32)
        # b 32 4 62 -> b 32 2 31
        self.mp2 = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(1,1))
        
        self.bn2 = nn.BatchNorm2d(num_features=32)
        # b 32 2 31 -> b 32 1 30
        self.cnn3 = nn.Conv2d(kernel_size=(2,2), in_channels=32, out_channels=32)
        # b 32 1 30 -> b 32 1 15
        self.mp3 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=(1,0))
        self.bn3 = nn.BatchNorm2d(num_features=32)


        # b 32 6 64 -> b 32 1 16
        self.mp12 = nn.MaxPool2d(kernel_size=(4,4), stride=(4, 4))
        self.mlp12 = nn.Linear(512, 64)
        self.bn12 = nn.BatchNorm2d(num_features=32)
        # b 32 2 31 -> b 32 1 15
        self.mp22 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.mlp22 = nn.Linear(480, 64)
        self.bn22 = nn.BatchNorm2d(num_features=32)
        
        
        
        # flatten o/p of mp3: b 32 1 15 -> b 480 
        # flatten o/p of mp22: b 32 1 15 -> b 480 
        # flatten o/p of mp12: b 32 1 15 -> b 512
        self.flat = nn.Flatten()

        self.mlp1 = nn.Linear(480, 64)
        self.dropout = nn.Dropout(0.3)
        self.dropout2 = nn.Dropout(0.5)
        self.mlp2 = nn.Linear(64, 10)
        self.soft = nn.Softmax()
    
    def forward(self, x):
        x = self.rearrange(x)

        # convolution blocks
        l1 = self.bn1(self.mp1(self.relu(self.cnn1(x))))
        l2 = self.bn2(self.mp2(self.relu(self.cnn2(l1))))
        l3 = self.bn3(self.mp3(self.relu(self.cnn3(l2))))

        # appending all blocks into a single vector
        # [(batch, x), (batch, y), (batch, z)] -> (batch, x+y+z)
        a, b, c = self.dropout2(self.mlp12(self.flat(self.bn12(self.mp12(l1))))), self.dropout2(self.mlp22(self.flat(self.bn12(self.mp22(l2))))), self.dropout(self.mlp1(self.flat(l3)))
        #print(a.shape,b.shape, c.shape)
        # l4, ps = pack([a,b,c], "batch *")
        l4 = a + b + c
        l4 = self.dropout(l4)
        l4 = self.mlp2(l4)
        return self.soft(l4)

resnet_sep = ResNet_Sep()
# summary(resnet_3L_64_batchnorm_dropout, input_size=(13, 130))

In [20]:
resnet_sep = ResNet_Sep()
resnet_sep.to(device)
# resnet_3L_64_batchnorm_dropout.compile()
loss_criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet_sep.parameters(), lr=0.0001)

resnet_sep, train_loss_cnn5, train_acc_cnn5, val_loss_cnn5, val_acc_cnn5 = train(
        resnet_sep,
        optimizer,
        loss_criterion, 
        train_loader, 
        val_loader, 
        num_epochs=200, 
        log_wandb=True, 
        run_name="200-Epochs-Custom-Resnet-Sep",
        traininig_metadata={
            "Optimizer":"Adam",
            "Epochs":200,
            "Model":"CNN-Resnet-Sep-Longer-Training"
        }
)

Epoch[0/200]_Batch[0/59]:                       Train Loss = 2.315, Train Accuracy = 0.109                       Val Loss = 2.345, Val Accuracy = 0.097
Epoch[0/200]_Batch[5/59]:                       Train Loss = 2.295, Train Accuracy = 0.109                       Val Loss = 2.304, Val Accuracy = 0.100
Epoch[0/200]_Batch[10/59]:                       Train Loss = 2.257, Train Accuracy = 0.227                       Val Loss = 2.274, Val Accuracy = 0.144
Epoch[0/200]_Batch[15/59]:                       Train Loss = 2.209, Train Accuracy = 0.289                       Val Loss = 2.240, Val Accuracy = 0.269
Epoch[0/200]_Batch[20/59]:                       Train Loss = 2.194, Train Accuracy = 0.289                       Val Loss = 2.207, Val Accuracy = 0.308
Epoch[0/200]_Batch[25/59]:                       Train Loss = 2.135, Train Accuracy = 0.391                       Val Loss = 2.178, Val Accuracy = 0.309
Epoch[0/200]_Batch[30/59]:                       Train Loss = 2.120, Train Accuracy 