<a href="https://colab.research.google.com/github/Faisal-NSU/CSE465/blob/main/Subesco%20Training%20Skeleton.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Download the pretrained checkpoint and the SUBESCO dataset archive, then unzip the dataset.

# Download And Extract

In [2]:
#Model Link
!gdown --id 1NiKtbbebxL8kNPw7bsbCr85lprAomcNw

Downloading...
From: https://drive.google.com/uc?id=1NiKtbbebxL8kNPw7bsbCr85lprAomcNw
To: /content/Resnet50_Ravdess_71acc.pth
100% 94.4M/94.4M [00:01<00:00, 71.7MB/s]


In [3]:
#Subesco.zip
!gdown --id 1k-afEJdwz5Tf4-bsuKOJzP7xn-KQTlkM

Downloading...
From: https://drive.google.com/uc?id=1k-afEJdwz5Tf4-bsuKOJzP7xn-KQTlkM
To: /content/SUBESCO.zip
100% 1.65G/1.65G [00:17<00:00, 93.2MB/s]


In [4]:
import zipfile
# Path of the downloaded archive (a zip file, despite the variable name).
dataset_directory = '/content/SUBESCO.zip'

# The context manager closes the archive even if extraction fails part-way,
# which the manual open()/close() pair did not guarantee.
with zipfile.ZipFile(dataset_directory, 'r') as zip_ref:
    zip_ref.extractall('/content')

# Custom Dataset Class

Create a custom `Dataset` that loads each audio clip and converts it to MFCC-based features.

In [5]:
import os
import torch
from torch.utils.data import Dataset
import pandas as pd
import torchaudio
from torch.utils.data import DataLoader
import torch.optim.lr_scheduler as lr_scheduler
from torch.nn.functional import normalize


class CustomDataset(Dataset):
    """Audio dataset yielding ``(features, label)`` pairs for each annotated clip.

    Each item is loaded with torchaudio, resampled to ``target_sample_rate``,
    mixed down to one channel, cut or zero-padded to exactly ``num_samples``
    samples, passed through ``transformation`` and finally combined with its
    first- and second-order deltas.

    Args:
        annotations_file: Path to a CSV readable by ``pandas.read_csv``.
            Column 0 is used as the audio file name and column 4 as the label.
        audio_dir: Directory that the file names in column 0 are relative to.
        transformation: Callable module applied to the fixed-length waveform
            (moved to ``device`` on construction).
        target_sample_rate: Sample rate every clip is resampled to.
        num_samples: Fixed waveform length (in samples) fed to ``transformation``.
        device: Device on which the waveform processing runs.
    """

    def __init__(self, annotations_file, audio_dir, transformation, target_sample_rate, num_samples, device):
        self.annotations = pd.read_csv(annotations_file)
        self.audio_dir = audio_dir
        self.device = device
        self.transformation = transformation.to(self.device)
        self.target_sample_rate = target_sample_rate
        self.num_samples = num_samples

    def __len__(self):
        """Return the number of rows in the annotations CSV."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Load, normalise and featurise the clip at ``index``.

        Returns:
            A ``(features, label)`` tuple. ``features`` is the output of
            ``transformation`` plus its delta and delta-delta (element-wise
            sum, so the shape equals the transformation's output shape).
        """
        audio_sample_path = self._get_audio_sample_path(index)
        label = self._get_audio_sample_label(index)

        signal, sr = torchaudio.load(audio_sample_path)
        signal = signal.to(self.device)
        signal = self._resample_if_necessary(signal, sr)
        signal = self._mix_down_if_necessary(signal)
        signal = self._cut_if_necessary(signal)
        signal = self._right_pad_if_necessary(signal)
        signal = self.transformation(signal)

        # First- and second-order deltas of the transformed signal.
        delta = torchaudio.functional.compute_deltas(signal)
        delta2 = torchaudio.functional.compute_deltas(delta)

        # The deltas are summed element-wise with the base features rather
        # than concatenated, so the feature shape is unchanged.
        features = signal + delta + delta2

        return features, label

    def _cut_if_necessary(self, signal):
        """Truncate ``signal`` along dim 1 to at most ``num_samples`` samples."""
        if signal.shape[1] > self.num_samples:
            signal = signal[:, :self.num_samples]
        return signal

    def _right_pad_if_necessary(self, signal):
        """Zero-pad ``signal`` on the right of the last dim up to ``num_samples``."""
        length_signal = signal.shape[1]
        if length_signal < self.num_samples:
            num_missing_samples = self.num_samples - length_signal
            last_dim_padding = (0, num_missing_samples)
            signal = torch.nn.functional.pad(signal, last_dim_padding)
        return signal

    def _resample_if_necessary(self, signal, sr):
        """Resample ``signal`` from ``sr`` to ``target_sample_rate`` when they differ."""
        if sr != self.target_sample_rate:
            resampler = torchaudio.transforms.Resample(sr, self.target_sample_rate)
            # Use the dataset's own device, not a notebook-level global, so the
            # class works regardless of what (if anything) `device` is bound to.
            resampler = resampler.to(self.device)
            signal = resampler(signal)
        return signal

    def _mix_down_if_necessary(self, signal):
        """Average over dim 0 when it has more than one entry, keeping the dim."""
        if signal.shape[0] > 1:
            signal = torch.mean(signal, dim=0, keepdim=True)
        return signal

    def _get_audio_sample_path(self, index):
        """Return ``audio_dir`` joined with the file name in column 0 of row ``index``."""
        path = os.path.join(self.audio_dir, self.annotations.iloc[index, 0])
        return path

    def _get_audio_sample_label(self, index):
        """Return the value in column 4 of row ``index``.

        NOTE(review): column 4 is presumably the ``label_id`` column of the
        CSV - confirm against the annotation files.
        """
        return self.annotations.iloc[index, 4]

Instantiate the dataset

In [6]:
# Annotation CSV for each split.
TRAIN_CSV = '/content/SUBESCO/train/train.csv'
TEST_CSV = '/content/SUBESCO/test/test.csv'
VALID_CSV = '/content/SUBESCO/valid/valid.csv'

# Folder holding the audio files of each split.
TRAIN_PATH = '/content/SUBESCO/train/'
TEST_PATH = '/content/SUBESCO/test/'
VALID_PATH = '/content/SUBESCO/valid/'

SAMPLE_RATE = 48_000
NUM_SAMPLES = 4 * SAMPLE_RATE  # every clip is cut/padded to this many samples

# Prefer the GPU when one is visible to PyTorch.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device {device}")

# One MFCC transform shared by the three datasets.
mfcc = torchaudio.transforms.MFCC(
    sample_rate=SAMPLE_RATE,
    n_mfcc=20,
    melkwargs={"n_fft": 2048, "hop_length": 512, "power": 2},
)

# Positional arguments that are identical for every split.
shared_dataset_args = (mfcc, SAMPLE_RATE, NUM_SAMPLES, device)

train_dataset = CustomDataset(TRAIN_CSV, TRAIN_PATH, *shared_dataset_args)
print(f"There are {len(train_dataset)} samples in the train.")

val_dataset = CustomDataset(VALID_CSV, VALID_PATH, *shared_dataset_args)
print(f"There are {len(val_dataset)} samples in the valid.")

test_dataset = CustomDataset(TEST_CSV, TEST_PATH, *shared_dataset_args)
print(f"There are {len(test_dataset)} samples in the test.")

Using device cuda
There are 4900 samples in the train.
There are 700 samples in the valid.
There are 1400 samples in the test.


In [7]:
# Peek at one featurised validation item to see the tensor shape the model receives.
signal, label = val_dataset[0]
print(signal.shape)

# Class balance per split, printed in the order train, test, valid.
for csv_path in (TRAIN_CSV, TEST_CSV, VALID_CSV):
    df = pd.read_csv(csv_path)
    print(df['label_id'].value_counts())


torch.Size([1, 20, 376])
3    700
6    700
2    700
5    700
1    700
4    700
0    700
Name: label_id, dtype: int64
6    200
5    200
4    200
3    200
2    200
1    200
0    200
Name: label_id, dtype: int64
6    100
5    100
4    100
3    100
2    100
1    100
0    100
Name: label_id, dtype: int64


# Model Loading and Training

In [8]:
from torch import nn
from torchsummary import summary

from torchvision import models
# Number of output classes: label_id takes the values 0-6 in the annotation CSVs.
NUM_CLASSES = 7

# ResNet-50 backbone initialised from torchvision's pretrained weights.
model = models.resnet50(pretrained=True)

# Replace the stem with a 1-input-channel convolution that keeps the original
# out_channels / kernel size / stride / padding. The new layer is freshly
# initialised; the pretrained stem weights are not reused.
first_conv = model.conv1
model.conv1 = nn.Conv2d(1, first_conv.out_channels,
                        kernel_size=first_conv.kernel_size[0],
                        stride=first_conv.stride[0],
                        padding=first_conv.padding[0])

# Replace the classifier head with dropout + a NUM_CLASSES-way linear layer.
num_ftrs = model.fc.in_features
model.fc = nn.Sequential(nn.Dropout(p=0.25), nn.Linear(num_ftrs, NUM_CLASSES))

# Move to the device only after the layers have been swapped; doing it before
# would leave the newly created conv1 and fc on the CPU.
model = model.to(device)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

In [9]:
from google.colab import drive

# Mount Google Drive under /content/gdrive; later cells save and load model
# checkpoints under /content/gdrive/MyDrive/CSE465/.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [10]:
# Ensure the whole model, including the replaced conv1 and fc layers, lives on
# the selected device before training.
model = model.to(device)
# Optional layer-by-layer summary (torchsummary), left disabled:
#summary(model, signal.shape)

In [11]:
from torch import nn
# Mini-batch size used for the train, test and validation loaders.
BATCH_SIZE = 32
# Multi-class classification loss applied to the model's raw outputs.
criterion = nn.CrossEntropyLoss()
# NOTE: a later cell re-creates the optimizer with lr=0.0001, replacing this one.
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)
# Learning-rate schedule, left disabled:
#step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)

def create_data_loader(train_data, batch_size, shuffle=True):
    """Wrap a dataset in a ``DataLoader``.

    Args:
        train_data: Dataset to iterate over (any split, despite the name).
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the data every epoch. Defaults to
            ``True`` to preserve the previous behaviour; pass ``False`` for
            evaluation loaders when a deterministic order is wanted.

    Returns:
        A ``DataLoader`` over ``train_data``.
    """
    return DataLoader(train_data, batch_size=batch_size, shuffle=shuffle)

# One loader per split, all sharing the same batch size.
train_dataloader = create_data_loader(train_data=train_dataset, batch_size=BATCH_SIZE)
test_dataloader = create_data_loader(train_data=test_dataset, batch_size=BATCH_SIZE)
val_dataloader = create_data_loader(train_data=val_dataset, batch_size=BATCH_SIZE)

In [13]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one training pass over ``dataloader`` and return the model.

    Puts ``model`` in training mode, moves every batch to the notebook-level
    ``device``, takes one optimizer step per batch, and prints the batch loss
    together with a running sample counter every 50 batches.
    """
    model.train()
    total = len(dataloader.dataset)
    for step, (inputs, targets) in enumerate(dataloader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Forward pass.
        batch_loss = loss_fn(model(inputs), targets)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        if step % 50 == 0:
            seen = step * len(inputs)
            print(f"loss: {batch_loss.item():>7f}  [{seen:>5d}/{total:>5d}]")
    return model

In [14]:
def test_loop(dataloader, model, loss_fn):
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for (X,y) in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return 100*correct

In [40]:
# Re-create the optimizer with lr=0.0001, ten times the 0.00001 used when the
# optimizer was first set up. A new Adam instance starts with fresh moment estimates.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

In [None]:
import math
import time
# Best validation accuracy seen so far. It is initialised only when missing:
# on a fresh kernel it starts at 0 (previously this raised NameError), while
# re-running the cell to continue training keeps the earlier best, so a worse
# model never overwrites the saved checkpoint.
if "best_acc" not in globals():
    best_acc = 0

EPOCHS = 10
start = time.time()
for t in range(EPOCHS):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, criterion, optimizer)
    acc = test_loop(val_dataloader, model, criterion)
    if acc > best_acc:
        # The value printed here is the previous best, not the new accuracy.
        print("saving model now",best_acc)
        best_acc = acc
        # Saves the whole model object (not just a state_dict) to Drive.
        torch.save(model,"/content/gdrive/MyDrive/CSE465/model.pth")

# Wall-clock duration of the whole run, in minutes (rounded up when printed).
final = (time.time() - start)/60
print(f"Done for all {EPOCHS} epochs in {math.ceil(final)} minutes\n")

In [42]:
# Load a whole pickled model object from Drive, replacing the in-memory model.
# NOTE(review): this file name differs from the "model.pth" written by the
# training loop - presumably a manually renamed checkpoint; confirm which run
# produced it.
# torch.load unpickles arbitrary objects, so only load checkpoints you trust.
model = torch.load('/content/gdrive/MyDrive/CSE465/5828val.pth')

# Testing The Model

In [43]:
def test_single_epoch(model, dataloader, device):
  correct = 0
  size = len(dataloader.dataset)

  model.eval()
  for input,target in dataloader:
        input, target = input.to(device), target.to(device)
        # calculate loss
        prediction = model(input)
        correct += (prediction.argmax(1) == target).type(torch.float).sum().item()
  correct /= size
  print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}% \n")

In [45]:
# Accuracy of the loaded model on the test split.
test_single_epoch(model,test_dataloader,device)


Test Error: 
 Accuracy: 49.0% 



In [46]:
# Sanity check: accuracy of the loaded model on the validation split.
test_single_epoch(model,val_dataloader,device)


Test Error: 
 Accuracy: 58.3% 



In [47]:
# Accuracy of the loaded model on the training split, for comparison with the
# validation and test figures.
test_single_epoch(model,train_dataloader,device)

Test Error: 
 Accuracy: 74.3% 

