<a href="https://colab.research.google.com/github/Ahmed-L/CSE465-/blob/main/last_work_for_faisal.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!gdown --id 1k-afEJdwz5Tf4-bsuKOJzP7xn-KQTlkM

Downloading...
From: https://drive.google.com/uc?id=1k-afEJdwz5Tf4-bsuKOJzP7xn-KQTlkM
To: /content/SUBESCO.zip
100% 1.65G/1.65G [00:10<00:00, 161MB/s]


In [2]:
#from google.colab import drive
#drive.mount('/content/drive')

import zipfile
dataset_directory = '/content/SUBESCO.zip'
zip_ref = zipfile.ZipFile(dataset_directory, 'r')
zip_ref.extractall('/content')
zip_ref.close()

In [3]:
import os
import torch
from torch.utils.data import Dataset
import pandas as pd
import torchaudio
from torch.utils.data import DataLoader
import torch.optim.lr_scheduler as lr_scheduler
from torch.nn.functional import normalize


class CustomDataset(Dataset):
    def __init__(self, annotations_file, audio_dir, transformation, target_sample_rate, num_samples, device):
        self.annotations = pd.read_csv(annotations_file)
        self.audio_dir = audio_dir
        self.device = device
        self.transformation = transformation.to(self.device)
        self.target_sample_rate = target_sample_rate
        self.num_samples = num_samples

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        audio_sample_path = self._get_audio_sample_path(index)
        #print(audio_sample_path)
        label = self._get_audio_sample_label(index)
        #print(label)
        signal, sr = torchaudio.load(audio_sample_path)
        signal = signal.to(self.device)
        signal = self._resample_if_necessary(signal, sr)
        signal = self._mix_down_if_necessary(signal)
        signal = self._cut_if_necessary(signal)
        signal = self._right_pad_if_necessary(signal)
        signal = self.transformation(signal)
        # produce delta and delta-delta and add to mfcc
        delta = torchaudio.functional.compute_deltas(signal)
        delta2 = torchaudio.functional.compute_deltas(delta)
        signal = torch.cat((signal, delta, delta2), 1) # concatenating deltas
        
        #signal = signal + delta + delta2
        avg_mfcc_deltas = signal
        #columns = list(signal[0][1].shape)
        #columns = int(columns[0])
        #avg_mfcc_deltas = (torch.sum(signal, 2))
        #avg_mfcc_deltas /= 60
        
        means = avg_mfcc_deltas.mean(dim=1, keepdim=True)
        stds = avg_mfcc_deltas.std(dim=1, keepdim=True)
        avg_mfcc_deltas = (avg_mfcc_deltas - means) / stds #NORMALIZED
        #avg_mfcc_deltas = normalize(avg_mfcc_deltas, p=2.0)

        return signal, label, avg_mfcc_deltas

    def _cut_if_necessary(self, signal):
        if signal.shape[1] > self.num_samples:
            # print(signal.shape[1]) # print sample size
            signal = signal[:, :self.num_samples]
        return signal

    def _right_pad_if_necessary(self, signal):
        length_signal = signal.shape[1]
        if length_signal < self.num_samples:
            num_missing_samples = self.num_samples - length_signal
            last_dim_padding = (0, num_missing_samples)
            signal = torch.nn.functional.pad(signal, last_dim_padding)
        return signal

    def _resample_if_necessary(self, signal, sr):
        if sr != self.target_sample_rate:
            resampler = torchaudio.transforms.Resample(sr, self.target_sample_rate)
            resampler = resampler.to(device)
            signal = resampler(signal)
        return signal

    def _mix_down_if_necessary(self, signal):
        if signal.shape[0] > 1:
            signal = torch.mean(signal, dim=0, keepdim=True)
        return signal

    # for any audio index this function returns the audio path
    def _get_audio_sample_path(self, index):
        path = os.path.join(self.audio_dir, self.annotations.iloc[index,0])
        return path
    
    # for specified audio index this will return the label from the data csv
    def _get_audio_sample_label(self, index):
        return self.annotations.iloc[index, 4]

In [4]:
TRAIN_CSV = '/content/SUBESCO/train/train.csv'
TEST_CSV = '/content/SUBESCO/test/test.csv'
VALID_CSV = '/content/SUBESCO/valid/valid.csv'

TRAIN_PATH = '/content/SUBESCO/train/'
TEST_PATH = '/content/SUBESCO/test/'
VALID_PATH = '/content/SUBESCO/valid/'

SAMPLE_RATE = 22050
NUM_SAMPLES = SAMPLE_RATE*4

device = ''
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device {device}")

mfcc = torchaudio.transforms.MFCC(sample_rate=SAMPLE_RATE, n_mfcc=44, melkwargs={"n_fft": 1024, "hop_length": 512, "power": 2})

train_dataset = CustomDataset(TRAIN_CSV,
                        TRAIN_PATH,
                        mfcc,
                        SAMPLE_RATE,
                        NUM_SAMPLES,
                        device)
print(f"There are {len(train_dataset)} samples in the train.")
signal, label, avg_mfcc_deltas = train_dataset[0]
print(signal.size())
print(avg_mfcc_deltas.shape)

#print(len(usd))

val_dataset = CustomDataset(VALID_CSV,
                        VALID_PATH,
                        mfcc,
                        SAMPLE_RATE,
                        NUM_SAMPLES,
                        device)

print(f"There are {len(val_dataset)} samples in the valid.")
signal, label, avg_mfcc_deltas = val_dataset[0]


test_dataset = CustomDataset(TEST_CSV,
                        TEST_PATH,
                        mfcc,
                        SAMPLE_RATE,
                        NUM_SAMPLES,
                        device)
print(f"There are {len(test_dataset)} samples in the test.")
signal, label, avg_mfcc_deltas = test_dataset[0]

Using device cuda
There are 4900 samples in the train.
torch.Size([1, 132, 173])
torch.Size([1, 132, 173])
There are 700 samples in the valid.
There are 1400 samples in the test.


In [None]:
import torch.nn as nn
m = nn.Sequential(
    nn.Flatten()
)
x = m(signal)
y = m(avg_mfcc_deltas)
print(x.size())
print(y.size())

torch.Size([1, 22836])
torch.Size([1, 22836])


In [None]:
import torch
import torch.nn.functional as F
class ANN(nn.Module):
  def __init__(self):
    super().__init__()
    self.flatten = nn.Flatten()
    self.dense_layers = nn.Sequential(
      nn.Linear(60, 512),
      #nn.ReLU(),
      nn.Linear(512, 256),
      nn.ReLU(),
      nn.Linear(256, 7),
    )
    self.softmax = nn.Softmax(dim=1)

  def forward(self, input_data):
    flattened_input_data = self.flatten(input_data)
    logits = self.dense_layers(flattened_input_data)
    prediction = self.softmax(logits)
    return prediction

In [5]:
!gdown --id 18hQTkVXPjlmxhFmpD2U9Qa_uhvYUY3M_

Downloading...
From: https://drive.google.com/uc?id=18hQTkVXPjlmxhFmpD2U9Qa_uhvYUY3M_
To: /content/Resnet_high_model.pth
100% 94.4M/94.4M [00:00<00:00, 203MB/s]


In [21]:
from torchsummary import summary
device = ''
from torchvision import models
from torch import nn

if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

print(f"Using device {device}")
model = torch.load('/content/Resnet_high_model.pth')
#model = torch.load('/content/Resnet_high_model.pth', map_location=torch.device('cpu'))
#model = torch.load('/content/ANN_test_acc_52.pth')
model = model.to(device)
summary(model, (signal.shape))

Using device cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 66, 87]           3,200
       BatchNorm2d-2           [-1, 64, 66, 87]             128
              ReLU-3           [-1, 64, 66, 87]               0
         MaxPool2d-4           [-1, 64, 33, 44]               0
            Conv2d-5           [-1, 64, 33, 44]           4,096
       BatchNorm2d-6           [-1, 64, 33, 44]             128
              ReLU-7           [-1, 64, 33, 44]               0
            Conv2d-8           [-1, 64, 33, 44]          36,864
       BatchNorm2d-9           [-1, 64, 33, 44]             128
             ReLU-10           [-1, 64, 33, 44]               0
           Conv2d-11          [-1, 256, 33, 44]          16,384
      BatchNorm2d-12          [-1, 256, 33, 44]             512
           Conv2d-13          [-1, 256, 33, 44]          16,384
      BatchNorm2d-14 

In [10]:
BATCH_SIZE = 32
EPOCHS = 50
LEARNING_RATE = 0.0001
import torch.utils.data as data
#import torch.optim.lr_scheduler as lr_scheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau

def create_data_loader(train_data, batch_size):
    train_dataloader = DataLoader(train_data, batch_size=batch_size)
    return train_dataloader

def train_single_epoch(model, data_loader, loss_fn, optimiser, device):
    for sr, target, input in data_loader:
        input, target = input.to(device), target.to(device)
        # calculate loss
        prediction = model(input)
        loss = loss_fn(prediction, target)
        # backpropagate error and update weights
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()
        
    print(f"loss: {loss.item()}")
    #print(optimiser.state_dict()['param_groups'][0]['lr'])


def test_single_epoch(model, dataloader, loss_fn, optimiser, device):
  correct = 0
  size = len(dataloader.dataset)

  with torch.no_grad():
    for sr, target, input in dataloader:
          input, target = input.to(device), target.to(device)
          # calculate loss
          prediction = model(input)
          #loss = loss_fn(prediction, target)
          correct += (prediction.argmax(1) == target).type(torch.float).sum().item()
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}% \n")

# original train function
def train(model, data_loader, loss_fn, optimiser, device, epochs):
    for i in range(epochs):
        print(f"Epoch {i+1}")
        train_single_epoch(model, data_loader, loss_fn, optimiser, device)
        print("---------------------------")
    print("Finished training")

def train_val(model, train_dataloader, test_dataloader, loss_fn, optimiser, device, epochs):
    for i in range(epochs):
        print(f"Epoch {i+1}")
        train_single_epoch(model, train_dataloader, loss_fn, optimiser, device)
        test_single_epoch(model, test_dataloader, loss_fn, optimiser, device)
        print("---------------------------")
    print("Finished training")

train_dataloader = create_data_loader(train_dataset, BATCH_SIZE)
val_dataloader = create_data_loader(val_dataset, BATCH_SIZE)
#model = model.to(device)

# initialise loss funtion + optimiser
loss_fn = nn.CrossEntropyLoss()
optimiser = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
scheduler = ReduceLROnPlateau(optimiser, 'min')


# Decay LR by a factor of 0.1 every 7 epochs
#scheduler = lr_scheduler.StepLR(optimiser, step_size=5, gamma=0.1)


In [11]:
model.train()
train_val(model, train_dataloader, val_dataloader, loss_fn, optimiser, device, 50)

Epoch 1
loss: 2.6540005207061768
Test Error: 
 Accuracy: 37.7% 

---------------------------
Epoch 2
loss: 2.7482919692993164
Test Error: 
 Accuracy: 48.7% 

---------------------------
Epoch 3
loss: 4.494072914123535
Test Error: 
 Accuracy: 48.9% 

---------------------------
Epoch 4
loss: 4.424227714538574
Test Error: 
 Accuracy: 61.9% 

---------------------------
Epoch 5
loss: 3.9568705558776855
Test Error: 
 Accuracy: 59.6% 

---------------------------
Epoch 6
loss: 4.638225555419922
Test Error: 
 Accuracy: 60.4% 

---------------------------
Epoch 7
loss: 2.478137493133545
Test Error: 
 Accuracy: 63.3% 

---------------------------
Epoch 8
loss: 2.088496685028076
Test Error: 
 Accuracy: 61.9% 

---------------------------
Epoch 9
loss: 1.2016628980636597
Test Error: 
 Accuracy: 67.1% 

---------------------------
Epoch 10
loss: 1.1741286516189575
Test Error: 
 Accuracy: 69.7% 

---------------------------
Epoch 11
loss: 0.453665167093277
Test Error: 
 Accuracy: 70.7% 

---------

In [None]:
optimiser = torch.optim.RMSprop(model.parameters(), lr=0.0001)
model.train()
train_val(model, train_dataloader, val_dataloader, loss_fn, optimiser, device, 27)

In [19]:
def test_single_epoch(model, dataloader, loss_fn, optimiser, device):
  correct = 0
  size = len(dataloader.dataset)
  model.eval()
  with torch.no_grad():
    for sr, target, input in dataloader:
          input, target = input.to(device), target.to(device)
          # calculate loss
          prediction = model(input)
          #loss = loss_fn(prediction, target)
          correct += (prediction.argmax(1) == target).type(torch.float).sum().item()
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}% \n")

test_dataloader = create_data_loader(test_dataset, BATCH_SIZE)

Previously ANN gave 39% accuracy, Without average MFCCS.

In [20]:
test_single_epoch(model, test_dataloader, loss_fn, optimiser, device)

Test Error: 
 Accuracy: 59.1% 

