<a href="https://colab.research.google.com/github/AdhemarDeSenneville/Math_Art/blob/main/CW2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Mount Google Drive inside the Colab VM at /content/gdrive so that files stored
# on Drive can be read through ordinary filesystem paths.
from google.colab import drive
drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [4]:
import os
import torch
from torch.utils.data import Dataset
import librosa
import torch.nn as nn
import torchaudio
import matplotlib.pyplot as plt
import random
import copy
import Utils

In [5]:
import torch
import torchvision
from torchvision.transforms import TrivialAugmentWide,AugMix,AutoAugment,RandAugment,Resize,ToTensor,Compose,ToTensor
from torch.utils.data import DataLoader,Dataset, random_split
import torch.nn.functional as F



In [6]:
# Root folder of the dataset: one sub-directory per class.
root_dir = "/content/gdrive/MyDrive/genres_original"

# os.listdir returns entries in arbitrary order and also lists plain files, so
# keep only directories and sort them: the class list (and therefore the number
# of network outputs) is then reproducible and not inflated by stray files.
classes = sorted(
    entry
    for entry in os.listdir(root_dir)
    if os.path.isdir(os.path.join(root_dir, entry))
)
num_classes = len(classes)

In [7]:
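# TODO(device): select the compute device here. Nothing visible in this notebook
# moves the model or the batches off the default device — confirm before adding one.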

In [8]:
class AudioClassificationDataset(Dataset):
    """Folder-per-class dataset of ``.wav`` files served as fixed-size frame matrices.

    ``root_dir`` must contain one sub-directory per class. Every ``.wav`` file in
    a class directory becomes one sample labelled with the index of that class in
    the sorted class list. Each waveform is zero-padded (or cropped) to
    ``sample_length`` samples and reshaped into rows of ``frame_size`` samples.

    Args:
        root_dir: Directory holding the class sub-directories.
        frame_size: Number of audio samples per frame (row of the returned tensor).
        transform: Optional callable applied to the framed tensor.
    """

    # Clip length (in samples) the default padding is sized for. Presumably the
    # longest clip of the dataset as once measured with compute_max_length —
    # TODO confirm against the actual data.
    DEFAULT_MAX_SAMPLES = 675808

    def __init__(self, root_dir, frame_size, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.frame_size = frame_size

        # os.listdir gives no ordering guarantee and also returns plain files:
        # keep directories only and sort them so that label indices are stable
        # across runs and a stray file in root_dir cannot crash the scan.
        self.classes = sorted(
            entry
            for entry in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, entry))
        )

        # Same formula as before: one frame is added past DEFAULT_MAX_SAMPLES and
        # the remainder removed (so an already aligned length still gains a frame).
        base = self.DEFAULT_MAX_SAMPLES
        self.sample_length = base + frame_size - base % frame_size

        # (path, class index) pairs, in a deterministic order.
        self.file_list = []
        for class_idx, class_name in enumerate(self.classes):
            class_dir = os.path.join(root_dir, class_name)
            for filename in sorted(os.listdir(class_dir)):
                if filename.endswith('.wav'):
                    file_path = os.path.join(class_dir, filename)
                    self.file_list.append((file_path, class_idx))

    def __len__(self):
        """Return the number of ``.wav`` files found under ``root_dir``."""
        return len(self.file_list)

    def compute_max_length(self):
        """Grow ``sample_length`` to cover the longest file, then re-align it.

        Every file is loaded once. ``sample_length`` only ever increases, and it
        is finally rounded up to a multiple of ``frame_size`` so that
        ``__getitem__`` can split the padded waveform into whole frames.
        """
        for file_path, _ in self.file_list:
            audio, _sample_rate = torchaudio.load(file_path)
            size = audio.size()[1]
            if size > self.sample_length:
                self.sample_length = size

        print("Max sample length :", self.sample_length)

        # The original computed this value but never stored it, leaving
        # sample_length unaligned. Round up only when needed so that calling the
        # method repeatedly does not keep adding frames.
        remainder = self.sample_length % self.frame_size
        if remainder:
            self.sample_length += self.frame_size - remainder

    def __getitem__(self, idx):
        """Return ``(frames, class_idx)`` for sample ``idx``.

        ``frames`` has ``frame_size`` columns. All channels returned by
        ``torchaudio.load`` are flattened into the same frame axis, so mono files
        are presumably expected — TODO confirm for multi-channel data.
        """
        file_path, class_idx = self.file_list[idx]

        # Load the waveform with torchaudio: tensor of shape (channels, samples).
        audio, sample_rate = torchaudio.load(file_path)

        # Bring every clip to exactly sample_length samples: crop longer clips,
        # zero-pad shorter ones on the right.
        n_samples = audio.shape[-1]
        if n_samples > self.sample_length:
            audio = audio[..., :self.sample_length]
        elif n_samples < self.sample_length:
            audio = F.pad(audio, (0, self.sample_length - n_samples), "constant", 0)

        # reshape (rather than view) also works when cropping produced a
        # non-contiguous tensor.
        audio = audio.reshape(-1, self.frame_size)

        # Apply transform if specified
        if self.transform:
            audio = self.transform(audio)

        # The label is returned as an integer class index.
        return audio, class_idx

In [9]:
frame_size = 256     # audio samples per frame (one LSTM time step)
TRAIN_PERCENT = 80   # share of the files used for training
SPLIT_SEED = 42      # fixes the train/validation split across runs

# Use a lower-case name: binding the instance to `Dataset` would shadow
# torch.utils.data.Dataset and break any re-run of the class definition cell.
audio_dataset = AudioClassificationDataset(root_dir, frame_size)

# Derive the split sizes from the dataset instead of hard-coding [800, 200],
# which only works when exactly 1000 files are found.
n_train = len(audio_dataset) * TRAIN_PERCENT // 100
lengths = [n_train, len(audio_dataset) - n_train]
train_d, valid_d = random_split(
    audio_dataset,
    lengths,
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Checking input tensor shape
img, label = audio_dataset[42]
img_size = img.size()
print("Image size :",img_size)

# Create the Train and Test Loaders
dataloader_train = DataLoader(train_d, batch_size=32, shuffle=True)
# Evaluation does not depend on sample order, so there is no need to shuffle.
dataloader_test = DataLoader(valid_d, batch_size=32, shuffle=False)

# Checking batch tensor info
itr = iter(dataloader_train)
item, label = next(itr)
print("Batch info | type: ",type(item),"| size:",item.size(),"| label size :",label.size(),"|")

Image size : torch.Size([2640, 256])
Batch info | type:  <class 'torch.Tensor'> | size: torch.Size([32, 2640, 256]) | label size : torch.Size([32]) |


In [10]:
# Loss shared by the training loop and by predict(): cross-entropy between the
# network outputs (raw scores) and integer class labels.
loss_fcn = nn.CrossEntropyLoss()

In [11]:
def predict(model, loader, loss_fn=None):
    """Evaluate ``model`` on ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        model: Network to evaluate; it is switched to eval mode and left there.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar tensor.
            Defaults to the notebook-level ``loss_fcn``.

    Returns:
        A pair of Python floats: the loss averaged over batches, and the
        fraction of correctly classified samples.

    Raises:
        ValueError: If ``loader`` yields no batch.
    """
    if loss_fn is None:
        loss_fn = loss_fcn

    model.eval()
    total_loss = 0.0
    n_batches = 0
    n_correct = 0
    n_samples = 0

    with torch.no_grad():
        for imgs, labels in loader:
            outputs = model(imgs)
            total_loss += loss_fn(outputs, labels).item()
            n_batches += 1

            preds = outputs.argmax(dim=1)
            n_correct += (preds == labels).sum().item()
            # Count the samples actually seen: len(loader) * batch_size
            # over-counts whenever the last batch is smaller than batch_size,
            # which silently lowers the reported accuracy.
            n_samples += labels.size(0)

    if n_batches == 0:
        raise ValueError("predict() received a loader with no batches")

    return total_loss / n_batches, n_correct / n_samples

In [12]:
# Step size given to the Adam optimiser in train_classification.
learning_rate = 1e-2
# Maximum number of passes over the training set.
epoch_number = 10

# Early-stopping patience: training stops once the number of consecutive epochs
# without a new best test accuracy exceeds this value.
stop_befor = 3

In [13]:
def _mean_abs_grad(model):
    """Return the mean absolute value over all gradient entries stored on ``model``."""
    total = 0.0
    n_entries = 0
    for param in model.parameters():
        if param.grad is not None:
            total += param.grad.abs().sum().item()
            n_entries += param.grad.numel()
    return total / n_entries if n_entries else 0.0


def train_classification(Net):
    """Train ``Net`` with Adam and early stopping, plot the curves, return the best model.

    The function relies on notebook-level names: ``learning_rate``,
    ``epoch_number``, ``stop_befor``, ``dataloader_train``, ``dataloader_test``,
    ``loss_fcn``, ``predict`` and ``Utils.ProgressBare``.

    After every epoch the model is evaluated on both loaders. The weights with
    the highest test accuracy are remembered, and training stops early once
    more than ``stop_befor`` consecutive epochs bring no improvement.

    Args:
        Net: The ``nn.Module`` to train; it is modified in place.

    Returns:
        ``Net`` with the best weights seen during training loaded.
    """
    train_loss_hist, test_loss_hist = [], []
    train_acc_hist, test_acc_hist = [], []

    # Initialise the early-stopping state up front: previously stop_count and
    # best_model_wts were only created on the first improvement, so a first
    # epoch with zero test accuracy raised UnboundLocalError / NameError.
    best_acc = 0
    stop_count = 0
    best_model_wts = copy.deepcopy(Net.state_dict())

    optimizer = torch.optim.Adam(Net.parameters(), lr=learning_rate)

    for epoch in range(epoch_number):
        Net.train()
        epoch_loss = 0
        count = 0
        p = Utils.ProgressBare(len(dataloader_train), Size=50, Mode="Normal", Freq=0.1)

        for imgs, labels in dataloader_train:
            count += 1
            # Clear gradients first so nothing left over from a previous step
            # (or from before this call) leaks into the update.
            optimizer.zero_grad()
            # Forward pass and loss
            y_hat = Net(imgs)
            loss = loss_fcn(y_hat, labels)
            # Back-propagate, then read the gradient statistic while the
            # gradients are still populated.
            loss.backward()
            mean_grad_batch = _mean_abs_grad(Net)
            # Update the parameters from the computed gradients
            optimizer.step()

            epoch_loss += loss.item()
            p.Update("Epoch "+str(epoch+1)+"/"+str(epoch_number)
                     +" | Loss : "+str(epoch_loss/count)[:5]
                     +" | MeanGrad "+f"{mean_grad_batch:.2e}")

        test_loss, test_acc = predict(Net, dataloader_test)
        train_loss, train_acc = predict(Net, dataloader_train)
        # Store plain floats whether predict returns tensors or numbers.
        test_acc, train_acc = float(test_acc), float(train_acc)

        p.End("Test Loss : "+str(test_loss)[:5]+" Train Loss : "+str(train_loss)[:5])

        train_loss_hist.append(train_loss)
        train_acc_hist.append(train_acc)
        test_loss_hist.append(test_loss)
        test_acc_hist.append(test_acc)

        if test_acc > best_acc:
            stop_count = 0
            best_acc = test_acc
            best_model_wts = copy.deepcopy(Net.state_dict())
        else:
            stop_count += 1
            if stop_count > stop_befor:
                break

    plt.plot(train_loss_hist, label='Train')
    plt.plot(test_loss_hist, label='Test')
    # Add title and axis labels
    plt.legend()
    plt.title('Loss history')
    plt.xlabel('epochs')
    plt.ylabel('Loss')
    plt.show()

    # The train curve previously plotted test_acc_hist a second time.
    plt.plot(train_acc_hist, label='Train')
    plt.plot(test_acc_hist, label='Test')
    # Add title and axis labels
    plt.legend()
    plt.title('Accuracy history')
    plt.xlabel('epochs')
    plt.ylabel('Accuracy')
    plt.show()

    print("Best Test Accuracy :",best_acc)

    Net.load_state_dict(best_model_wts)
    return Net

In [14]:
class AudioLSTM(nn.Module):
    """Single-layer LSTM classifier over a sequence of audio frames.

    Args:
        input_size: Number of features per time step (samples per frame).
        hidden_size: Width of the LSTM hidden state.
        output_size: Number of output scores. Defaults to the notebook-level
            ``num_classes`` when omitted, as before.
    """

    def __init__(self, input_size, hidden_size, output_size=None):
        super().__init__()
        if output_size is None:
            output_size = num_classes
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=1, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a ``(batch, time, input_size)`` tensor to ``(batch, output_size)`` scores."""
        # nn.LSTM starts from zero hidden/cell states when none are given, using
        # the input's dtype and device. Building float32 zeros by hand added
        # nothing and failed for models converted to another dtype.
        out, _ = self.lstm(x)

        # Decode the hidden state of the last time step
        return self.fc(out[:, -1, :])

In [15]:
print("test")
# One frame is fed per time step, and the hidden state is as wide as a frame.
LSTM_NET = AudioLSTM(input_size=frame_size, hidden_size=frame_size)



test


In [None]:
# Train the LSTM and rebind the name to the model returned by train_classification.
LSTM_NET = train_classification(LSTM_NET)

