# CNN implementation

In [None]:
import os

#os.chdir("/drive/MyDrive/data")

import IPython.display as ipd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import sklearn.utils, sklearn.preprocessing, sklearn.decomposition, sklearn.svm
import librosa
import librosa.display
from tensorflow.keras.utils import to_categorical
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

from torchvision.transforms import Compose, ToTensor, RandomAffine, RandomHorizontalFlip, RandomVerticalFlip, ColorJitter
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision

import random
import utils

plt.rcParams['figure.figsize'] = (17, 5)

In [None]:
def saveheavy(df, name, n):
    """Save a heavy DataFrame as ``n`` consecutive row chunks, one HDF5 file each.

    Chunk ``i`` (1-based) is written to ``{name}_{i}.h5`` under the key ``'x'``
    with ``mode='w'``, so an existing file of that name is overwritten.

    Args:
        df: DataFrame to split row-wise.
        name: Path prefix of the output files.
        n: Number of files to write; must be at least 1.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n!r}")

    total = len(df)
    rows_per_chunk = total / n
    for i in range(n):
        start = int(rows_per_chunk * i)
        # The last chunk always runs to the end of the frame so float rounding
        # in the boundary computation can never drop trailing rows.
        stop = total if i == n - 1 else int(rows_per_chunk * (i + 1))
        # `key` is passed by keyword: positional use is deprecated in pandas.
        df.iloc[start:stop].to_hdf(f'{name}_{i+1}.h5', key='x', mode='w')
    
def readheavy(name, n, column, Dir):
    """Reassemble a DataFrame from ``n`` HDF5 chunk files.

    Reads ``{name}_1.h5`` ... ``{name}_{n}.h5`` (key ``'x'``) from the current
    working directory and concatenates them in order with a fresh 0..N-1 index.

    Args:
        name: Path prefix of the chunk files.
        n: Number of chunk files to read.
        column: Column labels of the (empty) seed frame, which fixes the
            leading column order of the result.
        Dir: Currently unused. A disabled variant of this function read the
            files from ``Data/{Dir}/`` instead; the parameter is kept so
            existing calls keep working.

    Returns:
        The concatenated DataFrame (an empty frame with ``column`` as columns
        when ``n`` is 0).
    """
    # Collect every chunk first and concatenate once: concatenating inside the
    # loop re-copies all previously loaded rows on every iteration.
    parts = [pd.DataFrame(columns=column)]
    parts.extend(pd.read_hdf(f"{name}_{i+1}.h5", key='x') for i in range(n))
    return pd.concat(parts, ignore_index=True)

In [None]:
# Restore datasets from their HDF5 chunk files (presumably written earlier with
# saveheavy -- confirm). Only the test split, stored as a single chunk, is
# loaded; the validation and training loads below are left disabled.

test = readheavy('test', 1, ['audio', 'y'], 'Audio')
##validation = readheavy('validation', 2, ['audio', 'y'], 'Audio')
#training = readheavy('training', 16, ['audio', 'y'], 'Audio')

In [None]:
def get_stft(data):
    """Compute the STFT magnitude spectrogram of every audio clip in ``data``.

    Args:
        data: DataFrame with an ``'audio'`` column (one waveform per row) and a
            ``'y'`` column (the label of that row).

    Returns:
        DataFrame with columns ``'stft'`` and ``'y'`` and a 0..N-1 index, rows
        in the same order as ``data``. Each ``'stft'`` cell holds
        ``abs(librosa.stft(audio, n_fft=1024, hop_length=512))``.
    """
    # Iterate over the column values instead of ``data.loc[j]`` so the function
    # does not depend on the frame having a 0..N-1 index.
    spectrograms = [
        np.abs(librosa.stft(audio, n_fft=1024, hop_length=512))
        for audio in data['audio']
    ]
    # Build the frame once; growing it with pd.concat per row is quadratic.
    # The explicit object dtype keeps each 2-D spectrogram in a single cell.
    return pd.DataFrame({
        'stft': pd.Series(spectrograms, dtype=object),
        'y': list(data['y']),
    })

def get_mel(data, sr=22050, n_mels=128):
    """Compute the mel spectrogram of every audio clip in ``data``.

    The previous body was a verbatim copy of ``get_stft`` and stored the plain
    STFT magnitude in the ``'mel'`` column; this version actually applies the
    mel filter bank.

    Args:
        data: DataFrame with an ``'audio'`` column (one waveform per row) and a
            ``'y'`` column (the label of that row).
        sr: Sampling rate of the waveforms. Defaults to librosa's default of
            22050 Hz -- NOTE(review): confirm this matches how the audio was
            loaded.
        n_mels: Number of mel bands per spectrogram.

    Returns:
        DataFrame with columns ``'mel'`` and ``'y'`` and a 0..N-1 index, rows in
        the same order as ``data``.
    """
    mels = [
        librosa.feature.melspectrogram(
            y=audio, sr=sr, n_fft=1024, hop_length=512, n_mels=n_mels
        )
        for audio in data['audio']
    ]
    # Build the frame once; growing it with pd.concat per row is quadratic.
    # The explicit object dtype keeps each 2-D spectrogram in a single cell.
    return pd.DataFrame({
        'mel': pd.Series(mels, dtype=object),
        'y': list(data['y']),
    })

In [None]:
# Convert the raw audio into STFT magnitude spectrograms.
# Only the first 10 rows of the test split are converted here; the validation
# and training conversions below are left disabled.


test = get_stft(test[:10])
#validation = get_stft(validation)
#training = get_stft(training)


In [None]:
def clip_stft(df, n_samples):
    """Cut every spectrogram in ``df`` into fixed-length, half-overlapping clips.

    The first column of ``df`` is the feature column. Each cell is transposed
    before slicing, so windows of ``n_samples`` rows are taken along what was
    the second axis of the stored array (presumably time, i.e. cells are
    ``(freq_bins, frames)`` -- confirm). Consecutive windows start
    ``n_samples // 2`` rows apart.

    Args:
        df: DataFrame whose first column holds 2-D arrays and which has a
            ``'y'`` label column.
        n_samples: Window length in rows of the transposed array; at least 1.

    Returns:
        DataFrame with the same columns as ``df`` and a 0..N-1 index, one row
        per clip; every clip carries the label of the row it was cut from.

    Raises:
        ValueError: If ``n_samples`` is smaller than 1.
    """
    if n_samples < 1:
        raise ValueError(f"n_samples must be at least 1, got {n_samples!r}")

    feature_col = df.columns[0]
    # Half-window hop. Clamp to 1 so n_samples == 1 cannot produce a hop of 0,
    # which previously made the window loop spin forever.
    hop = max(1, n_samples // 2)

    clips, labels = [], []
    for spec, label in zip(df[feature_col], df['y']):
        frames = spec.transpose()
        # NOTE(review): the strict upper bound is kept from the original loop
        # (`n < len - n_samples`); it skips a window that would end exactly on
        # the last row. Confirm whether that is intended.
        for start in range(0, len(frames) - n_samples, hop):
            clips.append(frames[start:start + n_samples])
            labels.append(label)

    # Build the result once (per-clip pd.concat is quadratic). The explicit
    # object dtype keeps each 2-D clip in a single cell.
    clipped = pd.DataFrame({
        feature_col: pd.Series(clips, dtype=object),
        'y': labels,
    })
    # Keep the input's column layout, including any extra (then empty) columns.
    return clipped.reindex(columns=list(dict.fromkeys([*df.columns, 'y'])))

def clip_audio(df, n_samples):
    """Cut every waveform in ``df`` into fixed-length, half-overlapping clips.

    The first column of ``df`` is the feature column. Windows of ``n_samples``
    elements are sliced from each cell along its first axis; consecutive
    windows start ``n_samples // 2`` elements apart.

    Args:
        df: DataFrame whose first column holds sliceable sequences (e.g. 1-D
            arrays) and which has a ``'y'`` label column.
        n_samples: Window length in elements; at least 1.

    Returns:
        DataFrame with the same columns as ``df`` and a 0..N-1 index, one row
        per clip; every clip carries the label of the row it was cut from.

    Raises:
        ValueError: If ``n_samples`` is smaller than 1.
    """
    if n_samples < 1:
        raise ValueError(f"n_samples must be at least 1, got {n_samples!r}")

    feature_col = df.columns[0]
    # Half-window hop. Clamp to 1 so n_samples == 1 cannot produce a hop of 0,
    # which previously made the window loop spin forever.
    hop = max(1, n_samples // 2)

    clips, labels = [], []
    for signal, label in zip(df[feature_col], df['y']):
        # NOTE(review): the strict upper bound is kept from the original loop
        # (`n < len - n_samples`); it skips a window that would end exactly on
        # the last element. Confirm whether that is intended.
        for start in range(0, len(signal) - n_samples, hop):
            clips.append(signal[start:start + n_samples])
            labels.append(label)

    # Build the result once (per-clip pd.concat is quadratic). The explicit
    # object dtype keeps each clip in a single cell instead of being expanded.
    clipped = pd.DataFrame({
        feature_col: pd.Series(clips, dtype=object),
        'y': labels,
    })
    # Keep the input's column layout, including any extra (then empty) columns.
    return clipped.reindex(columns=list(dict.fromkeys([*df.columns, 'y'])))
          

In [None]:
# Cut every test spectrogram into 128-row windows; clip_stft advances the
# window by half its length, so consecutive clips overlap by 64 rows.
test_clip = clip_stft(test, 128)

In [None]:
# Convert the clip table to a 2-D array with one row per clip: column 0 is the
# clip, column 1 its label (the layout DataDiego indexes into).
diego = pd.DataFrame(test_clip).to_numpy()

In [None]:
# Notebook cell: display the shape of the freshly built clip array.
diego.shape

In [None]:
# Replace the in-memory array with a copy stored in "diego.npy".
# NOTE(review): no matching np.save is visible in this file -- the file is
# presumably produced elsewhere. allow_pickle=True unpickles arbitrary objects,
# so only load files from a trusted source.
diego = np.load("diego.npy",allow_pickle=True)

In [None]:
# Notebook cell: display the shape of the array loaded from disk.
diego.shape

In [None]:
from torch.utils.data import Dataset, DataLoader

# Torch-compatible dataset over a 2-column array of (input, label) rows.
class DataDiego(Dataset):
    """Map-style dataset backed by a 2-D array.

    Column 0 of ``data`` supplies the inputs and column 1 the labels. When a
    ``transform`` is given it is applied to the input (never to the label)
    each time an item is fetched.
    """

    def __init__(self, data, transform=None):
        self.transform = transform
        self.x, self.y = data[:, 0], data[:, 1]

    def __len__(self):
        # One item per row of the backing array.
        return len(self.x)

    def __getitem__(self, idx):
        sample, label = self.x[idx], self.y[idx]
        if not self.transform:
            return sample, label
        return self.transform(sample), label

In [None]:
# Preprocessing pipeline applied to every input returned by the dataset.
transforms = Compose([
    # Converts a numpy array or PIL image to a torch tensor.
    # NOTE(review): per the torchvision docs the rescaling to [0, 1] only
    # applies to PIL images / uint8 arrays; float spectrograms are presumably
    # converted without rescaling -- confirm.
    ToTensor(),
])

In [None]:
# Wrap the clip array in a torch Dataset; the ToTensor pipeline is applied to
# each clip when it is fetched.
test_dataset = DataDiego(data=diego,transform=transforms)

In [None]:
# Build the DataLoader that batches and shuffles the clip dataset.
batch_size = 64
test_dataloader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=True,
    # os.cpu_count() may return None when the count cannot be determined;
    # DataLoader needs an int, so fall back to loading in the main process.
    num_workers=os.cpu_count() or 0,
)

In [None]:
# Sanity check: fetch a single batch and print the shape of its input tensor.
for x,y in test_dataloader:
    print(x.shape)
    break  # one batch is enough

In [None]:
# Remember to add dropout layers

class NNET1(nn.Module):
    """Plain convolutional classifier over single-channel spectrogram clips.

    Expected input: ``(batch, 1, frames, 513)``. The first convolution spans
    all 513 columns, so the feature map collapses to width 1 and the remaining
    layers only convolve/pool along the frame axis. For the 128-frame clips
    used in this notebook the conv stack yields ``(batch, 256, 26, 1)``.

    The 256 channel activations are then reduced over the frame axis with
    global max pooling plus global average pooling and classified into 10
    classes by a fully connected head.

    NOTE(review): the head ends in ``nn.Softmax`` although the notebook trains
    with ``CrossEntropyLoss``, which expects raw logits and applies
    log-softmax itself. The layer is kept so the output contract
    (probabilities) is unchanged -- consider removing one of the two.
    """

    def __init__(self):
        super(NNET1, self).__init__()

        self.conv = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=128,kernel_size=(4,513)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2,1)),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(4,1)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2,1)),
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(4,1)),
            nn.ReLU(),
        )

        # The head receives one 256-dim vector per sample (one value per
        # channel after global pooling).
        self.fc = nn.Sequential(
            nn.Linear(256, 300),
            nn.ReLU(),
            nn.Linear(300, 150),
            nn.ReLU(),
            nn.Linear(150, 10),
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        """Return class probabilities of shape ``(batch, 10)``."""
        x = self.conv(x)
        # Pool over however many frames are left instead of a hard-coded 26.
        # For 128-frame inputs this is identical; for other lengths the fixed
        # kernel either failed or left several rows per sample that
        # ``view(-1, 256)`` silently turned into extra "samples".
        frames = x.size(2)
        max_pool = F.max_pool2d(x, kernel_size=(frames, 1))
        avg_pool = F.avg_pool2d(x, kernel_size=(frames, 1))
        x = max_pool + avg_pool
        # Flatten per sample so a wrong feature size raises a shape error in
        # the linear layer rather than changing the batch dimension.
        x = self.fc(torch.flatten(x, start_dim=1))

        return x

In [None]:
class NNET2(nn.Module):
    """Residual convolutional classifier over single-channel spectrogram clips.

    Expected input: ``(batch, 1, frames, 513)``. ``c1`` spans all 513 columns,
    collapsing the feature map to width 1 with ``frames - 3`` rows. ``c2`` and
    ``c3`` convolve along the frame axis only; their paddings (2 and 1) are
    chosen so the output of ``c3`` has the same height as that of ``c1``,
    which allows the skip connection ``c1 + c3``.

    The summed feature map is reduced over the frame axis with global max
    pooling plus global average pooling and classified into 10 classes by a
    fully connected head.

    NOTE(review): the head ends in ``nn.Softmax`` although the notebook trains
    with ``CrossEntropyLoss``, which expects raw logits and applies
    log-softmax itself. The layer is kept so the output contract
    (probabilities) is unchanged -- consider removing one of the two.
    """

    def __init__(self):
        super(NNET2, self).__init__()

        self.c1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=256,kernel_size=(4,513)),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )

        self.c2 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(4, 1),padding=(2,0)),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )

        self.c3 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(4, 1),padding=(1,0)),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )

        # The head receives one 256-dim vector per sample (one value per
        # channel after global pooling).
        self.fc = nn.Sequential(
            nn.Linear(256, 300),
            nn.ReLU(),
            nn.Linear(300, 150),
            nn.ReLU(),
            nn.Linear(150, 10),
            nn.Softmax(dim=1)
        )

    def forward(self,x):
        """Return class probabilities of shape ``(batch, 10)``."""
        c1 = self.c1(x)
        c2 = self.c2(c1)
        c3 = self.c3(c2)
        # Skip connection around c2/c3.
        x = c1 + c3
        # Pool over however many frames are left instead of a hard-coded 125.
        # For 128-frame inputs this is identical; for other lengths the fixed
        # kernel either failed or left several rows per sample that
        # ``view(-1, 256)`` silently turned into extra "samples".
        frames = x.size(2)
        max_pool = F.max_pool2d(x, kernel_size=(frames, 1))
        avg_pool = F.avg_pool2d(x, kernel_size=(frames, 1))
        x = max_pool + avg_pool
        # Flatten per sample so a wrong feature size raises a shape error in
        # the linear layer rather than changing the batch dimension.
        x = self.fc(torch.flatten(x, start_dim=1))
        return x




In [None]:
# Smoke test: push one random input through an untrained NNET2.
# Fake spectrogram 128x513, laid out as (batch=1, channel=1, 128, 513)
x = torch.randn(1, 1, 128, 513)

# Create model
model = NNET2()


# Forward pass
output = model(x)

# Print output shape: one row per input sample, 10 scores per row
print(output.shape)

In [None]:
# Notebook cell: display the label of the first dataset item.
test_dataloader.dataset.y[0]

In [None]:
# Grab the dataset's raw input column (no transform applied) for inspection.
prova = test_dataloader.dataset.x

In [None]:
# Turn the first raw input into a tensor and prepend a size-1 dimension.
prova = torch.Tensor(prova[0]).unsqueeze(0)

In [None]:
from torch.optim import SGD, Adam, Adadelta
from torch.nn import CrossEntropyLoss
from tqdm import tqdm
# Create the model, the optimizer and the loss.
model = NNET2()
opt = Adadelta(model.parameters())

# NOTE(review): NNET2 already ends in nn.Softmax, while CrossEntropyLoss
# expects raw logits and applies log-softmax itself -- the softmax is
# effectively applied twice during training. Consider removing one of them.
loss_fn = CrossEntropyLoss()

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
epochs=10
best_val = np.inf
for epoch in range(epochs):
    # ---- training pass ----
    model.train()
    print(f"Epoch: {epoch+1}")
    iterator = tqdm(test_dataloader)
    for batch_x, batch_y in iterator:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        y_pred = model(batch_x)

        loss = loss_fn(y_pred, batch_y)

        opt.zero_grad()
        loss.backward()
        opt.step()
        iterator.set_description(f"Train loss: {loss.detach().cpu().numpy()}")

    # ---- validation pass ----
    # NOTE(review): this iterates the same loader that was used for training,
    # so the "validation" loss and the checkpoint selection below are computed
    # on training data. Swap in a held-out loader once one is available.
    model.eval()
    with torch.no_grad():
        predictions = []
        true = []
        for batch_x, batch_y in tqdm(test_dataloader):
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            y_pred = model(batch_x)

            predictions.append(y_pred)
            true.append(batch_y)
        predictions = torch.cat(predictions, axis=0)
        true = torch.cat(true, axis=0)
        val_loss = loss_fn(predictions, true)
        # Multi-class accuracy: the predicted class is the arg-max over the 10
        # outputs. (The former `sigmoid(...).round() == true` is a binary
        # classification formula and does not apply to a 10-way output.)
        # CrossEntropyLoss also accepts per-class probability targets, so
        # reduce 2-D (e.g. one-hot) labels to class indices before comparing.
        true_classes = true.argmax(dim=1) if true.dim() > 1 else true
        val_acc = (predictions.argmax(dim=1) == true_classes).float().mean()
        print(f"loss: {val_loss}, accuracy: {val_acc}")
    
    # Keep only the weights with the lowest validation loss seen so far.
    if val_loss < best_val:
        print("Saved Model")
        torch.save(model.state_dict(), "model.pt")
        best_val = val_loss

In [None]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_curve, roc_auc_score

def evaluate_network(dataloader, model, data_split):
    """Evaluate a multi-class classifier, print its metrics and plot a ROC curve.

    The former body used binary-classification metrics (sigmoid + round,
    ``roc_curve`` on raw labels, ``average='binary'``), which do not apply to
    the 10-way output of the networks in this notebook. This version reports:

    * loss, computed with the module-level ``loss_fn``;
    * accuracy of the arg-max prediction;
    * macro-averaged precision, recall and F1;
    * micro-averaged one-vs-rest ROC curve and AUC.

    Args:
        dataloader: Iterable of ``(inputs, labels)`` batches. Labels may be
            class indices or 2-D per-class targets (e.g. one-hot), which are
            reduced with arg-max for the metrics.
        model: Network returning one score per class for every sample. The
            scores are used as-is for the ROC computation (the networks in
            this notebook already end in a softmax).
        data_split: Name of the split, used in the printed line and the plot
            title.

    Returns:
        None. Metrics are printed and a matplotlib figure is created.
    """
    model.eval()
    # Run on whatever device the model lives on instead of the global `device`,
    # so a freshly constructed (CPU) model works on a CUDA machine as well.
    model_device = next(model.parameters()).device
    with torch.no_grad():
        predictions = []
        true = []
        for batch_x, batch_y in tqdm(dataloader):
            batch_x = batch_x.to(model_device)
            batch_y = batch_y.to(model_device)

            predictions.append(model(batch_x))
            true.append(batch_y)
        predictions = torch.cat(predictions, dim=0)
        true = torch.cat(true, dim=0)
        loss = loss_fn(predictions, true).detach().cpu().numpy()
        scores = predictions.detach().cpu().numpy()
        true = true.detach().cpu().numpy()

    # Reduce per-class targets to class indices for the label-based metrics.
    if true.ndim > 1:
        true = true.argmax(axis=1)
    true = true.astype(int)
    predicted = scores.argmax(axis=1)

    # Micro-averaged one-vs-rest ROC: every (sample, class) pair is treated as
    # one binary decision. Unlike per-class averaging, this stays defined when
    # some classes do not occur in the evaluated split.
    indicator = np.eye(scores.shape[1], dtype=int)[true]
    fpr, tpr, _ = roc_curve(indicator.ravel(), scores.ravel())
    auc = roc_auc_score(indicator.ravel(), scores.ravel())

    precision, recall, fscore, _ = precision_recall_fscore_support(
        true, predicted, average='macro', zero_division=0
    )
    accuracy = accuracy_score(true, predicted)

    print(f"{data_split} loss: {loss}, accuracy: {accuracy}, precision: {precision}, recall: {recall}, f1: {fscore}, roc_auc: {auc}")

    plt.figure()
    plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % auc)
    # Diagonal = performance of a random classifier.
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'{data_split} receiver operating characteristic (ROC)')
    plt.legend(loc="lower right")

In [None]:
# Rebuild the architecture and restore the weights saved by the training loop.
model = NNET2()
# map_location lets a checkpoint written on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load("model.pt", map_location=device))
# The evaluation batches are moved to `device`, so the model must live there
# too; a freshly constructed model is on the CPU.
model.to(device)

evaluate_network(test_dataloader, model, "Test Dataset")