# Import Requirements

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchaudio
import torchvision.transforms as transforms
import os
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import torchvision.models as models
import torch.nn.functional as F
import time
import librosa
from skopt import gp_minimize
from skopt.space import Real, Categorical
from skopt.utils import use_named_args

# Select the compute device: CUDA when a GPU is visible to torch, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
import torch
import torch.nn as nn
from collections import defaultdict


class Add(nn.Module):
    '''
    Adds exactly two tensors element-wise and optionally applies an activation.

    Args:
        activation: optional callable applied to the sum; when None the raw
            sum is returned.

    Attributes:
        digital: set to True at construction. model_summary() reads this flag
            to decide under which name the module is counted.
    '''
    def __init__(self, activation=None):
        super().__init__()
        self.activation = activation
        self.digital = True

    def forward(self, x):
        '''
        Sum the two tensors contained in ``x``.

        Args:
            x: sequence (list/tuple) of exactly two tensors that
                torch.stack accepts, i.e. tensors of identical shape.

        Returns:
            The element-wise sum, passed through ``self.activation`` when
            one was supplied.

        Raises:
            RuntimeError: if ``x`` does not hold exactly two tensors.
        '''
        if len(x) != 2:
            # A bare `raise` outside an `except` block only yields
            # "RuntimeError: No active exception to reraise"; raise the same
            # exception type explicitly so the message names the real problem.
            raise RuntimeError(f'Add expects exactly 2 tensors, got {len(x)}')

        # Compute the sum once and reuse it for both return paths.
        total = torch.stack(x, dim=0).sum(dim=0)
        if self.activation is not None:
            return self.activation(total)
        return total
        
def model_summary(M, pt_191=False):
    """
    Count how often each module class occurs in ``M`` and report its size.

    Every entry yielded by ``M.named_modules(remove_duplicate=True)`` is
    tallied under its class name, with two special cases:

    * an ``nn.Conv2d`` is counted as ``'CART_Conv2d'`` when the module found
      by stripping the last dotted component from its name is of a class
      called ``'CART'``;
    * an ``nn.ReLU`` / ``Add`` that carries a ``digital`` attribute is
      counted under ``'CART_' + class name`` when that attribute is falsy.

    Args:
        M: the model (an ``nn.Module``) to summarise.
        pt_191: accepted for compatibility; the body does not read it.

    Returns:
        A plain dict mapping class names to usage counts, plus a
        ``'Parameters'`` entry holding the total parameter count in millions
        as a string ending in ``'M'``.
    """
    counts = {}

    def bump(key):
        # Insert on first sight, increment afterwards.
        counts[key] = counts.get(key, 0) + 1

    for name, module in M.named_modules(remove_duplicate=True):
        cls_name = module.__class__.__name__

        if isinstance(module, nn.Conv2d):
            parent_path = name.rsplit('.', 1)[0]
            parent_cls = M.get_submodule(parent_path).__class__.__name__
            bump('CART_' + cls_name if parent_cls == 'CART' else cls_name)
        elif isinstance(module, (nn.ReLU, Add)) and hasattr(module, 'digital'):
            bump(cls_name if module.digital else 'CART_' + cls_name)
        else:
            bump(cls_name)

    n_weights = sum(p.shape.numel() for p in M.parameters())
    counts['Parameters'] = str(n_weights / 1e6) + 'M'

    return counts

# Class AudioDataset

In [None]:
class AudioDataset(Dataset):
    """Folder-per-class audio dataset that yields stacked spectral features.

    ``directory`` must contain one sub-directory per class; every regular
    file inside a class directory is treated as one audio clip of that class.
    Class indices follow the sorted order of the sub-directory names.

    Args:
        directory: root folder holding the class sub-directories.
        desired_duration: clip length in seconds; every clip is zero-padded
            or truncated to this duration.
        sample_rate: sampling rate in Hz that clips are resampled to.
        n_mfcc: number of MFCC coefficients to compute.
    """

    def __init__(self, directory, desired_duration, sample_rate=44100, n_mfcc=20):
        self.directory = directory
        self.desired_duration = desired_duration
        self.sample_rate = sample_rate
        self.n_mfcc = n_mfcc

        # Only sub-directories are classes; a stray file in the root would
        # otherwise become a bogus class and crash os.listdir() below.
        self.classes = sorted(
            name for name in os.listdir(directory)
            if os.path.isdir(os.path.join(directory, name))
        )

        self.audio_files = []
        for label, class_name in enumerate(self.classes):
            class_path = os.path.join(directory, class_name)
            # Sorted so that sample indices are reproducible across runs.
            for audio_file in sorted(os.listdir(class_path)):
                file_path = os.path.join(class_path, audio_file)
                if os.path.isfile(file_path):
                    self.audio_files.append((file_path, label))

    def __len__(self):
        """Return the number of audio clips found under ``directory``."""
        return len(self.audio_files)

    def __getitem__(self, idx):
        """Load clip ``idx`` and return ``(features, label)``.

        ``features`` is the tensor produced by ``_compute_mfcc``; ``label``
        is the integer class index.
        """
        audio_file, label = self.audio_files[idx]
        # sr=None keeps the file's native rate; it is handed on so the
        # resampling step knows the true source rate.
        waveform, native_sr = librosa.load(audio_file, sr=None)
        features = self._compute_mfcc(waveform, orig_sr=native_sr)
        return features, label

    def _target_length(self):
        """Number of samples in a clip of ``desired_duration`` seconds."""
        return int(round(self.desired_duration * self.sample_rate))

    def _fit_length(self, waveform):
        """Zero-pad at the end, or truncate, ``waveform`` to the target length."""
        target = self._target_length()
        current = len(waveform)
        if current < target:
            return np.pad(waveform, (0, target - current))
        if current > target:
            return waveform[:target]
        return waveform

    def _compute_mfcc(self, waveform, orig_sr=None):
        """Turn a mono waveform into a stacked feature matrix.

        Args:
            waveform: 1-D array of audio samples.
            orig_sr: sampling rate of ``waveform`` in Hz. When None the
                waveform is assumed to already be at ``self.sample_rate``.

        Returns:
            A tensor whose rows are the MFCCs, the Mel spectrogram and the
            chroma CQT of the length-normalised clip, stacked vertically.
        """
        # Resample using the clip's real sampling rate. Passing the sample
        # count as orig_sr would squeeze every clip to about one second.
        if orig_sr is not None and orig_sr != self.sample_rate:
            waveform = librosa.resample(
                waveform, orig_sr=orig_sr, target_sr=self.sample_rate
            )

        waveform = self._fit_length(waveform)

        # Compute MFCC
        mfcc = librosa.feature.mfcc(y=waveform, sr=self.sample_rate, n_mfcc=self.n_mfcc)

        # Compute Mel spectrogram
        mel_spectrogram = librosa.feature.melspectrogram(y=waveform, sr=self.sample_rate)

        # Compute Chroma CQT
        chroma_cqt = librosa.feature.chroma_cqt(y=waveform, sr=self.sample_rate)

        # Stack the features
        stacked_features = np.vstack([mfcc, mel_spectrogram, chroma_cqt])
        return torch.tensor(stacked_features)

## Define data directories

In [None]:
# Root folders of the train / validation / test splits (relative paths)
train_dir, validation_dir, test_dir = 'data/train', 'data/validate', 'data/test'

## Load datasets

In [None]:
# Clip length, in seconds, shared by all three splits
desired_duration = 6

# Training split: batches of 32, reshuffled on every pass
train_dataset = AudioDataset(directory=train_dir, desired_duration=desired_duration)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32)

# Validation split: batches of 8, fixed order
validation_dataset = AudioDataset(directory=validation_dir, desired_duration=desired_duration)
validation_loader = DataLoader(dataset=validation_dataset, shuffle=False, batch_size=8)

# Test split: one clip per batch, fixed order
test_dataset = AudioDataset(directory=test_dir, desired_duration=desired_duration)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=1)

# Class SVM

In [None]:
from sklearn.svm import SVC

# Define SVM model
class SVM(nn.Module):
    """Thin nn.Module wrapper that owns a scikit-learn ``SVC``.

    The classifier lives in ``self.svm`` (RBF kernel, ``gamma='scale'``);
    fitting and prediction are done by calling that object directly.
    """

    def __init__(self):
        super().__init__()
        # Kernel and other hyper-parameters can be adjusted here
        self.svm = SVC(kernel='rbf', gamma='scale')

    def forward(self, x):
        """No-op: the SVM is not a neural network, so there is no forward pass.

        Always returns None.
        """
        return None

In [None]:
def count_parameters(model):
    """Return the total number of elements across the trainable parameters of ``model``.

    Parameters whose ``requires_grad`` is False are skipped.
    """
    trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable += param.numel()
    return trainable

# Train Function

In [None]:
def train_svm(model, train_loader):
    """Fit ``model.svm`` on every sample produced by ``train_loader``.

    Each batch of inputs is flattened to one row per sample, all batches are
    concatenated into a single feature matrix, and the labels are gathered
    into a flat list before ``model.svm.fit`` is called once.

    Args:
        model: object exposing a scikit-learn style estimator as ``model.svm``.
        train_loader: iterable of ``(inputs, labels)`` tensor batches.
    """
    feature_batches, targets = [], []

    for batch, batch_labels in train_loader:
        n_samples = batch.size(0)
        # One row per sample, whatever the original feature layout was
        feature_batches.append(batch.view(n_samples, -1).numpy())
        targets += list(batch_labels.numpy())

    # Train the SVM model on the full matrix in one call
    model.svm.fit(np.concatenate(feature_batches), targets)


# Optimization (optional)

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Search space for the SVC hyper-parameters explored by the grid search
param_grid = dict(
    # Regularization parameter
    C=[0.1, 1, 10, 100],
    # Kernel function
    kernel=['linear', 'poly', 'rbf', 'sigmoid'],
    # Kernel coefficient for 'rbf', 'poly', and 'sigmoid' kernels
    gamma=['scale', 'auto', 0.1, 0.01, 0.001],
)

In [None]:
# Base estimator with default settings; it is handed to GridSearchCV, which
# tries the C / kernel / gamma combinations listed in param_grid
svm_model = SVC()

In [None]:
# GridSearchCV works on arrays, so drain the DataLoader into one flat
# feature matrix (one row per sample) and one flat label list
X_train_all, y_train_all = [], []
for inputs, labels in train_loader:
    flat_batch = inputs.view(inputs.size(0), -1)
    X_train_all.append(flat_batch.numpy())
    y_train_all.extend(labels.numpy())
X_train_all = np.concatenate(X_train_all)

# Exhaustive search over param_grid, scored by accuracy with 5-fold cross-validation
grid_search = GridSearchCV(
    estimator=svm_model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    verbose=1,
)
grid_search.fit(X_train_all, y_train_all)

# Best cross-validated accuracy and the parameter combination that achieved it
best_accuracy = grid_search.best_score_
best_params = grid_search.best_params_

print("Best Parameters:", best_params)
print("Best Accuracy:", best_accuracy)

# Create the model

In [None]:
# NOTE(review): sample_rate is not read by any later cell visible here;
# AudioDataset falls back to its own default of 44100 - confirm it is still needed
sample_rate = 44100
# Build the SVM wrapper (SVC with RBF kernel, gamma='scale') and print its module repr
model = SVM()
print(model)

In [None]:
# Returns a dict of module-class counts plus a 'Parameters' entry for the wrapper.
# NOTE(review): the SVC is a plain attribute, not an nn.Module, so presumably no
# torch parameters are counted here - confirm
model_summary(model)

# Train the model

In [None]:
# Fit model.svm on the whole training split; train_svm flattens each batch
# and calls fit once on the concatenated data
train_svm(model, train_loader)

# Evaluate the model

In [None]:
def validate_svm(model, val_loader):
    """Score the fitted SVM on every sample produced by ``val_loader``.

    Args:
        model: object with an ``eval()`` method and a fitted scikit-learn
            style estimator available as ``model.svm``.
        val_loader: iterable of ``(inputs, labels)`` tensor batches.

    Returns:
        ``(accuracy, y_true, y_pred)`` where ``accuracy`` is the fraction of
        correctly predicted samples and the two lists hold the true and the
        predicted label of every sample, in loader order.

    Raises:
        ValueError: if ``val_loader`` yields no samples, since accuracy is
            undefined in that case.
    """
    model.eval()
    correct = 0
    total = 0
    y_true = []
    y_pred = []

    for inputs, labels in val_loader:
        labels_np = labels.numpy()
        # One row per sample, matching the layout used when fitting
        inputs_flat = inputs.view(inputs.size(0), -1).numpy()
        outputs = model.svm.predict(inputs_flat)
        y_pred.extend(outputs)
        y_true.extend(labels_np)
        total += labels.size(0)
        correct += (outputs == labels_np).sum().item()

    if total == 0:
        # Fail with a clear message instead of an opaque ZeroDivisionError
        raise ValueError("val_loader yielded no samples; cannot compute accuracy")

    accuracy = correct / total
    return accuracy, y_true, y_pred

In [None]:
# Evaluate on the validation split; the true/predicted label lists are kept alongside the accuracy
val_accuracy, y_true_val, y_pred_val = validate_svm(model, validation_loader)
print("Validation Accuracy:", val_accuracy)

In [None]:
def test_svm(model, test_loader):
    """Score the fitted SVM on every sample produced by ``test_loader``.

    Evaluating the test split is the same procedure as evaluating the
    validation split, so this delegates to ``validate_svm`` rather than
    keeping a second copy of the evaluation loop.

    Args:
        model: object with an ``eval()`` method and a fitted scikit-learn
            style estimator available as ``model.svm``.
        test_loader: iterable of ``(inputs, labels)`` tensor batches.

    Returns:
        ``(accuracy, y_true, y_pred)`` exactly as returned by
        ``validate_svm``.
    """
    return validate_svm(model, test_loader)

In [None]:
# Evaluate on the test split; y_true_test / y_pred_test are reused by the
# confusion-matrix and classification-report cells
test_accuracy, y_true_test, y_pred_test = test_svm(model, test_loader)
print("Test Accuracy:", test_accuracy)

In [None]:
# Confusion matrix of the test-set predictions
conf_matrix = confusion_matrix(y_true_test, y_pred_test)

# Print the confusion matrix
print("Confusion Matrix:")
print(conf_matrix)

# Draw it as an annotated heatmap (integer counts, blue colour map)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
ax.set_title('Confusion Matrix (Test)')
plt.show()

In [None]:
# Class names, in the same order as the integer labels
class_names = test_dataset.classes

# Same heatmap as before, with the tick labels replaced by the class names
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
ax.set_title('Confusion Matrix (Test)')
plt.show()


In [None]:
from sklearn.metrics import classification_report

# Reuse y_true_test / y_pred_test from the earlier test_svm(model, test_loader)
# call. Re-running the evaluation here would reload and re-featurise every test
# clip only to reproduce the same labels (the test loader is not shuffled).

# Generate the classification report
report = classification_report(y_true_test, y_pred_test)

# Print the classification report
print("Classification Report:")
print(report)
