## VGG16 Training
---

#### Contents:
- [Data Loading](#Data-Loading): Load the data from the proper directory.
- [Model Training](#Model-Training): Train the model using the loaded data.
  
#### Findings:


In [28]:
import torch
import numpy as np
from torch.utils.data import TensorDataset, DataLoader, Dataset
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset, Subset
import torchaudio
import torchaudio.transforms as T
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from tqdm.notebook import tqdm
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, balanced_accuracy_score
import pandas as pd

In [29]:
TYPE_NUM_TO_EXTRACT = {
	'mfcc': None,
	'spectrogram': None,
	'mel_spectrogram': None,
 }

EXTRACTION_INTERVALS = [1]
RANDOM_SEED = 42
IMG_SHAPE = (80, 80)

PATH_TO_SAVE =  '../../features/raw/'

NAME = f'features_images_{EXTRACTION_INTERVALS[0]}_{IMG_SHAPE}_'
for key in TYPE_NUM_TO_EXTRACT.keys():
	NAME += f'{key}_'
NAME = NAME[:-1]
NAME = NAME + '.pt'

In [30]:
# Function to extract features from a specific layer
def extract_features(dataloader, model, layer_index=None):
    if layer_index is None:
        layer_index = len(model.features)

    layer = model.features[:layer_index + 1]
    layer.eval()
    
    features = []
    with torch.no_grad():
        for inputs, _ in tqdm(dataloader):
            outputs = layer(inputs)
            features.append(outputs.view(outputs.size(0), -1).cpu().numpy())
    return np.concatenate(features, axis=0)

def get_dataloaders_sampler(datasets, batch_size, sampler=None, shuffling=[True, False, False]):
    """
    Get the dataloaders for the training, validation, and test sets. Use this function with sampler = True to balance the data.

    Args:
    - datasets (torch.utils.data.dataset.TensorDataset): The dataset containing the features of the real and fake audio files.
    - batch_size (int): The batch size for the dataloaders.
    - sampler (bool): A boolean value indicating whether to use a sampler for data balancing.
    - shuffling (list, optional): A list of boolean values indicating whether to shuffle the data for each dataloader. The length of the list should be equal to the number of datasets.

    Returns:
    - dataloaders (list): A list containing the training, validation, and test dataloaders.
    """
    
    samplers = [None, None, None]
    
    # define a weighted random sampler to be used in the dataloader for data balancing
    if sampler:
        samplers = []
        for dataset in datasets:
            try:
                labels = dataset[:][1]
            except: 
                labels = dataset.labels.long()
            class_counts = torch.bincount(labels)
            class_weights = 1. / class_counts.float()
            weights = class_weights[labels]
            sampler = torch.utils.data.WeightedRandomSampler(weights, len(weights), replacement=True)
            samplers.append(sampler)
            shuffling = [False, False, False]
    
    
    dataloaders = []
    for i, dataset in enumerate(datasets):
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffling[i], sampler=samplers[i])
        dataloaders.append(dataloader)
    
    return dataloaders

# Function to count the number of samples per class in an epoch
def count_samples_per_class(dataloader):
    class_counts = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
    for _, labels in dataloader:
        for label in labels:
            class_counts[label.item()] += 1
    return class_counts

In [31]:
# Define a custom Dataset
class MFCCDataset(Dataset):
    def __init__(self, mfcc_data, labels, transform=None):
        self.mfcc_data = mfcc_data
        self.labels = labels
        self.transform = transform
    
    def __len__(self):
        return len(self.mfcc_data)
    
    def __getitem__(self, idx):
        sample = self.mfcc_data[idx].numpy()
        sample = np.stack([sample]*3, axis=0)  # Expand to 3 channels
        sample = sample.transpose(1, 2, 0)  # Change to HWC format for PIL Image
        sample = Image.fromarray((sample * 255).astype('uint8'))  # Convert to PIL Image
        
        if self.transform:
            sample = self.transform(sample)
        
        return sample, self.labels[idx]

# Define the transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [32]:
# ---------- MODELS ----------
# Load the pre-trained VGG16_bn model
vgg16_bn = models.vgg16_bn(pretrained=True)
vgg16_bn.classifier = torch.nn.Identity()  # Remove the classifier to get features

metrics_dict = {
    'macrof1': f1_score,
    'accuracy': accuracy_score,
    'balanced_accuracy': balanced_accuracy_score
}

# ---------- STORING STRUCT ----------
models_dict = {
    "rf": RandomForestClassifier(random_state=RANDOM_SEED),
}

# df having features type as columns and metrics as rows
results_df = pd.DataFrame(columns=TYPE_NUM_TO_EXTRACT.keys(), index=metrics_dict.keys())

# for each model save the df
results_dict = { key: results_df.copy() for key in models_dict.keys() }



In [33]:
# Load the features
features = torch.load(PATH_TO_SAVE + NAME)

for feature_type in tqdm(TYPE_NUM_TO_EXTRACT.keys()): #x3

    # get the sets
    X_train, y_train = features[feature_type]['train']["X"], features[feature_type]['train']["y"]
    X_test, y_test = features[feature_type]['test']["X"], features[feature_type]['test']["y"]
    
    # create the datasets and balanced dataloaders
    train_dataset = MFCCDataset(X_train, y_train, transform=transform)
    train_dataloader = get_dataloaders_sampler([train_dataset], batch_size=32, sampler=True, shuffling=[True])[0]
    # print the number of samples per class
    print(f"{'_'*10} Train samples per class (interval {EXTRACTION_INTERVALS[0]}):\t {count_samples_per_class(train_dataloader)} {'_'*10}\n")
    test_dataset = MFCCDataset(X_test, y_test, transform=transform)
    test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)    
    
    print(f"Extracting neural features from {feature_type} features\n")
    train_features = extract_features(train_dataloader, vgg16_bn)
    test_features = extract_features(test_dataloader, vgg16_bn)
    
    # train the models
    for model_name, model in models_dict.items(): #x1
        print(f"Training {model_name} on {feature_type} features\n")
        model.fit(train_features, y_train)
        
        # evaluate the model
        for metric_name, metric in metrics_dict.items(): #x3
            y_pred = model.predict(test_features)
            if 'macro' in metric_name:
                score = metric(y_test, y_pred, average='macro')
            else:
                score = metric(y_test, y_pred)
                
            print(f"{model_name} {metric_name} on {feature_type} features: {score}\n")
        
            # save the results
            results_dict[model_name].loc[metric_name, feature_type] = score

  0%|          | 0/3 [00:00<?, ?it/s]

__________ Train samples per class (interval 1):	 {0: 770, 1: 800, 2: 807, 3: 767, 4: 760} __________

Extracting neural features from mfcc features

Training rf on mfcc features

rf macrof1 on mfcc features: 0.12402572221876222

rf accuracy on mfcc features: 0.4098360655737705

rf balanced_accuracy on mfcc features: 0.20096342787565552

__________ Train samples per class (interval 1):	 {0: 782, 1: 785, 2: 772, 3: 770, 4: 795} __________

Extracting neural features from spectrogram features

Training rf on spectrogram features

rf macrof1 on spectrogram features: 0.12274633405501115

rf accuracy on spectrogram features: 0.41290983606557374

rf balanced_accuracy on spectrogram features: 0.2021387682133359

__________ Train samples per class (interval 1):	 {0: 747, 1: 782, 2: 806, 3: 747, 4: 822} __________

Extracting neural features from mel_spectrogram features

Training rf on mel_spectrogram features

rf macrof1 on mel_spectrogram features: 0.12907804077879656

rf accuracy on mel_spe

In [34]:
# save the results_dict to a file
torch.save(results_dict, PATH_TO_SAVE + 'results_dict_VGG16.pt')