# Model lab

## Imports

In [2]:
from torch import device, cuda, no_grad, optim, max
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset

import matplotlib.pyplot as plt

import torch.nn as nn
import pandas as pd
import librosa
import numpy as np
import torch.nn.functional as F
import torch

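Paramètres de l'extraction du spectrogramme :

* Fréquence d'échantillonnage (`sample_rate`) : 44100, 22050 ou 16000 Hz
* Taille de la fenêtre de la transformée de Fourier : `n_fft`
* Décalage entre deux fenêtres successives : `hop_length`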

In [3]:
def audio_to_spectrogram(audio_file, sample_rate=22050, n_fft=2048, hop_length=512):
    """Load an audio file and return the magnitude of its STFT.

    Args:
        audio_file: Audio source, forwarded unchanged to ``librosa.load``.
        sample_rate: Forwarded to ``librosa.load`` as ``sr``.
        n_fft: Forwarded to ``librosa.stft`` as ``n_fft``.
        hop_length: Forwarded to ``librosa.stft`` as ``hop_length``.

    Returns:
        ``np.abs`` applied to the STFT of the loaded signal.
    """
    # The second value returned by librosa.load is not needed here.
    waveform, _ = librosa.load(audio_file, sr=sample_rate)
    stft_matrix = librosa.stft(waveform, n_fft=n_fft, hop_length=hop_length)
    return np.abs(stft_matrix)

# Read the clip index, then turn each bare clip name into a path under the clips directory.
df = pd.read_csv('../data/data.csv')
df['path'] = df['path'].apply(lambda clip_name: '../data/clips/' + clip_name)

# One magnitude spectrogram per clip, using the function's default settings.
df['spectrogram'] = df['path'].apply(audio_to_spectrogram)

### Padding

In [4]:
# Target shape: the largest row count and the largest column count found
# across all spectrograms.
# NOTE: the import cell binds `max` to `torch.max`, so the builtin `max` is
# not usable here and the maxima are tracked by hand.
max_x = 0
max_y = 0

for spectrogram in df['spectrogram']:
    n_rows, n_cols = spectrogram.shape[0], spectrogram.shape[1]
    if n_rows > max_x:
        max_x = n_rows
    if n_cols > max_y:
        max_y = n_cols


def _pad_spectrogram(spec, n_rows, n_cols):
    """Zero-pad a 2-D array at the bottom/right up to ``(n_rows, n_cols)``.

    The original values keep their positions; only trailing rows/columns of
    zeros are appended. ``np.resize`` must not be used for this: it flattens
    the array and refills the new shape with repeated copies of the data,
    which shifts every row as soon as the column count changes.
    """
    return np.pad(
        spec,
        ((0, n_rows - spec.shape[0]), (0, n_cols - spec.shape[1])),
        mode='constant',
        constant_values=0,
    )


df['spectrogram'] = df['spectrogram'].apply(
    lambda spec: _pad_spectrogram(spec, max_x, max_y)
)

In [5]:
# Replace every spectrogram in the column with a torch tensor built from it.
spectrogram_tensors = df['spectrogram'].apply(torch.tensor)
df['spectrogram'] = spectrogram_tensors

### Split

In [6]:
class CustomDataset(Dataset):
    """Dataset view over a DataFrame of clips.

    Each item is a ``(label, data)`` pair taken from the ``'sentence'`` and
    ``'spectrogram'`` columns of the wrapped frame, in that order.
    """

    def __init__(self, dataframe):
        # Only a reference is kept; the frame is not copied.
        self.dataframe = dataframe

    def __len__(self):
        """Return the number of rows in the wrapped frame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(sentence, spectrogram)`` for the row at position ``idx``."""
        row = self.dataframe.iloc[idx]
        return row['sentence'], row['spectrogram']

In [7]:
from sklearn.model_selection import train_test_split

# 80% of the rows go to training; the fixed random_state makes the split repeatable.
train_df, test_df = train_test_split(df, train_size=0.8, random_state=42)

# Report the size of each subset.
for caption, subset in (
    ("Taille de l'ensemble d'entraînement :", train_df),
    ("Taille de l'ensemble de test :", test_df),
):
    print(caption, len(subset))


Taille de l'ensemble d'entraînement : 6921
Taille de l'ensemble de test : 1731


In [8]:
batch_size = 32

# NOTE(review): `train_df` / `test_df` are rebound here from DataFrames to
# DataLoaders, so this cell cannot be re-run without re-running the split first.
# Only the training loader shuffles.
train_df = DataLoader(CustomDataset(train_df), batch_size=batch_size, shuffle=True)
test_df = DataLoader(CustomDataset(test_df), batch_size=batch_size)

## Model Architecture
* CNN
  

In [None]:
class CNN(nn.Module):
    """Two-stage convolutional classifier that accepts any input height/width.

    Each stage is a 5x5 convolution (stride 1, padding 2), a ReLU, and a 2x2
    max-pool. The resulting feature map is then adaptively average-pooled to
    7x7 before the linear head. For 28x28 inputs the map is already 7x7, so
    the adaptive pool is the identity and the network computes exactly what
    the fixed-size version did. For other sizes it prevents the shape
    mismatch in the linear layer.

    The adaptive pool has no parameters, so ``state_dict`` keys are unchanged.

    Args:
        in_channels: Number of channels of the input tensor (default 1).
        num_classes: Size of the output layer (default 10).

    NOTE(review): inputs must carry an explicit channel axis, i.e. be shaped
    (batch, in_channels, height, width). 2-D spectrograms batched as
    (batch, height, width) presumably need ``x.unsqueeze(1)`` first; confirm
    against the training loop.
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, 16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Fixes the spatial size fed to the linear head regardless of the
        # input resolution.
        self.pool = nn.AdaptiveAvgPool2d((7, 7))
        self.out = nn.Linear(32 * 7 * 7, num_classes)

    def forward(self, x):
        """Run the network.

        Args:
            x: Tensor shaped (batch, in_channels, height, width).

        Returns:
            A pair ``(output, features)``: the linear-layer output shaped
            (batch, num_classes) and the flattened features shaped
            (batch, 32 * 7 * 7) that were fed into it.
        """
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.pool(x)
        # flatten() rather than view(): does not depend on memory layout.
        x = x.flatten(start_dim=1)
        output = self.out(x)
        return output, x