In [None]:
# Import standard libraries
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

# Import sklearn libraries for model evaluation and data splitting
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Import PyTorch utilities for data handling
from torch.utils.data import DataLoader, Dataset

# Import transformers for model and tokenizer
from transformers import AdamW, get_linear_schedule_with_warmup
from transformers import BertModel, BertTokenizer, CamembertModel, CamembertTokenizer

# Import custom modules
import load_data
from utils import *
from training_audio_model import *

# Pick the compute device: CUDA when torch reports it as available, CPU otherwise
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(torch.cuda.is_available())

# Load the IPU data from the transcription folder (with load_words=True)
transcr_path = 'paco-cheese/transcr'
data = load_data.load_all_ipus(
    folder_path=transcr_path,
    load_words=True,
)

In [None]:
# Convert the 'data' variable into a pandas DataFrame
df = pd.DataFrame(data)

# Generate the target variable 'y' from the DataFrame
y = create_y(df)

# Display the number of labels next to the number of loaded entries
print(len(y))
print(len(data))

In [None]:
# Location of the audio files (the '2_channels' folder)
audio_files_path = 'paco-cheese/audio/2_channels/'

# Build the audio segments from the audio files, driven by the entries of 'data'
audio_segments = extract_audio_segments(data, audio_files_path)

# Display
audio_segments

In [None]:
# Show the number of audio segments, then the number of labels (one per line)
print(len(audio_segments), len(y), sep='\n')

In [None]:
# Build the numpy array 'X' from one extract_features() result per audio segment
X = np.array(list(map(extract_features, audio_segments)))

In [None]:
# Fixed seed so the train/test split is reproducible
seed = 42

# Wrap the full feature array 'X' and the labels 'y' in an AudioDataset
dataset = AudioDataset(X, y)

# 80% of the full dataset size (not used by the split below)
train_size = int(0.8 * len(dataset))

# Hold out 20% of the samples as the test set
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=seed,
)

# One AudioDataset per split
train_dataset = AudioDataset(X_train, y_train)
test_dataset = AudioDataset(X_test, y_test)

# Batches of 32 for both splits; only the training loader shuffles
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)

# Print the length of 'X' and of 'y_test' (one per line)
print(len(X), len(y_test), sep='\n')

In [None]:
# Set the device to GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)

# Seed PyTorch's global RNG so weight initialisation and the shuffling done by
# the training DataLoader are reproducible across "Restart & Run All"
torch.manual_seed(seed)

# Initialize the AudioCNN model and move it to the device
model = AudioCNN().to(device)

# Unweighted loss; it is NOT used by the training loop below (which uses the
# class-weighted 'loss_fn'). Kept so that any code relying on the name still works.
criterion = nn.CrossEntropyLoss()

# Define the optimizer as Adam with a learning rate of 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Set the number of training epochs and class weights for the loss function
# (errors on class 1 are weighted 4x relative to class 0)
num_epochs = 10
class_weights = torch.tensor([1.0, 4.0], device=device)

# Define the loss function with class weights
loss_fn = nn.CrossEntropyLoss(weight=class_weights)

# Make sure the checkpoint directory exists, otherwise torch.save would fail
checkpoint_path = 'modele/model_audio'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Training loop
for epoch in range(num_epochs):
    # Set the model to training mode
    model.train()

    # Accumulate the per-batch losses so the reported loss is the epoch mean
    # rather than whatever the last batch happened to produce
    running_loss = 0.0
    num_batches = 0

    # Loop over each batch from the training set
    for inputs, labels in train_loader:
        # Move the inputs and labels to the device
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass: compute the outputs by passing inputs to the model
        outputs = model(inputs)

        # Compute the loss
        loss = loss_fn(outputs, labels)

        # Backward pass: compute the gradient of the loss with respect to model parameters
        loss.backward()

        # Perform a single optimization step (parameter update)
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Mean of the per-batch training losses (0.0 if the loader yielded nothing)
    epoch_loss = running_loss / num_batches if num_batches else 0.0

    # Evaluation
    # Set the model to evaluation mode
    model.eval()

    # Initialize counters
    total = 0
    correct = 0

    # Disable gradient computation
    with torch.no_grad():
        # Loop over each batch from the test set
        for inputs, labels in test_loader:
            # Move the inputs and labels to the device
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass: compute the outputs by passing inputs to the model
            outputs = model(inputs)

            # Get the predicted class with the highest score
            predicted = outputs.argmax(dim=1)

            # Update the counters
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Guard against an empty test loader instead of dividing by zero
    accuracy = 100 * correct / total if total else 0.0

    # Print the mean training loss and the test accuracy for this epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.2f}%')

    # Save the whole model object after every epoch (overwrites the previous checkpoint)
    torch.save(model, checkpoint_path)

In [None]:
# Show the total number of labels, then the number of test labels (one per line)
print(len(y), len(y_test), sep='\n')

In [None]:
# Define a function to make predictions with the model
def predition_model_audio(model, dataset, device, proba=True):
    """Run `model` over every batch of `dataset` and collect outputs and labels.

    The model is switched to evaluation mode for the duration of the call, so
    that layers behaving differently in training (e.g. dropout, batch norm)
    do not distort the predictions. Its previous mode is restored afterwards.

    Args:
        model: A torch module, invoked as ``model(inputs)``.
        dataset: Iterable yielding ``(inputs, labels)`` tensor pairs
            (for example a DataLoader).
        device: Device the inputs and labels are moved to before the forward pass.
        proba: If True (default), the raw model outputs are returned for each
            sample; no softmax is applied here. If False, the index of the
            highest-scoring class along dimension 1 is returned instead.

    Returns:
        A tuple ``(all_preds, all_labels)`` of numpy arrays, each concatenated
        over all batches along axis 0.

    Raises:
        ValueError: Raised by ``np.concatenate`` when `dataset` yields no batch.
    """
    # Initialize lists to store per-batch predictions and labels
    all_preds = []
    all_labels = []

    # Remember the current mode so the caller's model state is left untouched
    was_training = model.training
    model.eval()
    try:
        # Disable gradient computation
        with torch.no_grad():
            # Loop over each batch from the dataset
            for inputs, labels in dataset:
                # Move the inputs and labels to the device
                inputs, labels = inputs.to(device), labels.to(device)

                # Compute the outputs by passing inputs to the model
                preds = model(inputs)

                # If 'proba' is False, keep only the highest-scoring class index
                if not proba:
                    preds = preds.argmax(dim=1)

                # Store the predictions and labels as numpy arrays on the CPU
                all_preds.append(preds.cpu().numpy())
                all_labels.append(labels.cpu().numpy())
    finally:
        # Restore whichever mode (train/eval) the model was in before the call
        model.train(was_training)

    # Concatenate all predictions and labels along the first axis
    all_preds = np.concatenate(all_preds, axis=0)
    all_labels = np.concatenate(all_labels, axis=0)

    # Return the predictions and labels
    return all_preds, all_labels

# Use the function to make predictions (class indices) with the model on the test set
all_preds_audio, all_labels = predition_model_audio(model, test_loader, device, proba=False)

# Compute the F1 score and confusion matrix.
# labels=[0, 1] forces a 2x2 matrix even if one class is absent from both the
# test labels and the predictions, so the per-class indexing below cannot go
# out of bounds. NOTE(review): assumes confusion_matrix is scikit-learn's — confirm
# that the star imports do not shadow it with a different implementation.
f1 = f1_score(all_labels, all_preds_audio)
conf_matrix = confusion_matrix(all_labels, all_preds_audio, labels=[0, 1])

# Print the F1 score and confusion matrix
print(f'Test F1 Score: {f1}')
print(f'Confusion Matrix:\n{conf_matrix}')

# Row i of the matrix holds the samples whose true label is class i, so the row
# sum is the class total and the diagonal entry is the correctly classified count
total_class_0 = np.sum(conf_matrix[0])
total_class_1 = np.sum(conf_matrix[1])
detected_class_0 = conf_matrix[0, 0]  # Class-0 samples predicted as class 0
detected_class_1 = conf_matrix[1, 1]  # Class-1 samples predicted as class 1

# Print the number of correctly detected instances for each class
print(f'Nombre d\'éléments de classe 0 détectés : {detected_class_0} sur {total_class_0}')
print(f'Nombre d\'éléments de classe 1 détectés : {detected_class_1} sur {total_class_1}')