# RTF Model for predicting zone with MFCC

Les MFCC sont des coefficients qui représentent le contenu fréquentiel d’un son, mais d’une manière qui imite la perception humaine. Ils traduisent ce que l’oreille humaine perçoit comme étant important dans un son.

- Ils réduisent une information spectrale complexe (spectre FFT) à un petit vecteur de valeurs pertinentes.
- Ils sont robustes au bruit et compacts (souvent 12 ou 13 coefficients suffisent).

Import

In [10]:
import os
import glob
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from scipy.io import wavfile
from scipy.fft import fft
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from scipy.fftpack import dct

Readwav function

In [11]:
def readWavFolder(folderPath):
    """Load every ``*.wav`` file located directly inside ``folderPath``.

    The three returned lists are index-aligned: entry ``i`` of each list
    describes the same file. Files are processed in sorted path order so the
    result is deterministic.

    Args:
        folderPath: Path of the directory to scan (not recursive).

    Returns:
        A tuple ``(sampleRateFolder, fileFolder, files)`` where
        ``sampleRateFolder`` holds the sample rate of each file,
        ``fileFolder`` the sample array returned by ``wavfile.read`` and
        ``files`` the base name of each file.

    Raises:
        FileNotFoundError: If ``folderPath`` is not an existing directory.
    """
    if not os.path.isdir(folderPath):
        raise FileNotFoundError(f"No such directory: {folderPath}")

    fileFolder = []
    sampleRateFolder = []
    files = []

    # Names, rates and samples all come from the same iteration: callers
    # derive labels from files[i], so it must describe fileFolder[i].
    for wav_path in sorted(glob.glob(os.path.join(folderPath, '*.wav'))):
        samplerate, data = wavfile.read(wav_path)
        fileFolder.append(data)
        sampleRateFolder.append(samplerate)
        files.append(os.path.basename(wav_path))
    return sampleRateFolder, fileFolder, files
    
# Load the recordings of the "new_RB" folder into module-level lists:
# sample rates, sample arrays and the file names returned by readWavFolder.
samplerateVect,testWavFileVect,filename = readWavFolder("../../Data_Clean/new_RB")


Spectrum

In [12]:
def spectrumFromWav(wavFile,sampleRate,chanel):
    """Return the magnitude spectrum of one channel of a multi-channel signal.

    Args:
        wavFile: 2-D sample array indexed as ``[sample, channel]``.
        sampleRate: Unused by the computation; kept in the signature for
            callers that pass it.
        chanel: Index of the channel (column) to analyse.

    Returns:
        Magnitudes of the first ``N // 2`` FFT bins, ``N`` being the number
        of samples; the remaining bins are dropped.
    """
    samples = wavFile[:, chanel]
    magnitudes = np.abs(fft(samples))
    half = len(magnitudes) // 2
    return magnitudes[:half]


# Magnitude spectrum of channel 0, restricted to the 150-1000 Hz band,
# for every recording loaded above.
spectrumVect = []
for wav, rate in zip(testWavFileVect, samplerateVect):
    spectrum = spectrumFromWav(wav, rate, 0)
    n_bins = len(spectrum)
    # Frequency (Hz) associated with each retained FFT bin
    freqs = np.fft.fftfreq(n_bins * 2, d=1/rate)[:n_bins]
    in_band = (freqs >= 150) & (freqs <= 1000)
    spectrumVect.append(spectrum[in_band])

Folder-reading helper and MFCC computation

In [13]:
# Read the WAV files of a given folder, tolerating a missing folder
def lire_fichiers_dossier(dossier):
    """Return ``readWavFolder(dossier)``, or three empty lists if the path is missing.

    Args:
        dossier: Path of the folder to read.

    Returns:
        Whatever ``readWavFolder`` returns for an existing path; otherwise a
        message is printed and ``([], [], [])`` is returned.
    """
    if not os.path.exists(dossier):
        # Missing folder: report it and hand back empty results
        print(f"Dossier introuvable : {dossier}")
        return [], [], []
    return readWavFolder(dossier)
    
    
def compute_mfcc(wav_file, sample_rate, n_mfcc=13, n_fft=2048, hop_length=512, n_mels=40):
    """Compute Mel-frequency cepstral coefficients (MFCC) of a mono signal.

    The signal is cut into Hann-windowed frames of ``n_fft`` samples taken
    every ``hop_length`` samples; each frame's power spectrum is projected on
    ``n_mels`` Mel-spaced filters, log-compressed and decorrelated with an
    orthonormal type-2 DCT.

    Args:
        wav_file: 1-D array-like of samples (a single channel).
        sample_rate: Sampling frequency in Hz.
        n_mfcc: Number of cepstral coefficients kept per frame.
        n_fft: Frame length in samples (also the FFT size).
        hop_length: Step in samples between consecutive frames.
        n_mels: Number of Mel filters.

    Returns:
        Array of shape ``(n_frames, n_mfcc)`` with
        ``n_frames = 1 + (len(wav_file) - n_fft) // hop_length``.

    Raises:
        ValueError: If the input is not 1-D or is shorter than ``n_fft``.
    """
    signal = np.asarray(wav_file)
    if signal.ndim != 1:
        raise ValueError(
            f"compute_mfcc expects a 1-D signal, got an array of shape {signal.shape}"
        )
    if len(signal) < n_fft:
        raise ValueError(
            f"Signal too short: {len(signal)} samples for n_fft={n_fft}"
        )

    # Step 1: Compute the power spectrum of every frame
    window = np.hanning(n_fft)
    # "+ 1" keeps the last full frame, the one starting at len(signal) - n_fft
    starts = range(0, len(signal) - n_fft + 1, hop_length)
    frames = np.stack([signal[s:s + n_fft] for s in starts]) * window
    power_spectrum = np.abs(np.fft.rfft(frames, axis=1)) ** 2

    # Step 2: Compute the Mel filterbank (one row per filter, one column per rFFT bin)
    mel_filters = np.zeros((n_mels, n_fft // 2 + 1))
    mel_min = 0
    mel_max = 2595 * np.log10(1 + (sample_rate / 2) / 700)
    # n_mels + 2 points: each filter spans three consecutive points
    mel_points = np.linspace(mel_min, mel_max, n_mels + 2)
    hz_points = 700 * (10**(mel_points / 2595) - 1)
    bin_points = np.floor((n_fft + 1) * hz_points / sample_rate).astype(int)

    # NOTE(review): both slopes include their end points (linspace default),
    # so a filter can hold the value 1 on two adjacent bins; the usual
    # triangular definition uses half-open slopes. Confirm before changing,
    # since it would alter every coefficient.
    for i in range(1, n_mels + 1):
        left, centre, right = bin_points[i - 1], bin_points[i], bin_points[i + 1]
        mel_filters[i - 1, left:centre] = np.linspace(0, 1, centre - left)
        mel_filters[i - 1, centre:right] = np.linspace(1, 0, right - centre)

    # Step 3: Apply the Mel filterbank to the power spectrum
    mel_spectrum = np.dot(power_spectrum, mel_filters.T)

    # Step 4: Take the logarithm of the Mel spectrum (offset avoids log(0))
    log_mel_spectrum = np.log(mel_spectrum + 1e-10)

    # Step 5: Compute the DCT to get MFCCs, keeping the first n_mfcc coefficients
    mfccs = dct(log_mel_spectrum, type=2, axis=1, norm='ortho')[:, :n_mfcc]

    return mfccs

# Example usage: MFCCs of channel 0 of the first loaded recording,
# computed with the default parameters of compute_mfcc
mfccs = compute_mfcc(testWavFileVect[0][:, 0], samplerateVect[0])
print(mfccs)


[[ 5.64521253e+01 -7.21524304e-01  3.88531590e+00 ...  2.79669999e+00
   2.58574356e+00  1.67550579e+00]
 [ 5.71527794e+01  1.77248239e-01  4.30096268e+00 ...  2.69131543e+00
   2.31459621e+00  2.08747072e+00]
 [ 5.69293317e+01 -5.87369181e-02  4.22449737e+00 ...  3.23037067e+00
   3.10002437e+00  2.22914470e+00]
 ...
 [ 6.83085125e+01  6.34760154e+00 -1.87120682e-01 ...  1.99815566e+00
   1.92868507e+00  1.07754660e+00]
 [ 5.78575427e+01 -3.18546488e-01  3.20406110e+00 ...  2.07595788e+00
   2.02895466e+00  2.19739496e+00]
 [ 5.76358941e+01 -2.76905681e-01  3.79439097e+00 ...  2.46331156e+00
   2.43247292e+00  2.38692214e+00]]


## Main

In [14]:
# Racket types (folder suffixes) and zone letters looked up in the file names
racket_names = {"RB":1, "RO":2, "RR":3, "RV":4}
zone_names = {"C":1, "S":2, "V":3}

# MFCC extraction grid
mfcc_range = [13, 20, 30]
n_fft_range = [256, 512]
hop_length_range = [128, 256]
n_mels_range = range(20, 21)

# Random Forest grid
n_estimators_range = range(10, 101, 10)  # number of trees, 10 to 100
max_depth_range = [None, 10, 20, 30, 40]  # maximum tree depth
min_samples_split_range = [2, 5, 10]  # minimum samples required to split a node

# Column index in the stereo sample array -> name written to the results
channel_labels = {0: "Left", 1: "Right"}


def _zone_from_filename(name):
    """Return the zone letter found in ``name`` (the last of C, S, V wins), or None."""
    # NOTE(review): letters are matched anywhere in the name, so a file name
    # that embeds a racket code such as "RV" always yields "V" — confirm the
    # naming scheme and tighten this match if needed.
    zone = None
    for letter in zone_names:
        if letter in name:
            zone = letter
    return zone


def _load_labelled_recordings():
    """Read every racket folder once and return ``(samples, sample_rate, zone)`` tuples."""
    recordings = []
    for raquetteType in racket_names:
        dossier_raquette = f"../../Data_Clean/new_{raquetteType}"
        rates, wavs, names = lire_fichiers_dossier(dossier_raquette)
        for wav, rate, name in zip(wavs, rates, names):
            zone = _zone_from_filename(name)
            if zone is None:
                # Without a zone letter there is no label; never reuse the
                # label of the previous file.
                print(f"No zone letter in file name, skipped: {name}")
                continue
            recordings.append((wav, rate, zone))
    return recordings


def _fit_and_score(X_train, X_test, y_train, y_test, n_estimators, max_depth, min_samples_split):
    """Train one Random Forest and return ``(accuracy_train, accuracy_test)``."""
    rf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth,
                                min_samples_split=min_samples_split, random_state=42)
    rf.fit(X_train, y_train)
    accuracy_train = accuracy_score(y_train, rf.predict(X_train))
    accuracy_test = accuracy_score(y_test, rf.predict(X_test))
    return accuracy_train, accuracy_test


def _evaluate_configuration(recordings, y, c, n_mfcc, n_fft, hop_length, n_mels):
    """Build the features for one MFCC setting and channel, then run the forest grid."""
    # One feature vector per recording, computed on channel c with exactly
    # this MFCC setting, so train and test never share a recording and the
    # reported n_fft / hop_length / n_mels are the ones really used.
    # Only the first n_mfcc frames are kept so that all recordings give
    # arrays of identical shape.
    # NOTE(review): tying the number of frames kept to n_mfcc looks
    # arbitrary — confirm, or introduce a dedicated frame-count setting.
    X_Amplitude = np.array([
        compute_mfcc(wav[:, c], rate, n_mfcc=n_mfcc, n_fft=n_fft,
                     hop_length=hop_length, n_mels=n_mels)[:n_mfcc]
        for wav, rate, _ in recordings
    ])
    X_Amplitude = X_Amplitude.reshape(X_Amplitude.shape[0], -1)  # flatten frames x coefficients

    X_train, X_test, y_train, y_test = train_test_split(
        X_Amplitude, y, test_size=0.2, random_state=42, shuffle=True)

    rows = []
    for n_estimators in n_estimators_range:
        for max_depth in max_depth_range:
            for min_samples_split in min_samples_split_range:
                accuracy_train, accuracy_test = _fit_and_score(
                    X_train, X_test, y_train, y_test,
                    n_estimators, max_depth, min_samples_split)
                rows.append({
                    'Chanel_Name': channel_labels[c],
                    'n_mfcc': n_mfcc,
                    'n_fft': n_fft,
                    'hop_length': hop_length,
                    'n_mels': n_mels,
                    'n_estimators': n_estimators,
                    'max_depth': max_depth,
                    'min_samples_split': min_samples_split,
                    'accuracy_train': accuracy_train,
                    'accuracy_test': accuracy_test
                })
    return rows


# Accumulates one row per evaluated configuration
results = []

# The audio files do not depend on any grid parameter: read them once
recordings = _load_labelled_recordings()
if not recordings:
    raise ValueError("No labelled WAV recording found in the racket folders.")

# Encode string labels into integers
Y_Label = [zone for _, _, zone in recordings]
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(Y_Label)

for n_mfcc in mfcc_range:
    for c in channel_labels:
        print("Chanel", c)
        for n_fft in n_fft_range:
            for hop_length in hop_length_range:
                for n_mels in n_mels_range:
                    results.extend(
                        _evaluate_configuration(recordings, y, c, n_mfcc,
                                                n_fft, hop_length, n_mels))

# Convert the results to a DataFrame
results_df = pd.DataFrame(results)

# Save the results to an Excel file
results_df.to_excel("RTF_ZONE_P1_MFCC.xlsx", index=False)

print("Results have been saved to 'RTF_ZONE_P1_MFCC.xlsx'.")

Chanel 0
Chanel 1
Chanel 0
Chanel 1
Chanel 0
Chanel 1
Results have been saved to 'RTF_ZONE_P1_MFCC.xlsx'.
