# Random Forest (RTF) model for predicting the zone from spectral peaks

Import

In [32]:
import os
import glob
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from scipy.io import wavfile
from scipy.fft import fft
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

Function to read the WAV files of a folder

In [33]:
def readWavFolder(folderPath):
    """Read every ``*.wav`` file located directly inside *folderPath*.

    The three returned lists are parallel: element ``i`` of each one describes
    the same file. Files are processed in sorted path order so the result is
    deterministic across runs and platforms.

    Args:
        folderPath: Path of the directory to scan (not recursive).

    Returns:
        A tuple ``(sampleRateFolder, fileFolder, files)`` where
        ``sampleRateFolder`` holds the sample rate of each file,
        ``fileFolder`` holds the sample data returned by ``wavfile.read`` and
        ``files`` holds the matching file names (without directory).

    Raises:
        FileNotFoundError: If *folderPath* does not exist.
        NotADirectoryError: If *folderPath* exists but is not a directory.
    """
    # glob silently returns nothing for a bad path, so fail loudly instead.
    if not os.path.exists(folderPath):
        raise FileNotFoundError(f"No such folder: {folderPath}")
    if not os.path.isdir(folderPath):
        raise NotADirectoryError(f"Not a folder: {folderPath}")

    fileFolder = []
    sampleRateFolder = []
    files = []

    # Names are taken from the very paths that are read, so a name can never
    # be paired with the audio of another file (or with a non-WAV entry).
    for wav_path in sorted(glob.glob(os.path.join(folderPath, '*.wav'))):
        samplerate, data = wavfile.read(wav_path)
        fileFolder.append(data)
        sampleRateFolder.append(samplerate)
        files.append(os.path.basename(wav_path))
    return sampleRateFolder, fileFolder, files
    
# Load the "new_RB" recordings used by the exploratory cells below.
samplerateVect, testWavFileVect, filename = readWavFolder(
    folderPath="../../Data_Clean/new_RB"
)


Spectrum

In [35]:
def spectrumFromWav(wavFile, sampleRate, chanel):
    """Return the one-sided magnitude spectrum of one channel of a recording.

    Args:
        wavFile: 2-D array indexed as ``[sample, channel]``.
        sampleRate: Not used by this function; kept in the signature for
            callers that pass it.
        chanel: Index of the channel (column) to analyse.

    Returns:
        The magnitudes of the first ``len(signal) // 2`` FFT bins; the second
        half of the FFT output is dropped.
    """
    channel_samples = wavFile[:, chanel]
    magnitudes = abs(fft(channel_samples))
    half_length = len(magnitudes) // 2
    return magnitudes[:half_length]


# Magnitude spectrum of channel 0 of every loaded recording, restricted to the
# bins whose frequency lies between 150 Hz and 1000 Hz (inclusive).
spectrumVect = []
for wav_data, rate in zip(testWavFileVect, samplerateVect):
    spectrum = spectrumFromWav(wav_data, rate, 0)
    n_bins = len(spectrum)
    # Frequency of each kept bin, rebuilt from the one-sided spectrum length.
    freqs = np.fft.fftfreq(n_bins * 2, d=1/rate)[:n_bins]
    in_band = (freqs >= 150) & (freqs <= 1000)
    filtered_spectrum = spectrum[in_band]
    spectrumVect.append(filtered_spectrum)


#for idx, spectrum in enumerate(spectrumVect[:3]):
    #plt.figure(figsize=(6, 3))
    #plt.plot(spectrum)
    #plt.title(f"Spectrum {idx + 1}")
    #plt.xlabel("Frequency Bin")
    #plt.ylabel("Magnitude")
    #plt.show()


Extract the N largest spectral peaks and store them

In [None]:
def extractNPeak(n_peak, signal):
    """Return the ``n_peak`` largest values of *signal*, in index order.

    "Peak" here means one of the largest samples, not a local maximum: the
    selection is done purely by value.

    Args:
        n_peak: Number of values to keep. Zero or a negative number yields
            empty results; a number larger than ``len(signal)`` yields every
            sample.
        signal: 1-D sequence of values (converted with ``np.asarray``).

    Returns:
        A tuple ``(peaks, peak_values)``: the selected indices sorted in
        ascending order, and the signal values at those indices.
    """
    signal = np.asarray(signal)

    # A slice of the form [-0:] would select the whole array, so a
    # non-positive request is answered explicitly with empty results.
    if n_peak <= 0:
        no_peaks = np.empty(0, dtype=np.intp)
        return no_peaks, signal[no_peaks]

    # Indices of the n largest values, put back in ascending index order.
    peaks = np.sort(np.argsort(signal)[-n_peak:])
    peak_values = signal[peaks]

    return peaks, peak_values

# Quick check on the first spectrum: its 20 largest bins.
extractNPeak(n_peak=20, signal=spectrumVect[0])

# Read the WAV files of a given folder, tolerating a missing folder.
def lire_fichiers_dossier(dossier):
    """Return the WAV data of *dossier*, or empty lists if it is not a folder.

    Args:
        dossier: Path of the folder to read.

    Returns:
        The result of ``readWavFolder(dossier)`` when *dossier* is an existing
        directory. Otherwise a message is printed and three empty lists are
        returned.
    """
    # isdir rather than exists: a path pointing at a regular file must also
    # take the fallback branch instead of being handed to readWavFolder.
    if os.path.isdir(dossier):
        return readWavFolder(dossier)
    else:
        print(f"Dossier introuvable : {dossier}")
        return [], [], []  # Empty lists when the folder is unavailable


## Main

In [29]:
# Racket types (one "new_<type>" data folder each) and the zone codes that are
# looked up in the file names.
racket_names = {"RB":1, "RO":2, "RR":3, "RV":4}
zone_names = {"C":1, "S":2, "V":3}
channel_labels = {0: "Left", 1: "Right"}

# Random Forest hyper-parameters explored by the grid search.
n_estimators_range = range(10, 101, 10)  # Number of trees, 10 to 100
max_depth_range = [None, 10, 20, 30, 40]  # Maximum tree depths
min_samples_split_range = [2, 5, 10]  # Minimum samples to split a node
# NOTE: the two ranges below are declared but not swept by the grid search.
min_samples_leaf_range = [1, 2, 4]  # Minimum samples in a leaf
max_features_range = ['sqrt', 'log2', None]  # Features considered per split

# Load every recording once. The audio does not depend on the channel or on
# the number of peaks, so reading it inside those loops only repeats work.
recordings = []  # (sample rate, sample data, zone code) per usable file
for raquetteType in racket_names:
    dossier_raquette = f"../../Data_Clean/new_{raquetteType}"
    samplerateVect, WavFileVect, filesName = lire_fichiers_dossier(dossier_raquette)

    for sample_rate, wav_data, file_name in zip(samplerateVect, WavFileVect, filesName):
        # The zone is the last of C, S, V found in the file name. The
        # extension is ignored so that it can never be read as a zone code.
        stem = os.path.splitext(file_name)[0]
        zone = None
        for zone_code in zone_names:
            if zone_code in stem:
                zone = zone_code
        if zone is None:
            # Without this check the previous file's zone would be reused.
            print(f"No zone code found in file name, skipped: {file_name}")
            continue
        recordings.append((sample_rate, wav_data, zone))

if not recordings:
    raise RuntimeError("No labelled WAV recording found under ../../Data_Clean")

# Encode the string labels into integers (same for every configuration).
Y_Label = [zone for _, _, zone in recordings]
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(Y_Label)

# One row per trained model.
results = []
for c in (0, 1):
    print("Chanel", c)
    channel_name = channel_labels[c]

    # Band-limited spectrum (150-1000 Hz) of every recording for this channel,
    # computed once and reused for every peak count.
    band_spectra = []
    for sample_rate, wav_data, _ in recordings:
        spectrum = spectrumFromWav(wav_data, sample_rate, c)
        # Bin frequencies come from the true number of samples, which stays
        # exact when that number is odd.
        freqs = np.fft.fftfreq(len(wav_data), d=1 / sample_rate)[:len(spectrum)]
        in_band = (freqs >= 150) & (freqs <= 1000)
        band_spectra.append((freqs[in_band], spectrum[in_band]))

    for n_peak in range(5, 101, 5):
        print("Nbr_peak", n_peak)
        X_peaksHz = []
        X_peaksAmplitude = []

        for band_freqs, band_spectrum in band_spectra:
            peaks, peak_values = extractNPeak(n_peak, band_spectrum)

            # Store real frequencies: bin indices are not comparable between
            # recordings of different length or sample rate.
            X_peaksHz.append(band_freqs[peaks])

            # Normalise the amplitudes by the strongest peak; a silent or
            # empty band is left untouched rather than divided by zero.
            strongest = peak_values.max() if peak_values.size else 0
            if strongest > 0:
                peak_values = peak_values / strongest
            X_peaksAmplitude.append(peak_values)

        # Zero-pad so that every recording contributes the same number of
        # features (a short band can hold fewer than n_peak bins).
        max_length = max(len(peaks_hz) for peaks_hz in X_peaksHz)
        X_peaksHz_padded = [
            np.pad(peaks_hz, (0, max_length - len(peaks_hz)), constant_values=0)
            for peaks_hz in X_peaksHz
        ]
        X_peaksAmplitude_padded = [
            np.pad(amps, (0, max_length - len(amps)), constant_values=0)
            for amps in X_peaksAmplitude
        ]

        # Combine the frequencies and amplitudes into a single feature matrix.
        X = np.hstack((np.array(X_peaksHz_padded), np.array(X_peaksAmplitude_padded)))

        # Fixed seed: every configuration is scored on the same split, so the
        # accuracies are comparable and the run is reproducible.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, shuffle=True)

        # Try every combination of the swept hyper-parameters.
        for n_estimators in n_estimators_range:
            for max_depth in max_depth_range:
                for min_samples_split in min_samples_split_range:
                    # Build and train the Random Forest model.
                    rf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth,
                                                min_samples_split=min_samples_split, random_state=42)
                    rf.fit(X_train, y_train)

                    # Accuracy on the held-out test set.
                    accuracy_test = accuracy_score(y_test, rf.predict(X_test))

                    # Accuracy on the training set.
                    accuracy_train = accuracy_score(y_train, rf.predict(X_train))

                    results.append({
                        'Chanel_Name': channel_name,
                        'nbr_de_peak': n_peak,
                        'n_estimators': n_estimators,
                        'max_depth': max_depth,
                        'min_samples_split': min_samples_split,
                        'accuracy_train': accuracy_train,
                        'accuracy_test': accuracy_test
                    })

# Convert the results into a DataFrame.
results_df = pd.DataFrame(results)

# Save the results to an Excel file.
results_df.to_excel("RTF_ZONE_P1_Peaks.xlsx", index=False)

print("Results have been saved to 'RTF_ZONE_P1_Peaks.xlsx'.")

Chanel 0
Nbr_peak 5
Nbr_peak 10
Nbr_peak 15
Nbr_peak 20
Nbr_peak 25
Nbr_peak 30
Nbr_peak 35
Nbr_peak 40
Nbr_peak 45
Nbr_peak 50
Nbr_peak 55
Nbr_peak 60
Nbr_peak 65
Nbr_peak 70
Nbr_peak 75
Nbr_peak 80
Nbr_peak 85
Nbr_peak 90
Nbr_peak 95
Nbr_peak 100
Chanel 1
Nbr_peak 5
Nbr_peak 10
Nbr_peak 15
Nbr_peak 20
Nbr_peak 25
Nbr_peak 30
Nbr_peak 35
Nbr_peak 40
Nbr_peak 45
Nbr_peak 50
Nbr_peak 55
Nbr_peak 60
Nbr_peak 65
Nbr_peak 70
Nbr_peak 75
Nbr_peak 80
Nbr_peak 85
Nbr_peak 90
Nbr_peak 95
Nbr_peak 100
Results have been saved to 'RTF_ZONE_P1_Peaks.xlsx'.
