# RTF model for predicting the zone from band energy

Import

In [25]:
import os
import glob
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from scipy.io import wavfile
from scipy.fft import fft
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

Read WAV function

In [26]:
def readWavFolder(folderPath):
    """Read every ``*.wav`` file located directly inside ``folderPath``.

    Args:
        folderPath: Path of the directory to scan (not recursive).

    Returns:
        A tuple ``(sampleRateFolder, fileFolder, files)`` of three
        index-aligned lists of equal length: the sample rate of each file,
        the sample data returned by ``scipy.io.wavfile.read``, and the base
        name of the file the data came from.
    """
    # Sort the matches so the sample order is reproducible from run to run;
    # glob gives no ordering guarantee.
    wavPaths = sorted(glob.glob(os.path.join(folderPath, '*.wav')))

    fileFolder = []
    sampleRateFolder = []
    files = []
    for wavPath in wavPaths:
        samplerate, data = wavfile.read(wavPath)
        fileFolder.append(data)
        sampleRateFolder.append(samplerate)
        # Record the name of the file actually read. A plain directory
        # listing would also contain non-WAV entries and would not line up
        # with the data lists.
        files.append(os.path.basename(wavPath))
    return sampleRateFolder, fileFolder, files
    
# Load the WAV files of the "new_RB" data folder into module-level lists
# (sample rates, sample arrays, file names).
samplerateVect,testWavFileVect,filename = readWavFolder("../../Data_Clean/new_RB")


Spectrum

In [27]:
def spectrumFromWav(wavFile,sampleRate,chanel):
    """Return the one-sided FFT magnitude spectrum of one channel of a signal.

    Args:
        wavFile: Sample array, either 2-D (one column per channel) or 1-D
            (mono).
        sampleRate: Sample rate of the signal. Not used by the computation;
            kept so existing callers keep working.
        chanel: Index of the channel (column) to analyse. A mono signal only
            has channel 0.

    Returns:
        Array holding the magnitudes of the first ``n // 2`` FFT bins, where
        ``n`` is the number of samples.

    Raises:
        IndexError: If ``chanel`` does not exist in ``wavFile``.
    """
    samples = np.asarray(wavFile)
    if samples.ndim == 1:
        # Mono data has no channel axis, so column indexing would fail.
        if chanel != 0:
            raise IndexError(f"mono signal has no channel {chanel}")
        signal = samples
    else:
        signal = samples[:, chanel]

    spectrum = fft(signal)
    # Keep only the first half: the second half mirrors it for real input.
    return abs(spectrum[:len(spectrum) // 2])


# Band-limited (150-1000 Hz) magnitude spectrum of channel 0 for every
# file loaded above.
spectrumVect = []
for i, wav in enumerate(testWavFileVect):
    rate = samplerateVect[i]
    spectrum = spectrumFromWav(wav, rate, 0)
    n_bins = len(spectrum)
    # Frequency axis matching the half spectrum returned by spectrumFromWav.
    freqs = np.fft.fftfreq(n_bins * 2, d=1/rate)[:n_bins]
    in_band = (freqs >= 150) & (freqs <= 1000)
    filtered_spectrum = spectrum[in_band]
    spectrumVect.append(filtered_spectrum)


#for idx, spectrum in enumerate(spectrumVect[:3]):
    #plt.figure(figsize=(6, 3))
    #plt.plot(spectrum)
    #plt.title(f"Spectrum {idx + 1}")
    #plt.xlabel("Frequency Bin")
    #plt.ylabel("Magnitude")
    #plt.show()


Energy

In [28]:
# Read the WAV files of a given folder
def lire_fichiers_dossier(dossier):
    """Read the WAV files of ``dossier``, tolerating a missing folder.

    Args:
        dossier: Path of the folder to read.

    Returns:
        The result of ``readWavFolder(dossier)`` when ``dossier`` is an
        existing directory; otherwise three empty lists, after printing a
        message naming the missing folder.
    """
    # isdir rather than exists: a path that exists but is not a directory is
    # also reported as missing instead of being handed to readWavFolder.
    if not os.path.isdir(dossier):
        print(f"Dossier introuvable : {dossier}")
        return [], [], []
    return readWavFolder(dossier)
    
def energy_per_frequency_band_from_spectrum(spectrum, freqs, band_width):
    """Sum the spectral energy inside consecutive fixed-width frequency bands.

    Bands start at 0 and are half-open, ``[start, start + band_width)``.
    Enough bands are generated to cover the highest frequency in ``freqs``.

    Args:
        spectrum: Magnitudes, one per entry of ``freqs``.
        freqs: Frequency of each spectrum bin.
        band_width: Width of a band, as a positive integer in the same unit
            as ``freqs``.

    Returns:
        A tuple ``(band_energies, band_frequencies)`` of two lists of equal
        length: the sum of squared magnitudes of each band and the centre
        frequency of each band. Both lists are empty when ``freqs`` is empty.
    """
    spectrum = np.asarray(spectrum)
    freqs = np.asarray(freqs)
    if freqs.size == 0:
        return [], []

    # +1 so the band containing the highest frequency is always generated.
    # Stopping at int(highest) would drop the top bin whenever that value is
    # a multiple of band_width.
    upper = int(freqs.max()) + 1

    band_energies = []
    band_frequencies = []
    for start_freq in range(0, upper, band_width):
        end_freq = start_freq + band_width
        in_band = (freqs >= start_freq) & (freqs < end_freq)
        band_energies.append(np.sum(spectrum[in_band] ** 2))  # Sum of squared magnitudes
        band_frequencies.append((start_freq + end_freq) / 2)  # Center frequency of the band

    return band_energies, band_frequencies


## Main

In [29]:
# Racket types (one data folder per key) and impact zones
racket_names = {"RB":1, "RO":2, "RR":3, "RV":4}
zone_names = {"C":1, "S":2, "V":3}

# Display name of each WAV channel index, as stored in the results
channel_labels = {0: "Left", 1: "Right"}

# Random Forest hyper-parameter grid
n_estimators_range = range(10, 101, 10)  # Number of trees, from 10 to 100
max_depth_range = [None, 10, 20, 30, 40]  # Maximum tree depths
min_samples_split_range = [2, 5, 10]  # Minimum number of samples to split a node
# NOTE(review): the two ranges below are defined but are not part of the
# search loop, so the forests use the library defaults for these parameters.
min_samples_leaf_range = [1, 2, 4]  # Minimum number of samples in a leaf
max_features_range = ['sqrt', 'log2', None]  # Number of features considered

# One dict per (channel, band width, hyper-parameter combination)
results = []
for c in (0, 1):
    print("Chanel", c)
    channel_name = channel_labels[c]

    # The band-limited spectrum of a file does not depend on the band width,
    # so every WAV file is read and transformed once per channel instead of
    # once per (channel, band width) pair.
    samples = []  # (zone label, spectrum within 150-1000 Hz, matching frequencies)
    for raquetteType in racket_names:
        dossier_raquette = f"../../Data_Clean/new_{raquetteType}"
        samplerateVect, WavFileVect, filesName = lire_fichiers_dossier(dossier_raquette)

        # Assumes filesName[i] names the file behind WavFileVect[i] - TODO confirm.
        for file_name, samplerate, wav in zip(filesName, samplerateVect, WavFileVect):

            # The zone is the last of C, S, V found in the file name
            # (V wins over S, S over C).
            # NOTE(review): this is a substring test on the whole name, so any
            # other upper-case C/S/V in the name also matches - confirm the
            # naming scheme.
            zone = None
            for candidate in zone_names:
                if candidate in file_name:
                    zone = candidate
            if zone is None:
                # Without this guard the sample would silently reuse the zone
                # of the previous file (or fail on the very first file).
                print(f"Skipping {file_name}: no zone marker in file name")
                continue

            spectrum = spectrumFromWav(wav, samplerate, c)
            freqs = np.fft.fftfreq(len(spectrum) * 2, d=1 / samplerate)[:len(spectrum)]
            in_band = (freqs >= 150) & (freqs <= 1000)
            samples.append((zone, spectrum[in_band], freqs[in_band]))

    if not samples:
        raise ValueError("No labelled WAV files found in ../../Data_Clean/new_*")

    for band_width in range(10, 101, 5):
        X_Hz = []
        X_Amplitude = []
        Y_Label = []
        print("band width :", band_width)

        for zone, filtered_spectrum, filtered_freqs in samples:
            band_energies, band_frequencies = energy_per_frequency_band_from_spectrum(filtered_spectrum, filtered_freqs, band_width)

            X_Hz.append(band_frequencies)
            X_Amplitude.append(band_energies)
            Y_Label.append(zone)

        # Normalise each sample's band energies by its largest band energy
        X_Amplitude = [np.asarray(energies) / np.max(energies) for energies in X_Amplitude]

        # Ensure all arrays in X_Hz and X_Amplitude have the same length
        max_length = max(max(len(peaks) for peaks in X_Hz), max(len(amps) for amps in X_Amplitude))
        X_Hz_padded = [np.pad(peaks, (0, max_length - len(peaks)), constant_values=0) for peaks in X_Hz]
        X_Amplitude_padded = [np.pad(amps, (0, max_length - len(amps)), constant_values=0) for amps in X_Amplitude]

        # Combine the frequencies and amplitudes into a single feature matrix
        X = np.hstack((np.array(X_Hz_padded), np.array(X_Amplitude_padded)))

        # Encode string labels into integers
        label_encoder = LabelEncoder()
        y = label_encoder.fit_transform(Y_Label)

        # Split the data into training and test sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)

        # Try every combination of the searched hyper-parameters
        for n_estimators in n_estimators_range:
            for max_depth in max_depth_range:
                for min_samples_split in min_samples_split_range:
                    # Create and train the Random Forest model
                    rf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth,
                                                min_samples_split=min_samples_split, random_state=42)
                    rf.fit(X_train, y_train)

                    # Evaluate on the test set
                    y_pred = rf.predict(X_test)
                    accuracy_test = accuracy_score(y_test, y_pred)

                    # Evaluate on the training set
                    y_train_pred = rf.predict(X_train)
                    accuracy_train = accuracy_score(y_train, y_train_pred)

                    results.append({
                        'Chanel_Name': channel_name,
                        'band_width': band_width,
                        'n_estimators': n_estimators,
                        'max_depth': max_depth,
                        'min_samples_split': min_samples_split,
                        'accuracy_train': accuracy_train,
                        'accuracy_test': accuracy_test
                    })

# Convert the results to a DataFrame
results_df = pd.DataFrame(results)

# Save the results to an Excel file
results_df.to_excel("RTF_ZONE_P1_Energie.xlsx", index=False)

print("Results have been saved to 'RTF_ZONE_P1_Energie.xlsx'.")

Chanel 0
band width : 10
band width : 15
band width : 20
band width : 25
band width : 30
band width : 35
band width : 40
band width : 45
band width : 50
band width : 55
band width : 60
band width : 65
band width : 70
band width : 75
band width : 80
band width : 85
band width : 90
band width : 95
band width : 100
Chanel 1
band width : 10
band width : 15
band width : 20
band width : 25
band width : 30
band width : 35
band width : 40
band width : 45
band width : 50
band width : 55
band width : 60
band width : 65
band width : 70
band width : 75
band width : 80
band width : 85
band width : 90
band width : 95
band width : 100
Results have been saved to 'RTF_ZONE_P1_Energie.xlsx'.
