In [8]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import os
import re
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import datetime

In [7]:
# Dossier racine contenant les sous-dossiers par classe
DATA_DIR = '../data/'

# Mapping des noms de dossier vers un label numérique
class_names = {
    'car_horn': 0,
    'dog_bark': 1,
    'drilling': 2,
    'engine_idling': 3,
    'gun_shot': 4,
    'jackhammer': 5,
    'siren': 6
}

X, y = [], []

# Parcours de chaque dossier de classe
for class_folder, label in class_names.items():
    class_path = os.path.join(DATA_DIR, class_folder)
    if os.path.isdir(class_path):
        print(f"Traitement de la classe '{class_folder}'...")
        for filename in os.listdir(class_path):
            filepath = os.path.join(class_path, filename)
            try:
                y_audio, sr = librosa.load(filepath, sr=None)
                mfccs = librosa.feature.mfcc(y=y_audio, sr=sr, n_mfcc=20, fmax=4000)
                mfcc_mean = np.mean(mfccs, axis=1)
                mfcc_var = np.var(mfccs, axis=1)
                feature_vector = np.concatenate((mfcc_mean, mfcc_var))
                X.append(feature_vector)
                y.append(label)
            except Exception as e:
                print(f"Erreur avec {filename} : {e}")

X = np.array(X)
y = np.array(y)

# Division stratifiée pour conserver la proportion des classes
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Entraînement d'un classifieur Random Forest
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Prédictions et évaluation
y_pred = clf.predict(X_test)

print("🌲 Random Forest")
print("Accuracy:", accuracy)
print(report)

# Enregistrement dans un fichier
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
report_dir = "reports"
os.makedirs(report_dir, exist_ok=True)
report_path = os.path.join(report_dir, f"random_forest_report_{timestamp}.txt")

with open(report_path, "w", encoding="utf-8") as f:
    f.write("🌲 Random Forest\n")
    f.write(f"Accuracy: {accuracy:.4f}\n")
    f.write(report)

print(f"\n📄 Rapport sauvegardé dans : {report_path}")


Traitement de la classe 'car_horn'...


  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Traitement de la classe 'dog_bark'...
Traitement de la classe 'drilling'...
Traitement de la classe 'engine_idling'...
Traitement de la classe 'gun_shot'...
Traitement de la classe 'jackhammer'...
Traitement de la classe 'siren'...
🌲 Random Forest
Accuracy: 0.9148936170212766
               precision    recall  f1-score   support

     car_horn       0.97      0.70      0.81        86
     dog_bark       0.93      0.87      0.90       200
     drilling       0.87      0.80      0.83       200
engine_idling       0.92      0.97      0.95       200
     gun_shot       0.87      0.96      0.91       245
   jackhammer       0.90      0.94      0.92       200
        siren       0.96      0.98      0.97       373

     accuracy                           0.91      1504
    macro avg       0.92      0.89      0.90      1504
 weighted avg       0.92      0.91      0.91      1504



In [12]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import Input
from sklearn.utils.class_weight import compute_class_weight


# 🔹 Normalisation des features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 🔹 Encodage des labels en one-hot
y_train_cat = to_categorical(y_train, num_classes=7)
y_test_cat = to_categorical(y_test, num_classes=7)

# 🔹 Pondération automatique des classes
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
class_weights_dict = dict(enumerate(class_weights))

# 🔹 Modèle MLP avec architecture améliorée
mlp = Sequential([
    Input(shape=(40,)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(7, activation='softmax')  # 7 classes
])

mlp.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

# 🔹 Entraînement avec class_weight
mlp.fit(
    X_train, y_train_cat,
    epochs=30,
    batch_size=32,
    validation_split=0.2,
    class_weight=class_weights_dict
)

# 🔹 Prédiction
y_pred_mlp = mlp.predict(X_test)
y_pred_mlp_class = tf.argmax(y_pred_mlp, axis=1)

# 🔹 Évaluation
print("\n🧠 Réseau de neurones (MLP) avec normalisation et class weight")
print("Accuracy :", accuracy_score(y_test, y_pred_mlp_class))
print(classification_report(
    y_test, y_pred_mlp_class,
    target_names=list(class_names.keys())
))

Epoch 1/30
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.3679 - loss: 1.6778 - val_accuracy: 0.6892 - val_loss: 0.9187
Epoch 2/30
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6480 - loss: 0.9596 - val_accuracy: 0.7492 - val_loss: 0.7213
Epoch 3/30
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7034 - loss: 0.8070 - val_accuracy: 0.7666 - val_loss: 0.6551
Epoch 4/30
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7291 - loss: 0.7256 - val_accuracy: 0.7950 - val_loss: 0.5785
Epoch 5/30
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7831 - loss: 0.6086 - val_accuracy: 0.8124 - val_loss: 0.5498
Epoch 6/30
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7793 - loss: 0.6241 - val_accuracy: 0.8212 - val_loss: 0.5230
Epoch 7/30
[1m115/115[0m 