In [1]:
import os

import librosa
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

In [2]:
def augment_audio(audio, sr):
    """Create three randomly perturbed variants of an audio signal.

    The perturbations use NumPy's global random generator, so seed it
    beforehand if reproducible augmentations are needed.

    Args:
        audio: Array of audio samples (a waveform as returned by
            ``librosa.load`` is assumed).
        sr: Sampling rate of ``audio``; forwarded to the pitch shifter.

    Returns:
        A list ``[noisy, pitch_shifted, time_stretched]``. If pitch shifting
        fails, the second entry is the unmodified ``audio``.
    """
    # Additive Gaussian noise with a small fixed amplitude.
    noisy = audio + np.random.randn(len(audio)) * 0.005

    # Pitch shift by a random number of semitones between -5 and 5.
    try:
        n_steps = np.random.uniform(-5, 5)
        pitch_shifted = librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)
    except Exception as exc:
        # Best effort: keep going with the untouched signal, but report why.
        # Only ``Exception`` is caught so Ctrl-C / SystemExit still propagate.
        print(f"Erreur lors du changement de hauteur : {exc}")
        pitch_shifted = audio

    # Time stretch by a random rate between 0.8x and 1.2x.
    stretched = librosa.effects.time_stretch(audio, rate=np.random.uniform(0.8, 1.2))

    return [noisy, pitch_shifted, stretched]


In [3]:
def extract_features_with_augmentation(file_path, duration=30, n_mfcc=40):
    """Load an audio file and compute mean-MFCC vectors for it and its augmentations.

    Args:
        file_path: Path of the audio file to load.
        duration: Maximum number of seconds read from the file.
        n_mfcc: Number of MFCC coefficients computed per signal.

    Returns:
        A list of 1-D arrays: the vector of the original clip first, followed
        by one vector per signal returned by ``augment_audio``. An empty list
        is returned (and the error printed) if anything fails.
    """
    try:
        audio, sr = librosa.load(file_path, duration=duration)

        # Original clip first so that index 0 is always the un-augmented audio.
        signals = [audio] + list(augment_audio(audio, sr))

        feature_vectors = []
        for signal in signals:
            mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)
            # Collapse the MFCC matrix to a single vector by averaging
            # (assumes librosa's (n_mfcc, frames) layout, i.e. a mean over time).
            feature_vectors.append(np.mean(mfcc.T, axis=0))
        return feature_vectors
    except Exception as e:
        # Deliberate best effort: one unreadable file must not abort a dataset scan.
        print(f"Erreur lors du traitement du fichier {file_path}: {e}")
        return []


In [4]:
def load_audio_features_with_augmentation(data_path):
    """Build the feature matrix and label vector for a genre-per-folder dataset.

    Every sub-directory of ``data_path`` is treated as one genre and every
    entry inside it as an audio file. Entries of ``data_path`` that are not
    directories are skipped. Directory listings are sorted so the row order is
    the same on every run and every platform.

    Args:
        data_path: Root directory containing one sub-directory per genre.

    Returns:
        A tuple ``(features, labels)`` of NumPy arrays. Each feature vector
        returned by ``extract_features_with_augmentation`` becomes one row,
        labelled with the name of the genre folder it came from.
    """
    features, labels = [], []

    for genre in sorted(os.listdir(data_path)):
        genre_path = os.path.join(data_path, genre)
        if not os.path.isdir(genre_path):
            # Stray files next to the genre folders (e.g. .DS_Store) are not genres.
            continue

        for file_name in sorted(os.listdir(genre_path)):
            file_path = os.path.join(genre_path, file_name)
            try:
                vectors = extract_features_with_augmentation(file_path)
            except Exception as e:
                print(f"Erreur avec le fichier {file_path}: {e}")
                continue
            # One label per produced vector (original + augmented variants).
            features.extend(vectors)
            labels.extend([genre] * len(vectors))

    return np.array(features), np.array(labels)


In [None]:
# Root folder holding one sub-directory per genre.
data_path = "Data/genres_original"

# Seed NumPy's global RNG so the random augmentations (noise, pitch, tempo)
# are the same on every Restart & Run All.
np.random.seed(42)

# Build the feature matrix (original + augmented clips) and the matching labels.
X, y = load_audio_features_with_augmentation(data_path)
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")
if len(X) == 0:
    # Fail early with a clear message instead of an obscure error further down.
    raise ValueError(f"Aucune caractéristique audio extraite depuis {data_path!r}")

# Integer-encode the genre names, then one-hot encode them for the softmax output.
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)
y_categorical = to_categorical(y_encoded)

# Hold out 20 % for testing; stratify so every genre keeps its share in both splits.
# NOTE(review): the split is done per feature row, so augmented copies of one
# track can land on both sides of the split (train/test leakage). Consider
# splitting by file before augmenting.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_categorical, test_size=0.2, random_state=42, stratify=y_encoded
)


In [None]:
# Fully connected classifier: three hidden ReLU layers regularised with
# dropout (batch normalisation after the first two), ending in a softmax
# over the genres.
n_features = X_train.shape[1]      # width of one feature vector, read from the data instead of hard-coding 40
n_classes = len(encoder.classes_)  # one output unit per genre

model = Sequential([
    Dense(512, activation='relu', input_shape=(n_features,)),
    BatchNormalization(),
    Dropout(0.3),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(n_classes, activation='softmax'),
])


In [None]:
# Adam with a small learning rate; categorical cross-entropy pairs with the
# softmax output layer. `Adam` is imported in the notebook's top import cell.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()


In [None]:
# Fit for 50 epochs in mini-batches of 32, holding back 20 % of the training
# data for per-epoch validation.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
)

# Score the trained network on the held-out test set.
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print(f"Précision sur l'ensemble de test : {accuracy * 100:.2f}%")


In [None]:
# One figure per metric, each comparing the training and validation curves.
curve_specs = [
    # (train key, validation key, train label, validation label, title, y-axis label)
    ('accuracy', 'val_accuracy',
     'Précision (entraînement)', 'Précision (validation)',
     "Précision au cours des epochs", "Précision"),
    ('loss', 'val_loss',
     'Perte (entraînement)', 'Perte (validation)',
     "Perte au cours des epochs", "Perte"),
]

for train_key, val_key, train_label, val_label, title, y_label in curve_specs:
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_title(title)
    ax.set_xlabel("Epochs")
    ax.set_ylabel(y_label)
    ax.legend()
    plt.show()


In [None]:
# Turn softmax outputs and one-hot targets back into class indices.
y_pred = np.argmax(model.predict(X_test), axis=-1)
y_true = np.argmax(y_test, axis=-1)

# Pin the label set to every known genre so the matrix always has one
# row/column per entry of `display_labels`, even if a genre is absent from the
# test split and never predicted.
class_indices = np.arange(len(encoder.classes_))
conf_matrix = confusion_matrix(y_true, y_pred, labels=class_indices)
ConfusionMatrixDisplay(conf_matrix, display_labels=encoder.classes_).plot(cmap='viridis')
plt.show()


In [None]:
def predict_genre(file_path):
    """Predict the genre of a single audio file with the trained model.

    Args:
        file_path: Path of the audio file to classify.

    Returns:
        The predicted genre name, decoded through the fitted label encoder.

    Raises:
        ValueError: If no features could be extracted from ``file_path``
            (the extractor reports failures by returning an empty list).
    """
    feature_vectors = extract_features_with_augmentation(file_path)
    if len(feature_vectors) == 0:
        # Without this guard the failure surfaced as an opaque IndexError.
        raise ValueError(f"Impossible d'extraire les caractéristiques de {file_path}")

    # Index 0 holds the vector of the un-augmented clip; the augmented
    # variants are ignored for inference.
    batch = np.expand_dims(feature_vectors[0], axis=0)  # shape (1, n_features)
    predicted_genre_index = np.argmax(model.predict(batch), axis=-1)
    return encoder.inverse_transform(predicted_genre_index)[0]

# Example usage: classify one file from the dataset and print the result.
test_file = "Data/genres_original/rock/rock.00001.wav"  # Replace with the path of any audio file
predicted_genre = predict_genre(test_file)
print(f"Le genre prédit pour {test_file} est : {predicted_genre}")
