In [1]:
import numpy as np
import pandas as pd
import soundfile as sf
import matplotlib.pyplot as plt
import librosa
import os
from sklearn.model_selection import train_test_split
import tensorflow as tf
from keras import layers

In [2]:
data_train = pd.read_csv('dataset_train.csv')
data_train = data_train.sample(frac=1).reset_index(drop=True)
data_test = pd.read_csv('dataset_test.csv')
data_test = data_test.sample(frac=1).reset_index(drop=True)

data_test_folder = "./IRMAS-TestingData/" 

def getdParam():
    dList = []
    for filename in data_train['name']:
        path = os.path.join(data_test_folder, filename + ".wav")
        audio, sr = librosa.load(path)
        dList.append(librosa.get_duration(y=audio, sr=sr))
    return dList

dmin = 5.019773242630386 # La fonction prend 1min30 à s'executer, on a donc sauvegardé le résultat dans une variable
dmean = 16.886306332321844 
dmax = 20.0
len = 2874

def getSpectre():
    L = []

    for path in data_train['path']:
        audio, sr = librosa.load(path)
        spectre = librosa.feature.melspectrogram(y = audio, sr = sr, n_mels = 128, fmax = 8000)
        ptodb = librosa.power_to_db(spectre, ref=np.max)
        L.append(ptodb)
        
    Y = np.array(data_train.drop(['name', 'path'], axis=1))    
    list_spectres = np.array(L)
    np.save('list_spectres.npy', list_spectres)
    np.save('list_spectres_values.npy', Y)

#getSpectre() ##La fonction prend très longtemps à créer le fichier npy, on l'execute une fois et on enregistre le fichier

(6705, 128, 130) (6705, 12)


In [9]:
list_spectres = np.load('list_spectres.npy')
list_spectres_values = np.load('list_spectres_values.npy')
X = list_spectres
Y = list_spectres_values
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=1, test_size=0.2)

print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)

(5364, 128, 130) (1341, 128, 130) (5364, 12) (1341, 12)


In [33]:
model = tf.keras.Sequential([
    layers.Input(shape=(128, 130, 1)),  # Input layer with shape (128, 130, 1)
    layers.Conv2D(32, (3, 3), activation='relu'),  # Convolutional layer with 32 filters and 3x3 kernel
    layers.MaxPooling2D((2, 2)),  # Max pooling layer with 2x2 pool size
    layers.Conv2D(64, (3, 3), activation='relu'),  # Convolutional layer with 64 filters and 3x3 kernel
    layers.MaxPooling2D((2, 2)),  # Max pooling layer with 2x2 pool size
    layers.Flatten(),  # Flatten the 2D feature maps into a 1D array
    layers.Dense(64, activation='relu'),  # Dense layer with 64 units and ReLU activation
    layers.Dense(12, activation='sigmoid')  # Output layer with 12 units and softmax activation
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

model.fit(X_train, Y_train, batch_size=32, epochs=1)

loss, accuracy = model.evaluate(X_test, Y_test)
print("Test loss:", loss)
print("Test accuracy:", accuracy)

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_8 (Conv2D)           (None, 126, 128, 32)      320       
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 63, 64, 32)       0         
 2D)                                                             
                                                                 
 conv2d_9 (Conv2D)           (None, 61, 62, 64)        18496     
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 30, 31, 64)       0         
 2D)                                                             
                                                                 
 flatten_4 (Flatten)         (None, 59520)             0         
                                                                 
 dense_8 (Dense)             (None, 64)               

In [39]:
# Assuming you have already trained and compiled your model

# Get a specific row from your dataset

a = np.random.randint(0, 1341)

input_row = X_test[a]  # Replace with your desired row
output_row = Y_test[a]  # Replace with your desired row

# Reshape the input row if needed
input_row = input_row.reshape(1, 128, 130, 1)  # Reshape for a single sample

# Perform the prediction
predictions = model.predict(input_row)

# Get the predicted class probabilities
class_probabilities = predictions[0]

# Get the predicted class index
predicted_class_index = tf.argmax(class_probabilities).numpy()

# Get the predicted class label
class_labels = ["cel", "cla", "flu", "gac", "gel", "org", "pia", "sax", "tru", "vio", "voi", "drum"]
predicted_class_label = class_labels[predicted_class_index]

# Print the predicted class label and probabilities
print(output_row)
print("Predicted Class:", predicted_class_label)
print("Class Probabilities:", class_probabilities)


[0 0 0 0 0 0 0 0 1 0 0 1]
Predicted Class: cel
Class Probabilities: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
