In [89]:
import librosa
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

ModuleNotFoundError: No module named 'keras.wrappers'

In [23]:
# Function to extract features from an audio file
def extract_features(file_path):
    y, sr = librosa.load(file_path)
    
    # Extract features
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    rms = np.mean(librosa.feature.rms(y=y))
    chromagram = np.mean(librosa.feature.chroma_stft(y=y, sr=sr).T, axis=0)
    mel_spectrogram = np.mean(librosa.feature.melspectrogram(y=y, sr=sr).T, axis=0)
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=y, sr=sr).T, axis=0)
    spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y))
    harmonics = np.mean(librosa.effects.harmonic(y))
    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr).T, axis=0)
    
    return [tempo, rms, *chromagram, *mel_spectrogram, spectral_centroid, *spectral_contrast, spectral_rolloff, zero_crossing_rate, harmonics, *mfccs]

In [24]:
dataset_directory = '/home/jovyan/teaching_material/MScProject/audio'
audio_files = [dataset_directory+'/'+f for f in os.listdir(dataset_directory) if f.endswith('.mp3')]
labels = [int(f.split('/audio/')[1].split('_')[0].replace('Q', '')) for f in audio_files]

In [25]:
# Extract features for each audio file
features = [extract_features(file) for file in audio_files]

In [28]:
# Create a DataFrame for the features
columns = (['tempo', 'rms'] +
           [f'chromagram_{i}' for i in range(12)] +
           [f'mel_spectrogram_{i}' for i in range(128)] +
           ['spectral_centroid'] +
           [f'spectral_contrast_{i}' for i in range(7)] +
           ['spectral_rolloff', 'zero_crossing_rate', 'harmonics'] +
           [f'mfcc_{i}' for i in range(20)])

In [31]:
df_features = pd.DataFrame(features, columns=columns)

In [33]:
# Save the features to a CSV file
df_features.to_csv('/home/jovyan/teaching_material/MScProject/audio_features.csv', index=False)

Features extracted and saved to 'audio_features.csv'


In [47]:
y = np.array(labels)

In [49]:
# Perform Min-Max Scaling
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(df_features)

In [53]:
# Perform PCA for dimensionality reduction
pca = PCA(n_components=50)  # Adjust the number of components as needed
X_pca = pca.fit_transform(X_scaled)

In [54]:
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

In [55]:
# Convert labels to categorical
num_classes = len(np.unique(y))
y_train_categorical = to_categorical(y_train - 1, num_classes=num_classes)  # Subtracting 1 to make labels 0-indexed
y_test_categorical = to_categorical(y_test - 1, num_classes=num_classes)

In [58]:
# Reshape data for CNN input (samples, timesteps, features)
X_train_cnn = np.expand_dims(X_train, axis=2)
X_test_cnn = np.expand_dims(X_test, axis=2)

In [62]:
# Build the CNN model
def create_model(optimizer='adam', init_mode='uniform'):
    model = Sequential()
    model.add(Conv1D(128, kernel_size=3, activation='relu', input_shape=(X_train_cnn.shape[1], 1)))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.5))

    model.add(Conv1D(256, kernel_size=3, activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.5))

    model.add(Conv1D(512, kernel_size=3, activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.5))

    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model


In [63]:
# Compile the model
# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model = KerasClassifier(build_fn=create_model, verbose=0)
optimizers = ['adam', 'rmsprop']
init_modes = ['uniform', 'normal']
epochs = [50, 100]
batches = [32, 64]
param_grid = dict(optimizer=optimizers, epochs=epochs, batch_size=batches, init_mode=init_modes)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train_cnn, y_train_categorical)

# Best parameters
print(f"Best: {grid_result.best_score_} using {grid_result.best_params_}")

In [64]:
# Train the model
# model.fit(X_train_cnn, y_train_categorical, epochs=100, batch_size=64, validation_data=(X_test_cnn, y_test_categorical))

# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test_cnn, y_test_categorical)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7f556a423df0>

In [65]:
# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test_cnn, y_test_categorical)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

Test Accuracy: 65.09%
