# DataBass Project

Extract the notes of the bass line from an audio file that contains multiple instruments.

In [13]:
import os
import sys
import pathlib

# Retrieve the project root from the WORKING_DIR environment variable.
import warnings

WORKING_DIR = os.getenv("WORKING_DIR")
if WORKING_DIR:
    # Make the local `databass` package importable; the membership check keeps
    # re-running this cell from adding duplicate entries.
    if WORKING_DIR not in sys.path:
        sys.path.append(WORKING_DIR)
else:
    # os.getenv returns None when the variable is missing; do not push None
    # onto sys.path, and tell the reader why later cells will fail.
    warnings.warn(
        "WORKING_DIR is not set: the `databass` imports and every path built "
        "from it in this notebook will fail."
    )

# `path_DataBass` is used by the data-loading cells of this notebook but was
# never assigned, so a fresh kernel raised NameError.
# NOTE(review): assumed to be the same project root as WORKING_DIR (both are
# used to reach project folders such as raw_data/ and spectrograms/) — confirm.
path_DataBass = WORKING_DIR

import librosa
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from dotenv import load_dotenv

import pickle

import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.preprocessing import LabelEncoder

from databass.utils.get_note_baseline import get_pic_frequency, get_note, plot_fft
from databass.preprocess.filter import frequencies_filter, plot_filtered_vs_original
from databass.preprocess.spectrograms import generate_mel_spectrogram, plot_mel_spectrogram
from databass.models.conv2D import conv2D_predict_note

from databass.utils import bass_notes

from databass.models.conv2D import create_model, preprocess_for_conv2D

from databass.extract.bass_extract import extract_bass_list


In [None]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules (e.g. the local `databass` package) are picked up without restarting
# the kernel.
%load_ext autoreload
%autoreload 2

We use the librosa library, which contains useful tools to load and process audio files.

First, let's extract the waveform and sampling rate of one audio file:

In [None]:
# One sample of the "Chorus" folder, located under the project root.
filename = path_DataBass + (
    '/raw_data/Bass monophon/Samples/Chorus/'
    'B11-28100-3311-00625.wav'
)

# No `sr` argument is given, so librosa's default sampling rate is used.
y, sr = librosa.load(filename)

The waveform (y) is the amplitude of the sound signal over time; here it is indexed by sample number rather than by seconds. The sampling rate (sr) is the number of samples recorded per second.

In [None]:
# Display the waveform array returned by librosa.load.
y

In [None]:
# Plot the whole waveform (amplitude against sample index).
plt.plot(y)

In [None]:
# Zoom in on a 5000-sample window (samples 10000 to 14999) of the waveform.
plt.plot(y[10000:15000])

In [None]:
# Estimate the fundamental frequency with librosa's pYIN implementation,
# searching between 20 Hz and 400 Hz.
# `sr` is passed explicitly so the estimate stays correct if the file is ever
# loaded at a rate other than pyin's own default.
test = librosa.pyin(y, fmin=20, fmax=400, sr=sr)
test

In [None]:
# Load the bass note correspondence table shipped with the notebooks.
df_notes = pd.read_csv(
    path_DataBass + '/notebooks/lou/table_correspondance_notes_basse.csv'
)
print(df_notes)

In [None]:
# Run the baseline helper on the waveform; it returns three values that are
# unpacked as the peak frequency, the magnitudes and the frequencies.
# NOTE(review): presumably an FFT-based spectrum (the result is fed to plot_fft
# in the next cell) — confirm in databass.utils.get_note_baseline.
pic_frequency, magnitude, frequencies = get_pic_frequency(y, sr)

In [None]:
# Plot the magnitudes against the frequencies returned by get_pic_frequency.
plot_fft(magnitude, frequencies)

In [None]:
# Baseline prediction: map the peak frequency to a note using the table in
# df_notes. NOTE(review): the matching rule lives in get_note — not visible here.
get_note(pic_frequency, df_notes)

In [None]:
# Filter the waveform with the project's frequencies_filter helper.
# NOTE(review): the band that is kept is defined inside
# databass.preprocess.filter — not visible here.
y_filtered = frequencies_filter(y, sr)

# Compare the original and the filtered signals visually.
plot_filtered_vs_original(y, y_filtered)

In [None]:
# Show the sampling rate returned by librosa.load for the example file.
print(sr)

In [None]:

# Compute a mel spectrogram of the example waveform, without normalization and
# with an explicit target shape.
mel_spec = generate_mel_spectrogram(y, sr, normalize=False, target_shape=(128,128))
print("Shape du Mel-spectrogramme:", mel_spec.shape)  # NOTE(review): presumably (128, 128) given target_shape — confirm


In [None]:
# Print the raw values of the mel spectrogram computed above.
print(mel_spec)

In [None]:
# Render the mel spectrogram with the project's plotting helper; the waveform
# and the target shape are passed along as keyword arguments.
plot_mel_spectrogram(mel_spec, sr, y=y, target_shape=(128, 128))

In [None]:
# Load the preprocessed list of Chorus samples; the cells below read its
# 'fileID' and 'note_name' columns.
df_targets = pd.read_csv(
    path_DataBass + '/data/preprocessed/chorus_bass_list.csv'
)
df_targets.head()

In [None]:
# Sanity check: look up the target note of a single file.
label1 = 'B31-28100-3312-01561.wav'

# The fileID column stores the file name without its '.wav' extension.
label1_name = label1.replace('.wav', '')
print(label1_name)

# Select the matching row and the 'note_name' column in one .loc call,
# then take the first match.
df_targets.loc[df_targets['fileID'] == label1_name, 'note_name'].values[0]

In [None]:
audio_dir = path_DataBass + '/raw_data/Bass monophon/Samples/Chorus'

# Build the fileID -> note_name lookup once instead of scanning the whole
# DataFrame for every audio file. Keeping the first occurrence of each fileID
# matches the previous `.values[0]` behaviour.
note_by_file_id = (
    df_targets
    .drop_duplicates(subset='fileID')
    .set_index('fileID')['note_name']
)

X = []       # mel spectrograms, one per audio file
y = []       # target note name of each spectrogram
labels = []  # audio file name of each spectrogram

# os.listdir returns entries in arbitrary order; sorting makes the dataset
# order (and therefore any later split) reproducible across runs and machines.
for label in sorted(os.listdir(audio_dir)):

    # Ignore anything that is not an audio sample (hidden files, folders, ...).
    label_name, extension = os.path.splitext(label)
    if extension.lower() != '.wav':
        continue

    # Resolve the target first so unlabeled files are skipped, with a visible
    # message, before any audio is decoded (previously this raised IndexError).
    if label_name not in note_by_file_id.index:
        print(f"⚠️ No target found for {label}, file skipped")
        continue
    target = note_by_file_id[label_name]

    # Load the audio at its native sampling rate (sr=None disables resampling).
    y_audio, sr = librosa.load(os.path.join(audio_dir, label), sr=None)

    # Create the mel spectrogram.
    mel_spec = generate_mel_spectrogram(y_audio, sr, target_shape=(128,128),
                                        duration=2.0, normalize='minmax')

    # Keep the three lists aligned index by index.
    X.append(mel_spec)
    y.append(target)
    labels.append(label)

X = np.array(X)


In [None]:
# Optional: show the class -> integer mapping.
# NOTE(review): `le` is only created by the preprocess_for_conv2D cell that
# follows, so these lines can only be uncommented after that cell has run.
#print("Classes (note -> id):")
#for note, idx in zip(le.classes_, range(len(le.classes_))):
#    print(f"{note} -> {idx}")

In [None]:
# Prepare the data for the Conv2D model. The helper returns seven values:
# the train / test / validation inputs, the matching targets, and `le`.
# NOTE(review): `le` is presumably the fitted LabelEncoder (it is later used
# for inverse_transform) — confirm in databass.models.conv2D.
(
    X_train, X_test, X_val,
    y_train, y_test, y_val,
    le,
) = preprocess_for_conv2D(X, y)

In [None]:
# Quick consistency check of the dataset, one value per line: number of files,
# shape of X, number of targets, the targets themselves, number of classes.
print(
    len(labels),
    X.shape,
    len(y),
    y,
    len(np.unique(y)),
    sep="\n",
)

In [None]:
# Build the model.
input_shape = X_train.shape[1:]  # expected to be (128, 128, 1)

# Size the output layer from the label encoder instead of the global `y`.
# Later cells reuse the name `y` for an audio waveform, so re-running this cell
# after them would otherwise yield a meaningless class count.
# NOTE(review): assumes `le` was fitted on all targets, so that
# len(le.classes_) equals the number of distinct notes — confirm in
# preprocess_for_conv2D.
num_classes = len(le.classes_)
model = create_model(input_shape, num_classes)

# Show the architecture.
model.summary()

In [None]:
# Early stopping: watch the validation loss, stop after 5 epochs without
# improvement, restore the weights of the best epoch, and log when it triggers.
es = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Training, validated on the held-out validation split.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    callbacks=[es],
    verbose=1,
)

In [None]:
# Save the trained model. The path is relative, so the file lands in the
# kernel's current working directory.
model.save('modele_lou_10-12-25_17h33.keras')

In [None]:
# Path of a sample from the "Distortion" folder, relative to the project root.
# NOTE(review): `new_file` is not used by any other cell shown in this notebook.
new_file = 'raw_data/Bass monophon/Samples/Distortion/B11-28100-4411-06241.wav'

In [None]:
# Display the shape of the training inputs.
X_train.shape

In [None]:
from tensorflow.keras.models import load_model

# Load a model from disk.
# NOTE(review): this is the '..._11h.keras' file, not the
# 'modele_lou_10-12-25_17h33.keras' file saved by the training cells above —
# confirm which model is meant to be evaluated here.
model = load_model(path_DataBass + '/notebooks/lou/modele_lou_10-12-25_11h.keras')

print("Conv2D model loaded successfully.")

# Load an example .wav file.
exemple_wav = os.path.join(
    path_DataBass,
    'raw_data',
    'Bass monophon',
    'Samples',
    'Chorus',
    'B11-28100-3311-00625.wav')

# Use a dedicated name for the waveform: `y` holds the training targets, and
# overwriting it here broke any re-run of the cells that read `y` as labels.
y_audio, sr = librosa.load(exemple_wav)

# NOTE(review): the dataset-building loop loads audio with sr=None and passes
# target_shape=(128,128) and duration=2.0, whereas this call relies on the
# defaults of librosa.load and generate_mel_spectrogram — confirm that both
# paths produce identically shaped and scaled inputs.
s = generate_mel_spectrogram(y_audio, sr, normalize='minmax')

print(s.shape)

# Add the trailing channel axis, then the leading batch axis.
s = np.expand_dims(s, axis=-1)
s = np.expand_dims(s, axis=0)

print(f'Preprocessed shape: {s.shape}')

result = model.predict(
    s
)
# Index of the highest score along axis 1, one per input row.
predicted_classes = np.argmax(result, axis=1)

print("Indices des classes prédites :", predicted_classes)

# Map the class indices back to the original note names. `le` must come from
# the preprocess_for_conv2D cell of the current session.
predicted_notes = le.inverse_transform(predicted_classes)
print("Notes prédites :", predicted_notes)

In [None]:


# Same prediction through the packaged helper, using the preprocessed input
# `s`, the loaded model and the label encoder.
note = conv2D_predict_note(s, model, le)
note

In [None]:

# Sauvegarder le LabelEncoder dans un fichier
with open('label_encoder.pkl', 'wb') as f:
    pickle.dump(le, f)

In [None]:
# Load the preprocessed sample list and show how many rows it contains.
sample_list = pd.read_csv(
    path_DataBass + '/data/preprocessed/bass_list.csv'
)
len(sample_list)

In [None]:

# Retrieve WORKING_DIR from the environment.
WORKING_DIR = os.environ.get("WORKING_DIR")

# Output folder for the spectrograms; created if it does not exist yet.
output_dir = WORKING_DIR + "/spectrograms"
os.makedirs(output_dir, exist_ok=True)


In [None]:
# `output_path` was only assigned inside the export loop further down, so this
# cell raised NameError on a fresh kernel. Give it an explicit destination.
# NOTE(review): the file name is a placeholder, and `mel_spec` is whatever
# spectrogram was computed last — confirm this cell is still needed.
output_path = os.path.join(output_dir, 'example_mel_spec.npy')
np.save(output_path, mel_spec)

In [None]:
# Go through the dataframe
for index, row in sample_list.iterrows():

    audio_path = row['file_path']

    # check that the file exists
    if not os.path.exists(audio_path):
        print(f"⚠️ Fichier introuvable : {audio_path}")
        continue

    # create the Mel-spectrogramme
    try:
        y, sr = librosa.load(audio_path)
        mel_spec = generate_mel_spectrogram(y, sr, normalize='minmax',
                                            target_shape=(128,128))
    except Exception as e:
        print(f"❌ Erreur lors du traitement de {audio_path}: {e}")
        continue

    # output folder for each note
    note_dir = os.path.join(output_dir, row['note_name'])
    os.makedirs(note_dir, exist_ok=True)

    # output file name (.npy)
    output_filename = f"{row['fileID']}.npy"
    output_path = os.path.join(note_dir, output_filename)

    # Save the spectrogram .npy
    np.save(output_path, mel_spec)