In [8]:
import os
import sys
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.utils import to_categorical

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

sys.path.append('../')
from vocal_patterns.ml_logic.preprocessor import preprocess_audio


In [23]:
## Model functions

def initialize_model(input_shape) -> Model:
    """
    Initialize the CNN model
    """
    model = Sequential()
    model.add(layers.Conv2D(8, (5,5), input_shape=input_shape, strides=(2, 2), padding='same', activation="relu"))
    model.add(layers.BatchNormalization())

    model.add(layers.Conv2D(16, (3,3), strides=(2, 2), padding='same', activation="relu"))
    model.add(layers.BatchNormalization())

    model.add(layers.Conv2D(32, (3,3), strides=(2, 2), padding='same', activation="relu"))
    model.add(layers.BatchNormalization())

    model.add(layers.Conv2D(64, (3,3), strides=(2, 2), padding='same', activation="relu"))
    model.add(layers.BatchNormalization())

    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(3, activation='softmax'))

    print("✅ Model initialized")

    return model

def compile_model(model: Model, learning_rate=0.001) -> Model:
    """
    Compile the Neural Network
    """
    optimizer = optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    print("✅ Model compiled")

    return model


In [6]:
## Get the data

download_path = "../vocal_patterns/data/dataset_tags.csv"
data = pd.read_csv(download_path)
data.head()


Unnamed: 0,path,exercise,technique,filename
0,/Users/denis/code/ElsaGregoire/vocal_patterns/...,Other,vibrato,m6_row_vibrato.wav
1,/Users/denis/code/ElsaGregoire/vocal_patterns/...,Other,vibrato,m6_caro_vibrato.wav
2,/Users/denis/code/ElsaGregoire/vocal_patterns/...,Other,vibrato,m6_dona_vibrato.wav
3,/Users/denis/code/ElsaGregoire/vocal_patterns/...,Other,straight,m6_caro_straight.wav
4,/Users/denis/code/ElsaGregoire/vocal_patterns/...,Other,straight,m6_row_straight.wav


In [11]:
## Separate the data

X = data.drop(columns=['exercise', "technique", "filename"])
y = data[['exercise']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)


In [14]:
X_train_processed = preprocess_audio(X_train)
X_train_reshaped = X_train_processed.reshape(len(X_train_processed), 128, 259, 1)

X_test_processed = preprocess_audio(X_test)
X_test_reshaped = X_test_processed.reshape(len(X_test_processed), 128, 259, 1)


In [15]:
label_encoder = LabelEncoder()
y_train_labels = label_encoder.fit_transform(y_train)
y_train_cat = to_categorical(y_train_labels, num_classes=3)


  y = column_or_1d(y, warn=True)


In [16]:
label_encoder = LabelEncoder()
y_test_labels = label_encoder.fit_transform(y_test)
y_test_cat = to_categorical(y_test_labels, num_classes=3)


  y = column_or_1d(y, warn=True)


In [32]:
model = initialize_model((128, 259, 1))
model = compile_model(model)


✅ Model initialized
✅ Model compiled


In [33]:
model.fit(X_train_reshaped, y_train_cat, epochs=10, batch_size=32, validation_split=0.2)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x296c68be0>

In [34]:
model.evaluate(X_test_reshaped, y_test_cat)




[0.1357562392950058, 0.9538745284080505]