In [2]:
import librosa
import numpy as np
import pandas as pd
import joblib
import os
import sys
import scipy.ndimage

In [3]:
# --- Audio input ----------------------------------------------------------
# Audio file handed to librosa.load() by featurize().
INPUT_FILE = '../datasets/1.mp3'
# Length of one analysis segment in seconds; featurize() converts it to a
# hop length in samples.
SEGMENT_DURATION_SEC = 0.1

# --- Trained artifacts (loaded with joblib in predict()) ------------------
# Classifier whose .predict() is applied to the extracted features.
MODEL_PATH = 'model-svm.joblib'
# Encoder whose .inverse_transform() turns predictions back into chord labels.
ENCODER_PATH = 'encoder.joblib'

# --- Output ---------------------------------------------------------------
# NOTE(review): not referenced by any cell visible here; presumably the
# target file for the merged predictions - confirm.
OUTPUT_CSV_NAME = 'output.csv'

In [16]:
def featurize(input_file=None, segment_duration_sec=None):
    """Extract one smoothed chroma vector per fixed-length audio segment.

    Parameters
    ----------
    input_file : str, optional
        Path of the audio file to analyse. Defaults to the notebook-level
        ``INPUT_FILE``.
    segment_duration_sec : float, optional
        Requested length of one segment in seconds. Defaults to the
        notebook-level ``SEGMENT_DURATION_SEC``.

    Returns
    -------
    features : np.ndarray
        Chroma matrix transposed to one row per segment.
    start_times : np.ndarray
        Start time (seconds) of each segment.
    end_times : np.ndarray
        End time (seconds) of each segment, clipped to the file duration.

    On any processing error the problem is printed and three empty arrays
    are returned instead of raising, so callers must check for an empty
    result.
    """
    # Resolve defaults at call time so edits to the config cell take effect
    # without having to re-run this definition.
    if input_file is None:
        input_file = INPUT_FILE
    if segment_duration_sec is None:
        segment_duration_sec = SEGMENT_DURATION_SEC

    try:
        y, sr = librosa.load(input_file)
        file_duration = librosa.get_duration(y=y, sr=sr)

        hop_length = int(segment_duration_sec * sr)
        if hop_length < 1:
            raise ValueError(
                f"segment duration {segment_duration_sec}s is shorter than one sample at sr={sr}"
            )

        # Chroma is computed on the harmonic component of the signal only.
        y_harm = librosa.effects.harmonic(y=y, margin=8)
        chroma_harm = librosa.feature.chroma_cqt(y=y_harm, sr=sr, hop_length=hop_length)
        # Element-wise minimum of the chroma and its nearest-neighbour
        # median-filtered version.
        chroma_filter = np.minimum(
            chroma_harm,
            librosa.decompose.nn_filter(chroma_harm, aggregate=np.median)
        )
        # Median-smooth along axis 1 (frames) with a 9-frame window; size 1
        # on axis 0 keeps the chroma bins independent.
        chroma = scipy.ndimage.median_filter(chroma_filter, size=(1, 9))

        # One row per frame/segment.
        features = chroma.T
        num_segments = features.shape[0]

        # Use the *actual* frame period: int() above may have truncated the
        # hop, in which case the nominal segment duration would make the
        # timestamps drift away from the audio.
        seconds_per_frame = hop_length / sr
        start_times = np.arange(num_segments) * seconds_per_frame
        end_times = np.minimum(start_times + seconds_per_frame, file_duration)

        return features, start_times, end_times

    except Exception as e:
        # Deliberate best-effort: report and hand back empty arrays.
        print(f"featurize() -> Error processing audio '{input_file}': {e}")
        return np.array([]), np.array([]), np.array([])

In [18]:
def merge_chords(predictions):
    """Collapse consecutive segments that share a chord into single spans.

    Parameters
    ----------
    predictions : pd.DataFrame
        One row per segment, in time order, with columns ``'starts'``,
        ``'ends'`` and ``'chords'``.

    Returns
    -------
    pd.DataFrame
        One row per run of identical consecutive chords, with columns
        ``'start'`` (start of the run's first segment), ``'end'`` (end of
        the run's last segment) and ``'chord'``. Empty input yields an empty
        frame that still has these three columns.
    """
    output_columns = ['start', 'end', 'chord']
    merge_data = []

    current_chord = None
    current_start_time = None
    current_end_time = None

    rows = zip(predictions['chords'], predictions['starts'], predictions['ends'])
    for chord, start_time, end_time in rows:
        if current_chord is not None and chord == current_chord:
            # Same chord as the open run: just extend it.
            current_end_time = end_time
            continue

        if current_chord is not None:
            # Chord changed: close the run that was open.
            merge_data.append({
                'start': current_start_time,
                'end': current_end_time,
                'chord': current_chord,
            })

        # Open a new run at this segment (first row or chord change).
        current_chord = chord
        current_start_time = start_time
        current_end_time = end_time

    # Flush the run that is still open after the last row.
    if current_chord is not None:
        merge_data.append({
            'start': current_start_time,
            'end': current_end_time,
            'chord': current_chord,
        })

    return pd.DataFrame(merge_data, columns=output_columns)

In [19]:
def predict():
    """Run the chord-recognition pipeline on ``INPUT_FILE``.

    Steps: check the input exists, load the model and label encoder with
    joblib, extract per-segment features via ``featurize()``, classify each
    segment, decode the labels, and merge consecutive identical chords via
    ``merge_chords()``.

    Returns
    -------
    pd.DataFrame
        The merged chord spans produced by ``merge_chords()``.

    Exits via ``sys.exit(1)`` (after printing a message) when the input file
    is missing, the model/encoder cannot be loaded, or no features could be
    extracted.
    """
    if not os.path.exists(INPUT_FILE):
        print(f"predict() -> File not found {INPUT_FILE}")
        sys.exit(1)

    try:
        # NOTE(security): joblib files are pickle-based and can execute
        # arbitrary code on load; only load artifacts from a trusted source.
        model = joblib.load(MODEL_PATH)
        encoder = joblib.load(ENCODER_PATH)
    except FileNotFoundError:
        print(f"predict() -> File not found {MODEL_PATH}  or  {ENCODER_PATH}")
        sys.exit(1)
    except Exception as e:
        print(f"predict() -> Error {e}")
        # Without a model and encoder nothing below can run, so stop here
        # rather than failing later on unbound names.
        sys.exit(1)

    features, starts, ends = featurize()
    # featurize() reports failures by returning empty arrays.
    if len(features) == 0:
        print(f"predict() -> No features extracted from {INPUT_FILE}")
        sys.exit(1)

    raw_predictions_df = pd.DataFrame({
        "starts": starts,
        "ends": ends,
    })

    predictions = model.predict(features)
    predictions_decoded = encoder.inverse_transform(predictions)
    raw_predictions_df["chords"] = predictions_decoded

    # NOTE(review): OUTPUT_CSV_NAME is defined in the config cell but nothing
    # here writes a file; the merged frame is returned so the caller can
    # display or save it.
    final_predictions_df = merge_chords(raw_predictions_df)
    return final_predictions_df

In [14]:
# Scratch cell: run feature extraction on its own and display the
# (features, start_times, end_times) tuple it returns.
# The end-to-end predict() call below is left disabled.
# predict()
featurize()

(array([[0.20600197, 0.0627564 , 0.08255352, ..., 0.03954926, 0.01822974,
         0.05632551],
        [0.20600197, 0.0627564 , 0.08255352, ..., 0.03954926, 0.01822974,
         0.05632551],
        [0.20600197, 0.0627564 , 0.08255352, ..., 0.03954926, 0.01822974,
         0.05632551],
        ...,
        [1.        , 0.11088652, 0.14574105, ..., 0.13633344, 0.25692022,
         0.12828174],
        [1.        , 0.11088652, 0.13884585, ..., 0.13633344, 0.27261382,
         0.12828174],
        [1.        , 0.11088652, 0.13884585, ..., 0.13633344, 0.25692022,
         0.12828174]], shape=(126, 12), dtype=float32),
 array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ,
         1.1,  1.2,  1.3,  1.4,  1.5,  1.6,  1.7,  1.8,  1.9,  2. ,  2.1,
         2.2,  2.3,  2.4,  2.5,  2.6,  2.7,  2.8,  2.9,  3. ,  3.1,  3.2,
         3.3,  3.4,  3.5,  3.6,  3.7,  3.8,  3.9,  4. ,  4.1,  4.2,  4.3,
         4.4,  4.5,  4.6,  4.7,  4.8,  4.9,  5. ,  5.1,  5.2,  5.3,  5.4,
       