In [1]:
import pathlib
import pickle

import sklearn.metrics
from sklearn.neighbors import KNeighborsClassifier

from components.common import Config
from components.predictor import SklearnPredictor

sklearn_config = Config(model=KNeighborsClassifier(),
                model_filename_path = "../models",
                data_path='../data',
                dataset_split_path= "../split",
                dataset_name="mtat-20")
sklearn_predictor = SklearnPredictor(sklearn_config, model_filename="mtat-20/model.bin")

In [2]:
from components.predictor import Predictor
from external.musicnn import Musicnn

musicnn_config = Config(model=Musicnn(n_class=20),
                        model_filename_path = "../models",
                        data_path='../data',
                        dataset_split_path= "../split",
                        dataset_name="mtat-20")
musicnn_predictor = Predictor(musicnn_config, model_filename="mtat-20/2023-03-27-11-49-27.pth")

In [3]:
import numpy as np

test_files = np.load("../split/mtat-20/test.npy", allow_pickle=True)
test_files

array([[13782, '2/indidginus-seismic-03-master_of_masters-349-378.mp3'],
       [31415,
        '7/american_bach_soloists-j_s__bach__mass_in_b_minor_cd2-07-et_in_spiritum_sanctum_dominum-262-291.mp3'],
       [5814,
        '0/jeffrey_luck_lucas-what_we_whisper-01-you_knew_it_well-175-204.mp3'],
       ...,
       [47511, '0/apa_ya-apa_ya-12-african_wedding_song-146-175.mp3'],
       [35566,
        'f/kenji_williams-faces_of_epiphany-08-free_energy-88-117.mp3'],
       [30965, '3/jag-four_strings-07-country_romp-30-59.mp3']],
      dtype=object)

In [4]:
from components.preprocessor import PreProcessor

preprocessor = PreProcessor(input_path="../data/mtat/mp3", output_path="../data/mtat/npy", suffix="npy", sr=16000)
preprocessor.run(test_files[:, 1])

100%|██████████| 1972/1972 [00:08<00:00, 233.21it/s]


In [5]:
from components.preprocessor import OpenL3PreProcessor

preprocessor = OpenL3PreProcessor(input_path="../data/mtat/mp3", output_path="../data/mtat/emb", suffix="npy", sr=16000)
preprocessor.run(test_files[:, 1])

100%|██████████| 1972/1972 [00:08<00:00, 231.46it/s]


In [6]:
binary = {row[0]: row[1:] for row in np.load("../split/mtat-20/binary.npy", allow_pickle=True)}
binary

{2: array([0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0]),
 6: array([0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0]),
 10: array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]),
 11: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]),
 12: array([0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]),
 14: array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 21: array([0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 23: array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 29: array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]),
 32: array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 33: array([0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 37: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]),
 38: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]),
 39: array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0

In [18]:
import os
import pickle

file = os.path.join("../data/mtat/emb/", pathlib.Path(test_files[0][1]).with_suffix(f".npy"))
data = np.load(file, allow_pickle=True)

model = pickle.load(open("../models/KNeighborsClassifier/mtat-20/model.bin", "rb"))
y_pred = model.predict_proba(data.reshape(1, -1))
y_pred = np.transpose([pred[:, 1] for pred in y_pred])
y_pred

array([[0. , 0. , 0. , 0.8, 0. , 0.4, 0.2, 0.2, 0.8, 0. , 0. , 0.4, 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. ]])

In [19]:
sklearn_predictor.predict_data_prob(data)

array([[0. , 0. , 0. , 0.8, 0. , 0.4, 0.2, 0.2, 0.8, 0. , 0. , 0.4, 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. ]])

In [33]:
from tqdm import tqdm
import os
import pathlib

Y_pred = []
Y_true = []
for idx, file in tqdm(test_files):
    file = os.path.join("../data/mtat/npy/", pathlib.Path(file).with_suffix(f".npy"))
    prediction = musicnn_predictor.predict_data(np.load(file, allow_pickle=True))
    Y_pred.append(prediction.mean(0).reshape(1, -1))
    Y_true.append(binary[idx])
Y_pred = np.array(Y_pred)
Y_pred = Y_pred.reshape(Y_pred.shape[0], Y_pred.shape[2])
Y_true = np.array(Y_true)

100%|██████████| 1972/1972 [02:50<00:00, 11.56it/s]


In [34]:
Y_true

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 1, 1, 0],
       [0, 0, 1, ..., 0, 1, 1],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0]])

In [43]:
Y_pred

array([[0.21950147, 0.10293862, 0.18517861, ..., 0.0233083 , 0.05200598,
        0.0555045 ],
       [0.19021389, 0.14832151, 0.1842592 , ..., 0.01733403, 0.05703202,
        0.04459751],
       [0.2837493 , 0.10571004, 0.21714152, ..., 0.01147093, 0.04763769,
        0.08228819],
       ...,
       [0.2340481 , 0.16306032, 0.15672274, ..., 0.01339923, 0.05342267,
        0.0581132 ],
       [0.22069775, 0.07279995, 0.16427813, ..., 0.01759528, 0.06556776,
        0.07563302],
       [0.25899678, 0.09027399, 0.16493782, ..., 0.02002248, 0.04686928,
        0.04974688]], dtype=float32)

In [50]:
from numpy import argmax

best_thresholds = []
for i in range(Y_pred.shape[1]):
    fpr, tpr, thresholds = sklearn.metrics.roc_curve(Y_true[:, i], Y_pred[:, i])
    best_thresholds.append(thresholds[argmax(tpr - fpr)])
print(f'Best Thresholds={best_thresholds}')

Best Thresholds=[0.22322433, 0.11302883, 0.15295269, 0.07690247, 0.110009804, 0.09695771, 0.091394395, 0.056917176, 0.09106095, 0.06588166, 0.08710687, 0.057797976, 0.077117294, 0.10145956, 0.067134604, 0.082501404, 0.088681385, 0.03278866, 0.064394414, 0.07362878]


In [52]:
np.max(Y_pred)

0.37375328