In [56]:
import sklearn.metrics
from sklearn.neighbors import KNeighborsClassifier

from components.common import Config
from components.predictor import SklearnPredictor

# Baseline: a default-parameter k-nearest-neighbours classifier wrapped in the project Config.
sklearn_config = Config(
    model=KNeighborsClassifier(),
    model_filename_path="../models",
    data_path="../data",
    dataset_split_path="../split",
    dataset_name="mtat-20",
)
# NOTE(review): model_filename is presumably resolved relative to model_filename_path —
# confirm in SklearnPredictor.
sklearn_predictor = SklearnPredictor(
    sklearn_config,
    model_filename="mtat-20/model.bin",
)

Cannot use cuda for model, defaulting to cpu


In [57]:
from components.predictor import Predictor
from external.musicnn import Musicnn

# Musicnn network with 20 output classes, configured with the same paths and dataset
# name as the KNN baseline.
musicnn_config = Config(
    model=Musicnn(n_class=20),
    model_filename_path="../models",
    data_path="../data",
    dataset_split_path="../split",
    dataset_name="mtat-20",
)
# NOTE(review): the .pth name looks like a timestamped training checkpoint — confirm
# it is the intended one.
musicnn_predictor = Predictor(
    musicnn_config,
    model_filename="mtat-20/2023-03-27-11-49-27.pth",
)

In [58]:
import numpy as np

# Load the test split listing and the tag names.
# allow_pickle=True lets np.load unpickle arbitrary Python objects, so these files must
# come from a trusted source (presumably required because the arrays are object-dtype —
# TODO confirm).
test_files, tags = (
    np.load("../split/mtat-20/test.npy", allow_pickle=True),
    np.load("../split/mtat-20/tags.npy", allow_pickle=True),
)

In [59]:
from components.preprocessor import PreProcessor

# Run the plain PreProcessor over the test clips: reads from the mp3 folder and writes
# .npy files to the npy folder. Column 1 of test_files holds the file paths.
preprocessor = PreProcessor(
    input_path="../data/mtat/mp3",
    output_path="../data/mtat/npy",
    suffix="npy",
    sr=16000,
)
preprocessor.run(test_files[:, 1])

100%|██████████| 1972/1972 [00:07<00:00, 272.01it/s]


In [60]:
from components.preprocessor import OpenL3PreProcessor

# Run the OpenL3 pre-processor over the same test clips, writing .npy files to the emb
# folder. This rebinds `preprocessor`; the PreProcessor instance from the previous cell
# is no longer reachable under that name.
preprocessor = OpenL3PreProcessor(
    input_path="../data/mtat/mp3",
    output_path="../data/mtat/emb",
    suffix="npy",
    sr=16000,
)
preprocessor.run(test_files[:, 1])

100%|██████████| 1972/1972 [00:09<00:00, 217.34it/s]


In [61]:
# Lookup table built from binary.npy: the first column of each row becomes the key and
# the remaining columns become the 0/1 tag vector stored under that key.
binary = dict(
    (row[0], row[1:])
    for row in np.load("../split/mtat-20/binary.npy", allow_pickle=True)
)
binary

{2: array([0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0]),
 6: array([0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0]),
 10: array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]),
 11: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]),
 12: array([0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]),
 14: array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 21: array([0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 23: array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 29: array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]),
 32: array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 33: array([0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 37: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]),
 38: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]),
 39: array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0

In [62]:
import pathlib
import os
from tqdm import tqdm

# Assemble the KNN evaluation set: one flattened embedding per test clip, paired with
# the label vector stored in `binary` under the clip's id.
emb_root = "../data/mtat/emb/"
test_data, Y_true = [], []
for idx, file in tqdm(test_files):
    # Same relative path as the listed file, with the extension swapped for .npy.
    emb_path = os.path.join(emb_root, pathlib.Path(file).with_suffix(".npy"))
    embedding = np.load(emb_path, allow_pickle=True)
    test_data.append(embedding.flatten())
    Y_true.append(binary[idx])
Y_true = np.array(Y_true)
Y_pred = sklearn_predictor.predict_data(np.array(test_data))

100%|██████████| 1972/1972 [00:13<00:00, 143.69it/s]


In [63]:
# Inspect the output of sklearn_predictor.predict_data for the test set.
Y_pred

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0]])

In [64]:
# Inspect the ground-truth label matrix collected from `binary`, for comparison with Y_pred.
Y_true

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 1, 1, 0],
       [0, 0, 1, ..., 0, 1, 1],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0]])

In [65]:
from sklearn.metrics import classification_report, roc_auc_score, average_precision_score

# Per-tag precision / recall / F1 for the KNN predictor. zero_division=1 reports a
# metric as 1.0 (without a warning) when its denominator is zero.
print(classification_report(Y_true, Y_pred, target_names=list(tags), zero_division=1))
# The ground truth must be the first argument. This call previously passed Y_pred twice,
# which scored the predictions against themselves and said nothing about the model.
# NOTE: Y_pred holds hard 0/1 labels rather than continuous scores, so ROC-AUC and
# average precision are computed from a single operating point per tag.
print(roc_auc_score(Y_true, Y_pred, average='macro'))
print(average_precision_score(Y_true, Y_pred, average='macro'))

              precision    recall  f1-score   support

      guitar       0.87      0.72      0.79       477
   classical       0.76      0.76      0.76       432
        slow       0.67      0.37      0.48       353
      techno       0.77      0.78      0.77       297
     strings       0.67      0.52      0.58       256
       drums       0.60      0.42      0.49       233
  electronic       0.67      0.44      0.53       266
        rock       0.87      0.78      0.83       240
        fast       0.58      0.50      0.54       237
       piano       0.88      0.65      0.75       203
     ambient       0.74      0.76      0.75       206
        beat       0.61      0.51      0.56       178
      violin       0.77      0.72      0.74       176
       vocal       0.73      0.23      0.35       175
       synth       0.56      0.29      0.38       190
      female       0.70      0.31      0.43       145
      indian       0.76      0.62      0.68       131
       opera       0.82    

In [66]:
import pathlib
import os
from tqdm import tqdm

# Run the Musicnn predictor on every test clip and collect the matching label vectors.
# This rebinds Y_pred / Y_true, replacing the KNN results held under the same names.
npy_root = "../data/mtat/npy/"
Y_pred, Y_true = [], []
for idx, file in tqdm(test_files):
    # Same relative path as the listed file, with the extension swapped for .npy.
    clip_path = os.path.join(npy_root, pathlib.Path(file).with_suffix(".npy"))
    # NOTE(review): presumably the decoded 16 kHz audio written by PreProcessor — confirm.
    samples = np.load(clip_path, allow_pickle=True).flatten()
    Y_pred.append(musicnn_predictor.predict_data(samples))
    Y_true.append(binary[idx])
Y_true = np.array(Y_true)
Y_pred = np.array(Y_pred)

100%|██████████| 1972/1972 [01:22<00:00, 23.78it/s]


In [74]:
# Y_pred is 3-D: (clips, rows per clip, tags). The middle axis is presumably one row per
# audio segment scored by the predictor — TODO confirm against Predictor.predict_data.
print(Y_pred.shape)
# Collapse the middle axis by keeping, for each clip and tag, the highest score.
Y_pred_max = Y_pred.max(axis=1)
print(Y_pred_max.shape)
# Hard 0/1 decisions: a tag is predicted when its aggregated score reaches 0.5.
Y_pred_max_bin = (Y_pred_max >= 0.5).astype(int)

(1972, 9, 20)
(1972, 20)


In [75]:
# Inspect the thresholded (0/1) Musicnn predictions.
Y_pred_max_bin

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       [1, 0, 1, ..., 0, 0, 0],
       ...,
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0]])

In [76]:
# Inspect the ground-truth label matrix rebuilt alongside the Musicnn predictions.
Y_true

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 1, 1, 0],
       [0, 0, 1, ..., 0, 1, 1],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0]])

In [77]:
from sklearn.metrics import classification_report

# Per-tag report uses the thresholded 0/1 predictions; the two ranking metrics below use
# the un-thresholded aggregated scores. Keyword arguments keep truth and prediction
# from being swapped by accident.
print(
    classification_report(
        y_true=Y_true,
        y_pred=Y_pred_max_bin,
        target_names=list(tags),
        zero_division=1,
    )
)
print(roc_auc_score(y_true=Y_true, y_score=Y_pred_max, average='macro'))
print(average_precision_score(y_true=Y_true, y_score=Y_pred_max, average='macro'))

              precision    recall  f1-score   support

      guitar       0.27      0.79      0.40       477
   classical       0.31      0.52      0.39       432
        slow       0.23      0.10      0.14       353
      techno       0.31      0.27      0.29       297
     strings       0.21      0.13      0.16       256
       drums       0.24      0.24      0.24       233
  electronic       0.25      0.11      0.15       266
        rock       0.25      0.09      0.13       240
        fast       0.17      0.02      0.03       237
       piano       0.22      0.11      0.15       203
     ambient       0.18      0.17      0.18       206
        beat       0.11      0.01      0.02       178
      violin       0.19      0.20      0.19       176
       vocal       0.25      0.03      0.05       175
       synth       0.30      0.02      0.03       190
      female       0.17      0.23      0.19       145
      indian       0.10      0.28      0.14       131
       opera       0.27    