In [45]:
import os
import random
import numpy as np
import torch
import pathlib

from sklearn.model_selection import train_test_split
from models.common import load_file_lists
from models.preprocessor import OpenL3PreProcessor

In [46]:
def create_features(input_data):
    try:
        X_np = np.load(os.path.join(features_path, "X.npy"), allow_pickle=True)
        Y_np = np.load(os.path.join(features_path, "Y.npy"), allow_pickle=True)
    except OSError:
        X = []
        Y = []
        for idx, filename in input_data:
            filename = os.path.join("../data/mtat/emb", str(pathlib.Path(filename).with_suffix(".npy")))
            file_data = np.load(filename, allow_pickle=True).flatten()
            X.append(file_data)
            Y.append(binary[int(idx)])
        X_np = np.array(X)
        Y_np = np.array(Y)
        np.save(os.path.join(features_path, "X.npy"), X_np)
        np.save(os.path.join(features_path, "y.npy"), Y_np)
    return X_np, Y_np

In [47]:
def set_seed(seed: int = 42) -> None:
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    print(f"Random seed set as {seed}")

In [48]:
set_seed(123456)

Random seed set as 123456


In [49]:
dataset_name = "mtat-20"
split_path = "../split"
features_path = "../data/mtat/features/"

split_path = os.path.join(split_path, dataset_name)
features_path = os.path.join(features_path, dataset_name)
os.makedirs(split_path, exist_ok=True)
os.makedirs(features_path, exist_ok=True)

In [50]:
# p = OpenL3PreProcessor(input_path="../data/mtat/mp3",
#                        output_path="../data/mtat/emb",
#                        suffix="npy")
# # print(load_file_lists(["../split/mtat/train.npy", "../split/mtat/valid.npy", "../split/mtat/test.npy"])[:, 1])
data = load_file_lists([
    os.path.join(split_path, "train.npy"),
    os.path.join(split_path, "valid.npy"),
    os.path.join(split_path, "test.npy")
])
# p.run(files=data[:, 1])

In [51]:
binary = {row[0]: row[1:] for row in np.load(os.path.join(split_path, "binary.npy"), allow_pickle=True)}
tags = np.load(os.path.join(split_path, "tags.npy"), allow_pickle=True)
X_np, Y_np = create_features(data)

In [52]:
X_train, X_test, Y_train, Y_test = train_test_split(X_np, Y_np, random_state=1)

In [53]:
from sklearn.exceptions import UndefinedMetricWarning
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier

import warnings
warnings.simplefilter(action='ignore', category=UndefinedMetricWarning)

model = KNeighborsClassifier(),
model.fit(X_train, Y_train)
y_pred = model.predict(X_test)
print(classification_report(y_pred, Y_test))

              precision    recall  f1-score   support

           0       0.47      0.92      0.62       612
           1       0.59      0.78      0.67       785
           2       0.07      0.61      0.13       101
           3       0.54      0.83      0.65       487
           4       0.14      0.60      0.22       149
           5       0.11      0.80      0.19        88
           6       0.07      0.57      0.13        81
           7       0.57      0.98      0.72       353
           8       0.08      0.66      0.14        68
           9       0.35      1.00      0.51       184
          10       0.32      0.90      0.47       176
          11       0.15      0.71      0.24       105
          12       0.36      0.78      0.49       195
          13       0.01      0.75      0.01         4
          14       0.00      0.00      0.00         0
          15       0.00      0.00      0.00         0
          16       0.01      1.00      0.02         4
          17       0.39    

In [54]:
dataset_name = "mtat-10"
split_path = "../split"
features_path = "../data/mtat/features/"

split_path = os.path.join(split_path, dataset_name)
features_path = os.path.join(features_path, dataset_name)
os.makedirs(split_path, exist_ok=True)
os.makedirs(features_path, exist_ok=True)

In [55]:
# p = OpenL3PreProcessor(input_path="../data/mtat/mp3",
#                        output_path="../data/mtat/emb",
#                        suffix="npy")
# # print(load_file_lists(["../split/mtat/train.npy", "../split/mtat/valid.npy", "../split/mtat/test.npy"])[:, 1])
data = load_file_lists([
    os.path.join(split_path, "train.npy"),
    os.path.join(split_path, "valid.npy"),
    os.path.join(split_path, "test.npy")
])
# p.run(files=data[:, 1])

In [56]:
binary = {row[0]: row[1:] for row in np.load(os.path.join(split_path, "binary.npy"), allow_pickle=True)}
tags = np.load(os.path.join(split_path, "tags.npy"), allow_pickle=True)
X_np, Y_np = create_features(data)

In [57]:
X_train, X_test, Y_train, Y_test = train_test_split(X_np, Y_np, random_state=1)

In [67]:
from sklearn.exceptions import UndefinedMetricWarning
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier

import warnings
warnings.simplefilter(action='ignore', category=UndefinedMetricWarning)

model = KNeighborsClassifier(),
model.fit(X_train, Y_train)
y_pred = model.predict(X_test)
print(classification_report(y_pred, Y_test))

              precision    recall  f1-score   support

           0       0.61      0.91      0.73       787
           1       0.73      0.80      0.76      1001
           2       0.22      0.76      0.34       259
           3       0.59      0.82      0.69       538
           4       0.25      0.69      0.37       246
           5       0.17      0.80      0.28       137
           6       0.13      0.58      0.21       137
           7       0.67      0.97      0.79       382
           8       0.10      0.64      0.17        89
           9       0.32      0.99      0.49       172

   micro avg       0.41      0.83      0.55      3748
   macro avg       0.38      0.79      0.48      3748
weighted avg       0.54      0.83      0.63      3748
 samples avg       0.46      0.60      0.49      3748

              precision    recall  f1-score   support

           0       0.60      0.59      0.59      1192
           1       0.63      0.62      0.63      1096
           2       0.34 

In [68]:
dataset_name = "mtat"
split_path = "../split"
features_path = "../data/mtat/features/"

split_path = os.path.join(split_path, dataset_name)
features_path = os.path.join(features_path, dataset_name)
os.makedirs(split_path, exist_ok=True)
os.makedirs(features_path, exist_ok=True)

In [69]:
# p = OpenL3PreProcessor(input_path="../data/mtat/mp3",
#                        output_path="../data/mtat/emb",
#                        suffix="npy")
# # print(load_file_lists(["../split/mtat/train.npy", "../split/mtat/valid.npy", "../split/mtat/test.npy"])[:, 1])
data = load_file_lists([
    os.path.join(split_path, "train.npy"),
    os.path.join(split_path, "valid.npy"),
    os.path.join(split_path, "test.npy")
])
# p.run(files=data[:, 1])

In [61]:
binary = {row[0]: row[1:] for row in np.load(os.path.join(split_path, "binary.npy"), allow_pickle=True)}
tags = np.load(os.path.join(split_path, "tags.npy"), allow_pickle=True)
X_np, Y_np = create_features(data)

In [62]:
X_train, X_test, Y_train, Y_test = train_test_split(X_np, Y_np, random_state=1)

In [63]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier

import warnings
warnings.simplefilter(action='ignore', category=UndefinedMetricWarning)

model = KNeighborsClassifier()
model.fit(X_train, Y_train)
y_pred = model.predict(X_test)
print(classification_report(y_pred, Y_test))

              precision    recall  f1-score   support

           0       0.47      0.90      0.62       609
           1       0.54      0.74      0.62       759
           2       0.03      0.63      0.05        38
           3       0.50      0.80      0.61       464
           4       0.12      0.68      0.21       122
           5       0.07      0.84      0.14        57
           6       0.04      0.56      0.08        52
           7       0.56      0.96      0.70       331
           8       0.06      0.67      0.12        54
           9       0.32      0.99      0.49       169
          10       0.25      0.79      0.38       164
          11       0.14      0.65      0.23       105
          12       0.31      0.81      0.45       170
          13       0.00      0.00      0.00         0
          14       0.00      0.00      0.00         0
          15       0.00      0.00      0.00         1
          16       0.02      0.75      0.03         8
          17       0.23    