In [9]:
import os
import random
import numpy as np
import torch
import pathlib

from sklearn.model_selection import train_test_split
from models.common import load_file_lists

In [10]:
def create_features(features_path, split_path):
    try:
        X_np = np.load(os.path.join(features_path, "X.npy"), allow_pickle=True)
        Y_np = np.load(os.path.join(features_path, "Y.npy"), allow_pickle=True)
    except OSError:
        X = []
        Y = []
        data = load_file_lists([
            os.path.join(split_path, "train.npy"),
            os.path.join(split_path, "valid.npy"),
            os.path.join(split_path, "test.npy")
        ])
        binary = {row[0]: row[1:] for row in np.load(os.path.join(split_path, "binary.npy"), allow_pickle=True)}
        for idx, filename in data:
            filename = os.path.join("../data/mtat/emb", str(pathlib.Path(filename).with_suffix(".npy")))
            file_data = np.load(filename, allow_pickle=True).flatten()
            X.append(file_data)
            Y.append(binary[int(idx)])
        X_np = np.array(X)
        Y_np = np.array(Y)
        np.save(os.path.join(features_path, "X.npy"), X_np)
        np.save(os.path.join(features_path, "y.npy"), Y_np)
    return X_np, Y_np

In [11]:
def set_seed(seed: int = 42) -> None:
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    print(f"Random seed set as {seed}")

In [12]:
set_seed(123456)

Random seed set as 123456


In [13]:
dataset_name = "mtat-20"
split_path = "../split"
features_path = "../data/mtat/features/"

split_path = os.path.join(split_path, dataset_name)
features_path = os.path.join(features_path, dataset_name)
os.makedirs(split_path, exist_ok=True)
os.makedirs(features_path, exist_ok=True)

In [14]:
X_np, Y_np = create_features(features_path, split_path)

In [15]:
X_train, X_test, Y_train, Y_test = train_test_split(X_np, Y_np, random_state=1)

In [16]:
import pickle
from sklearn.exceptions import UndefinedMetricWarning
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier

import warnings
warnings.simplefilter(action='ignore', category=UndefinedMetricWarning)

model = KNeighborsClassifier()
model.fit(X_train, Y_train)
y_pred = model.predict(X_test)
print(classification_report(y_pred, Y_test))

              precision    recall  f1-score   support

           0       0.66      0.79      0.72       987
           1       0.73      0.70      0.71      1091
           2       0.26      0.50      0.34       456
           3       0.72      0.72      0.72       749
           4       0.40      0.53      0.46       494
           5       0.41      0.54      0.47       506
           6       0.31      0.51      0.39       379
           7       0.74      0.88      0.81       510
           8       0.40      0.43      0.41       540
           9       0.56      0.79      0.66       383
          10       0.63      0.68      0.65       464
          11       0.40      0.47      0.43       435
          12       0.62      0.70      0.66       378
          13       0.12      0.47      0.19       115
          14       0.17      0.42      0.25       178
          15       0.28      0.65      0.39       161
          16       0.52      0.65      0.58       274
          17       0.81    

In [24]:
model_filename_path = os.path.join("../models", model.__class__.__name__)
os.makedirs(model_filename_path, exist_ok=True)
model_filename = os.path.join(model_filename_path, f"{dataset_name}.bin")
pickle.dump(model, open(model_filename, 'wb+'))

In [25]:
dataset_name = "mtat-10"
split_path = "../split"
features_path = "../data/mtat/features/"

split_path = os.path.join(split_path, dataset_name)
features_path = os.path.join(features_path, dataset_name)
os.makedirs(split_path, exist_ok=True)
os.makedirs(features_path, exist_ok=True)

In [26]:
X_np, Y_np = create_features(features_path, split_path)

In [27]:
X_train, X_test, Y_train, Y_test = train_test_split(X_np, Y_np, random_state=1)

In [28]:
from sklearn.exceptions import UndefinedMetricWarning
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier

import warnings
warnings.simplefilter(action='ignore', category=UndefinedMetricWarning)

model = KNeighborsClassifier()
model.fit(X_train, Y_train)
y_pred = model.predict(X_test)
print(classification_report(y_pred, Y_test))

              precision    recall  f1-score   support

           0       0.73      0.82      0.77      1043
           1       0.80      0.75      0.77      1166
           2       0.41      0.64      0.50       583
           3       0.77      0.74      0.75       771
           4       0.51      0.59      0.55       592
           5       0.45      0.58      0.50       492
           6       0.40      0.54      0.46       461
           7       0.79      0.87      0.83       499
           8       0.44      0.46      0.45       562
           9       0.54      0.83      0.66       347

   micro avg       0.60      0.70      0.65      6516
   macro avg       0.58      0.68      0.62      6516
weighted avg       0.62      0.70      0.65      6516
 samples avg       0.65      0.70      0.63      6516



In [29]:
model_filename_path = os.path.join("../models", model.__class__.__name__)
os.makedirs(model_filename_path, exist_ok=True)
model_filename = os.path.join(model_filename_path, f"{dataset_name}.bin")
pickle.dump(model, open(model_filename, 'wb+'))

In [30]:
dataset_name = "mtat"
split_path = "../split"
features_path = "../data/mtat/features/"

split_path = os.path.join(split_path, dataset_name)
features_path = os.path.join(features_path, dataset_name)
os.makedirs(split_path, exist_ok=True)
os.makedirs(features_path, exist_ok=True)

In [31]:
X_np, Y_np = create_features(features_path, split_path)

In [32]:
X_train, X_test, Y_train, Y_test = train_test_split(X_np, Y_np, random_state=1)

In [33]:
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier

import warnings
warnings.simplefilter(action='ignore', category=UndefinedMetricWarning)

model = KNeighborsClassifier()
model.fit(X_train, Y_train)
y_pred = model.predict(X_test)
print(classification_report(y_pred, Y_test))

              precision    recall  f1-score   support

           0       0.66      0.78      0.71       990
           1       0.70      0.67      0.68      1093
           2       0.24      0.48      0.32       451
           3       0.71      0.69      0.70       761
           4       0.42      0.54      0.47       521
           5       0.38      0.56      0.45       438
           6       0.30      0.55      0.39       360
           7       0.73      0.81      0.77       516
           8       0.38      0.44      0.41       489
           9       0.54      0.80      0.64       351
          10       0.56      0.61      0.58       474
          11       0.38      0.45      0.41       411
          12       0.63      0.73      0.67       385
          13       0.16      0.53      0.25       135
          14       0.14      0.36      0.21       187
          15       0.28      0.59      0.38       172
          16       0.56      0.64      0.60       294
          17       0.74    

In [34]:
model_filename_path = os.path.join("../models", model.__class__.__name__)
os.makedirs(model_filename_path, exist_ok=True)
model_filename = os.path.join(model_filename_path, f"{dataset_name}.bin")
pickle.dump(model, open(model_filename, 'wb+'))

In [None]:
loaded_model = pickle.load(open("../models/KNeighborsClassifier/mtat.bin", 'rb'))
y_pred = loaded_model.predict(X_test)
print(classification_report(y_pred, Y_test))