In [1]:
import os
import pathlib
import random

import numpy as np
import torch
from tqdm import tqdm

from utils.common import load_file_lists

In [2]:
def create_features(split_path, emb_dir="../data/mtat/emb"):
    """Assemble feature and label arrays for the combined train + valid splits.

    ``binary.npy`` inside ``split_path`` is read into a lookup that maps the
    first element of every row to the remainder of that row. Each entry
    listed by ``train.npy`` and ``valid.npy`` is then resolved to an embedding
    file under ``emb_dir`` (same relative name, ``.npy`` suffix), loaded and
    flattened.

    Args:
        split_path: Directory holding ``binary.npy``, ``train.npy`` and
            ``valid.npy``.
        emb_dir: Root directory of the embedding ``.npy`` files. Defaults to
            the location that used to be hard-coded in this function.

    Returns:
        ``(X_train, Y_train)``: two NumPy arrays with one entry per listed
        file -- the flattened embeddings and the matching rows from
        ``binary.npy``.

    Raises:
        KeyError: If an index from the split lists is missing in ``binary.npy``.
    """
    # NOTE(security): allow_pickle=True lets np.load unpickle arbitrary
    # objects -- only point this at trusted files.
    labels_by_id = {
        row[0]: row[1:]
        for row in np.load(os.path.join(split_path, "binary.npy"), allow_pickle=True)
    }

    # Train and valid are deliberately merged into a single fitting set.
    split_files = [os.path.join(split_path, name) for name in ("train.npy", "valid.npy")]
    entries = load_file_lists(split_files)

    X_train = []
    Y_train = []
    # Each entry is unpacked as (index, relative filename).
    for idx, filename in tqdm(entries):
        emb_file = os.path.join(emb_dir, str(pathlib.Path(filename).with_suffix(".npy")))
        X_train.append(np.load(emb_file, allow_pickle=True).flatten())
        # Lookup uses int(idx); assumes binary.npy's first column holds
        # integer ids -- TODO confirm.
        Y_train.append(labels_by_id[int(idx)])

    return np.array(X_train), np.array(Y_train)

In [3]:
def set_seed(seed: int = 42) -> None:
    """Apply one seed value to every random number source used here.

    Seeds NumPy's global RNG, Python's ``random`` module, ``torch`` and
    ``torch.cuda``, stores the seed in the ``PYTHONHASHSEED`` environment
    variable, and prints a confirmation line.

    Args:
        seed: Seed value handed to each seeding function.
    """
    seeders = (
        np.random.seed,
        random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # NOTE(review): the interpreter presumably reads PYTHONHASHSEED only at
    # start-up, so this likely affects child processes only -- confirm.
    os.environ.update(PYTHONHASHSEED=str(seed))
    print(f"Random seed set as {seed}")

In [4]:
# Seed every RNG before any data is loaded or any model is fitted.
set_seed(123456)

Random seed set as 123456


In [None]:
# Dataset variant to load and the base directories (relative paths).
dataset_name = "mtat-10"
split_path = "../split"
features_path = "../data/mtat/features/"  # NOTE(review): not referenced in the visible cells -- confirm it is still needed

# Narrow split_path down to this dataset's own split directory.
split_path = os.path.join(split_path, dataset_name)
# NOTE(review): create_features only *reads* from this directory, so creating
# it when missing just postpones the failure to the first np.load -- confirm
# the makedirs call is intentional.
os.makedirs(split_path, exist_ok=True)
# Load the embeddings and labels for the train + valid entries.
X_train, Y_train = create_features(split_path)

 96%|█████████▌| 14233/14822 [02:10<00:04, 140.18it/s]

In [None]:
# Display the feature array (one flattened embedding per loaded file).
X_train

In [None]:
# Display the label array (one row from binary.npy per loaded file).
Y_train

In [None]:
import warnings

from sklearn.ensemble import RandomForestClassifier
from sklearn.exceptions import UndefinedMetricWarning
from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

from components.tester import SklearnTester
from utils.config import Config

# Suppress sklearn's UndefinedMetricWarning for the rest of the session.
warnings.simplefilter(action='ignore', category=UndefinedMetricWarning)


# Evaluation harness; model=None because each estimator is passed to
# tester.test() explicitly below.
config = Config(model=None, model_filename_path="../models", data_path='../data',
                dataset_split_path="../split", dataset_name="mtat-10")
tester = SklearnTester(config)

# Baseline estimators, all fitted on the same features and labels.
models = [
    RandomForestClassifier(bootstrap=True,
                           max_depth=20,
                           max_features='sqrt',
                           n_jobs=4,
                           random_state=1,
                           warm_start=True),
    DecisionTreeClassifier(max_depth=20,
                           max_features='sqrt',
                           random_state=1),
    KNeighborsClassifier(),
    RadiusNeighborsClassifier(radius=100.)
]

# Keep every model's result, keyed by estimator class name. Previously `stats`
# was overwritten on each iteration, so only the last model's result survived.
stats_by_model = {}
for model in models:
    model.fit(X_train, Y_train)
    stats = tester.test(model)
    stats_by_model[type(model).__name__] = stats