### Multivariate time-series classification with BOSSVS from pyts (https://github.com/johannfaouzi/pyts)

In [17]:
from sklearn.metrics import accuracy_score
from sktime.utils.load_data import load_from_tsfile_to_dataframe
import numpy as np
from sklearn import preprocessing
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
from tqdm.notebook import tqdm

import logging
logging.basicConfig(filename="bossvs_results_history.log", level=logging.INFO)

import warnings
warnings.filterwarnings("ignore")

from pyts.classification import BOSSVS
from pyts.multivariate.classification import MultivariateClassifier

In [13]:
def preproc_before_knn_dtw(X):
    """Convert a nested time-series DataFrame into a normalised 3-D array.

    Parameters
    ----------
    X : pandas.DataFrame
        One row per sample and one column per dimension. Every cell holds a
        1-D numeric sequence (``numpy`` array, list or ``pandas.Series``);
        all sequences must have the same length.

    Returns
    -------
    numpy.ndarray
        ``float32`` array of shape ``(n_samples, n_dimensions, n_timestamps)``.
        For every sample and timestamp, the vector of values taken across the
        dimensions is scaled to unit L2 norm (``F.normalize(..., dim=1)``);
        all-zero vectors stay zero.

    Raises
    ------
    ValueError
        If ``X`` has no rows or no columns, or if the sequences do not all
        share the same length.
    """
    if X.shape[0] == 0 or X.shape[1] == 0:
        raise ValueError(
            "X must contain at least one sample (row) and one dimension (column)"
        )

    # Build one (n_samples, n_timestamps) matrix per dimension. Converting the
    # cells directly avoids DataFrame.applymap, which newer pandas deprecates.
    per_dimension = [
        np.stack([np.asarray(series, dtype=np.float64) for series in X[dim]])
        for dim in X.columns
    ]

    # Stack the dimensions on axis 1 -> (n_samples, n_dimensions, n_timestamps).
    stacked = np.stack(per_dimension, axis=1)

    # Normalise in float64 first and only then down-cast to float32.
    normalised = F.normalize(torch.from_numpy(stacked), dim=1)
    return normalised.float().numpy()


def preproc_answers(y):
    """Encode class labels as integer codes.

    A fresh ``LabelEncoder`` is fitted on ``y`` on every call, and the
    encoded version of ``y`` is returned.
    """
    return preprocessing.LabelEncoder().fit_transform(y)

# Run on all datasets

In [None]:
# Benchmark BOSSVS on every dataset found in the data directory and log the
# test accuracy of each one.
datasets_directory = "/root/data/Multivariate_ts/"
datasets_names_lst = os.listdir(datasets_directory)

for dataset_name in tqdm(datasets_names_lst):
    logging.info(f'{dataset_name}')
    print(dataset_name)

    try:
        train_path = datasets_directory + dataset_name + f'/{dataset_name}_TRAIN.ts'
        test_path = datasets_directory + dataset_name + f'/{dataset_name}_TEST.ts'
        X_train, y_train = load_from_tsfile_to_dataframe(train_path)
        X_test, y_test = load_from_tsfile_to_dataframe(test_path)
        print('data has been loaded')

        X_train = preproc_before_knn_dtw(X_train)
        X_test = preproc_before_knn_dtw(X_test)
        print('data has been preprocessed')

        clf = MultivariateClassifier(BOSSVS())
        clf.fit(X_train, y_train)

        test_predictions = clf.predict(X_test)
        # accuracy_score expects (y_true, y_pred).
        test_accuracy = accuracy_score(y_test, test_predictions)
        logging.info(f"test_accuracy: {test_accuracy}\n")
        print('test_accuracy:', test_accuracy)
        print('\n')
    except Exception:
        # Keep going with the next dataset, but do not swallow
        # KeyboardInterrupt/SystemExit, and record the traceback so the
        # failure can be diagnosed from the log file.
        print('Error: smth is wrong\n')
        logging.exception("Error: smth is wrong\n")

HBox(children=(IntProgress(value=0, max=31), HTML(value='')))

DuckDuckGeese
data has been loaded
data has been preprocessed


# One dataset example

In [29]:
# Root folder that holds one sub-directory per dataset.
datasets_directory = "/root/data/Multivariate_ts/"

# Dataset to load; other candidates tried: 'InsectWingbeat', 'Cricket'.
dataset_name = 'LSST'

# Train and test splits are stored as <name>/<name>_TRAIN.ts and <name>/<name>_TEST.ts.
train_path = datasets_directory + dataset_name + f'/{dataset_name}_TRAIN.ts'
test_path = datasets_directory + dataset_name + f'/{dataset_name}_TEST.ts'

X_train, y_train = load_from_tsfile_to_dataframe(train_path)
X_test, y_test = load_from_tsfile_to_dataframe(test_path)

In [30]:
# Convert the nested DataFrames into normalised numeric arrays.
X_train = preproc_before_knn_dtw(X_train)
X_test = preproc_before_knn_dtw(X_test)

# Fit ONE encoder on the training labels and reuse it for the test labels.
# Encoding each split with its own encoder gives mismatched integer codes
# whenever the test split does not contain exactly the same set of classes.
label_encoder = preprocessing.LabelEncoder().fit(y_train)
y_train = label_encoder.transform(y_train)
y_test = label_encoder.transform(y_test)

In [31]:
# Sanity check: display the shape of the preprocessed training array.
X_train.shape

(2459, 6, 36)

In [32]:
# BOSSVS with default parameters, wrapped in pyts' MultivariateClassifier.
# NOTE(review): the wrapper is presumably what lets a univariate estimator be
# fitted on the 3-D multivariate array — confirm against the pyts docs.
clf = MultivariateClassifier(BOSSVS())

In [33]:
# Fit on the training split, then report accuracy on both splits.
clf.fit(X_train, y_train)

# accuracy_score expects (y_true, y_pred); the value is the same either way,
# but the documented order keeps the call correct if the metric is swapped
# for a non-symmetric one later.
train_predictions = clf.predict(X_train)
train_accuracy = accuracy_score(y_train, train_predictions)
print('train_accuracy:', round(train_accuracy,5))

test_predictions = clf.predict(X_test)
test_accuracy = accuracy_score(y_test, test_predictions)
print('test_accuracy:', round(test_accuracy,5))

train_accuracy: 0.23993
test_accuracy: 0.16545
