### https://github.com/johannfaouzi/pyts

In [3]:
from sklearn.metrics import accuracy_score
from sktime.utils.load_data import load_from_tsfile_to_dataframe
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
from tqdm.notebook import tqdm
from sklearn import preprocessing

import logging
logging.basicConfig(filename="saxvsm_results_history.log", level=logging.INFO)

import warnings
warnings.filterwarnings("ignore")

from pyts.classification import SAXVSM
from pyts.multivariate.classification import MultivariateClassifier

In [4]:
def preproc_before_knn_dtw(X):
    X = X.applymap(np.array)

    dimensions_lst = []

    for dim in X.columns:
        dimensions_lst.append(np.dstack(list(X[dim].values))[0])

    dimensions_lst = np.array(dimensions_lst)
    X = torch.from_numpy(np.array(dimensions_lst, dtype=np.float64))
    X = X.transpose(0, 2)
    X = X.transpose(1, 2)
    X = F.normalize(X, dim=1)
    X = X.float().numpy()
    return X


def preproc_answers(y):
    le = preprocessing.LabelEncoder()
    y = le.fit_transform(y)
    return y

# Run on all datasets

In [9]:
datasets_directory = "/root/data/Multivariate_ts/"
datasets_names_lst = os.listdir(datasets_directory)
datasets_names_lst[15:]

['EigenWorms',
 'UWaveGestureLibrary',
 'PenDigits',
 'ArticularyWordRecognition',
 'PEMS-SF',
 'InsectWingbeat',
 '.ipynb_checkpoints',
 'HandMovementDirection',
 'AtrialFibrillation',
 'ERing',
 'FingerMovements',
 'SelfRegulationSCP1',
 'LSST',
 'NATOPS',
 'Libras',
 'Epilepsy']

In [None]:
datasets_directory = "/root/data/Multivariate_ts/"
datasets_names_lst = os.listdir(datasets_directory)

for dataset_name in tqdm(datasets_names_lst[15:]):
    logging.info(f'{dataset_name}')
    print(dataset_name)
    
    try:
        X_train, y_train = load_from_tsfile_to_dataframe(datasets_directory\
                                                         + dataset_name + f'/{dataset_name}_TRAIN.ts')
        X_test, y_test = load_from_tsfile_to_dataframe(datasets_directory\
                                                       + dataset_name + f'/{dataset_name}_TEST.ts')
        print('data has been loaded')
        
        X_train = preproc_before_knn_dtw(X_train)
        X_test = preproc_before_knn_dtw(X_test)
        print('data has been preprocessed')
        
        clf = MultivariateClassifier(SAXVSM())
        clf.fit(X_train, y_train)

        test_predictions = clf.predict(X_test)
        test_accuracy = accuracy_score(test_predictions, y_test)
        logging.info(f"test_accuracy: {test_accuracy}\n")
        print('test_accuracy:',test_accuracy)
        print('\n')
    except:
        print('Error: smth is wrong\n')
        logging.info(f"Error: smth is wrong\n")

HBox(children=(IntProgress(value=0, max=16), HTML(value='')))

EigenWorms
data has been loaded
data has been preprocessed


# One dataset example

In [136]:
# dataset_name = 'InsectWingbeat'
dataset_name = 'PenDigits'
# dataset_name = 'Cricket'

datasets_directory = "/root/data/Multivariate_ts/"

X_train, y_train = load_from_tsfile_to_dataframe(datasets_directory + dataset_name + f'/{dataset_name}_TRAIN.ts')
X_test, y_test = load_from_tsfile_to_dataframe(datasets_directory + dataset_name + f'/{dataset_name}_TEST.ts')

In [137]:
X_train = preproc_before_knn_dtw(X_train)
X_test = preproc_before_knn_dtw(X_test)

y_train = preproc_answers(y_train)
y_test = preproc_answers(y_test)

In [138]:
clf = MultivariateClassifier(SAXVSM(n_bins=2))

In [139]:
X_test.shape

(3498, 2, 8)

In [140]:
np.where(np.var(X_train, axis=1) == 0)[0]

array([   1,    2,    4, ..., 7485, 7489, 7492])

In [141]:
train__const_indicies = np.where(np.var(X_train, axis=1) == 0)[0]
test_const_indicies = np.where(np.var(X_test, axis=1) == 0)[0]

In [142]:
X_train = np.delete(X_train, const_indicies, axis=1)
# y_train = np.delete(y_train, const_indicies, axis=1)

X_test = np.delete(X_test, const_indicies, axis=1)
# y_test = np.delete(y_test, const_indicies, axis=1)

In [143]:
X_test.shape

(3498, 1, 8)

In [144]:
clf.fit(X_train, y_train)

train_predictions = clf.predict(X_train)
train_accuracy = accuracy_score(train_predictions, y_train)
print('train_accuracy:', round(train_accuracy,5))

test_predictions = clf.predict(X_test)
test_accuracy = accuracy_score(test_predictions, y_test)
print('test_accuracy:', round(test_accuracy,5))

ValueError: At least one sample is constant.