In [34]:
#!pip install sktime
#!pip install giotto-tda
#!pip install openml

In [35]:
import openml

In [36]:
%matplotlib inline

In [37]:
from sklearn.preprocessing import StandardScaler

from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

from sklearn.model_selection import GridSearchCV, StratifiedKFold

from sklearn.metrics import accuracy_score

import torch
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset
### our imports

from src.utils import (get_data_from_directory, get_files_directory_list, 
                       one_hot_encoding, TimeSeriesDataset,get_device, train_clf)

from src.TFE import *
from src import VariationalAutoencoder, train_AE

In [38]:
#!wget -nc "http://www.timeseriesclassification.com/Downloads/Archives/Univariate2018_arff.zip"
#!unzip -q -n "Univariate2018_arff.zip"

In [39]:
# Choose one dataset by its position in the alphabetically sorted listing.
directory_list = sorted(get_files_directory_list())

# Despite its name, the index is a fixed literal, so the same dataset is used every run.
random_index = 15
random_path = directory_list[random_index]

# Load the four splits and squeeze each one as it is unpacked.
X_train, X_test, y_train, y_test = (
    split.squeeze() for split in get_data_from_directory(random_path)
)

# NOTE(review): `np` is not imported explicitly in this notebook; presumably it
# arrives through `from src.TFE import *` -- confirm, or add `import numpy as np`.
num_classes = len(np.unique(y_train))

for label, value in (
    ('Dataset: ', random_path),
    ('X_train shape: ', X_train.shape),
    ('y_train shape: ', y_train.shape),
    ('X_test shape:  ', X_test.shape),
    ('y_test shape:  ', y_test.shape),
    ('num. of class: ', num_classes),
):
    print(label, value)

Dataset:  Coffee
X_train shape:  (28, 286)
y_train shape:  (28,)
X_test shape:   (28, 286)
y_test shape:   (28,)
num. of class:  2


In [40]:
%%time

# Component that turns each series into persistence diagrams.
diagram_extractor = PersistenceDiagramsExtractor(
    tokens_embedding_dim=2,
    tokens_embedding_delay=3,
    homology_dimensions=(0, 1),
    parallel=True,
)

# Summary features computed from every persistence diagram.
diagram_features = [
    HolesNumberFeature(),
    MaxHoleLifeTimeFeature(),
    RelevantHolesNumber(),
    AverageHoleLifetimeFeature(),
    SumHoleLifetimeFeature(),
    PersistenceEntropyFeature(),
    SimultaneousAliveHolesFeatue(),
    AveragePersistenceLandscapeFeature(),
    BettiNumbersSumFeature(),
    RadiusAtMaxBNFeature(),
]

feature_extractor = TopologicalFeaturesExtractor(
    persistence_diagram_extractor=diagram_extractor,
    persistence_diagram_features=diagram_features,
)

X_train_transformed = feature_extractor.fit_transform(X_train)
# NOTE(review): the extractor is fitted again on the test split here. If fitting
# learns anything from the data this leaks test information and should be
# `transform(X_test)` -- confirm against the implementation in src.TFE.
X_test_transformed = feature_extractor.fit_transform(X_test)

CPU times: user 2.7 s, sys: 183 ms, total: 2.88 s
Wall time: 10.6 s


In [41]:
# Report the shape of the extracted feature matrix for each split.
for split_label, split_features in (
    ('X_train_transformed shape: ', X_train_transformed),
    ('X_test_transformed shape:  ', X_test_transformed),
):
    print(split_label, split_features.shape)

X_train_transformed shape:  (28, 19)
X_test_transformed shape:   (28, 19)


In [42]:
# Display column 15 of the training feature matrix for every training sample.
# NOTE(review): the displayed values are in the thousands, and the SVC/KNN cells
# below are fitted on these features without scaling -- confirm that is intended.
X_train_transformed[:,15]

array([4578., 5431., 5215., 5185., 4786., 5047., 4557., 5734., 4990.,
       5001., 4797., 5454., 5226., 5875., 4269., 4058., 4062., 3901.,
       3688., 4225., 4123., 3600., 3459., 3683., 4023., 4057., 4432.,
       4033.])

In [43]:
class Experiment:
    """Grid-search a classifier on training features and score it on a test set.

    Parameters
    ----------
    model : estimator
        Estimator handed to ``GridSearchCV``.
    parameters_grid : dict or list of dict
        Hyper-parameter grid, forwarded as ``param_grid``.
    cv : int or cross-validation splitter
        Forwarded as ``cv``.
    scoring : str or callable
        Forwarded as ``scoring``.
    n_jobs : int, default -1
        Forwarded as ``n_jobs``.
    """

    def __init__(self, model, parameters_grid, cv, scoring, n_jobs = -1):
        self.model = model
        self.parameters = parameters_grid
        self.cv = cv
        # Stored as given. A trailing comma here would wrap the value in a
        # 1-tuple, which GridSearchCV treats as multi-metric scoring and
        # rejects when ``refit`` is left at its default of True.
        self.scoring = scoring
        self.jobs = n_jobs

    def GridSearchFit(self, X_train_transformed, y_train, y_test, X_test=None):
        """Run the grid search and return test accuracy of the best estimator.

        Parameters
        ----------
        X_train_transformed : array-like
            Training features used to fit the grid search.
        y_train : array-like
            Training labels.
        y_test : array-like
            Test labels used for the returned accuracy.
        X_test : array-like, optional
            Test features to predict on. When omitted, the notebook-level
            ``X_test_transformed`` is used, which keeps existing three-argument
            calls working.

        Returns
        -------
        float
            ``accuracy_score`` of the refitted best estimator on the test set.
        """
        if X_test is None:
            # Backward-compatible fallback to the global the original relied on.
            X_test = X_test_transformed

        self.model_cv = GridSearchCV(self.model, param_grid = self.parameters, cv = self.cv,
                                     scoring = self.scoring, n_jobs = self.jobs)
        self.model_cv.fit(X_train_transformed, y_train)
        return accuracy_score(y_test, self.model_cv.best_estimator_.predict(X_test))

In [44]:
# Hyper-parameter grid for the SVM: C from 1e-2 to 1e4, four kernels.
parameters = {
    "C": [10**exponent for exponent in range(-2, 5)],
    "kernel": ["linear", "rbf", "sigmoid", "poly"],
}

# Grid search scored by accuracy under 2-fold stratified, shuffled CV.
svc_cv = GridSearchCV(
    estimator=SVC(random_state=42),
    param_grid=parameters,
    cv=StratifiedKFold(n_splits=2, shuffle=True, random_state=42),
    scoring='accuracy',
    n_jobs=-1,
)
svc_cv.fit(X_train_transformed, y_train)

# Score the best estimator on each split.
best_svc = svc_cv.best_estimator_
for split_label, split_features, split_labels in (
    ("Train accuracy: ", X_train_transformed, y_train),
    ("Test accuracy: ", X_test_transformed, y_test),
):
    print(split_label, accuracy_score(split_labels, best_svc.predict(split_features)))

Train accuracy:  1.0
Test accuracy:  0.9642857142857143


In [45]:
# Hyper-parameter grid for XGBoost on the topological features.
parameters = {"max_depth": [2, 10, 15, 20, 25, 30, 35, 40, 45, 50, 70, 100, 120, 150],
              "n_estimators": [20, 50, 100, 150, 200, 250]}

# The search is stored under its own name. The original assigned it to `svc_cv`,
# which overwrote the fitted SVC search and mislabelled this model.
# NOTE(review): both the classifier and the grid search request n_jobs=-1;
# confirm that nested parallelism is intended.
xgb_cv = GridSearchCV(XGBClassifier(n_jobs=-1, random_state=42),
                      param_grid=parameters,
                      cv=StratifiedKFold(n_splits=2, shuffle=True, random_state=42),
                      scoring='accuracy',
                      n_jobs=-1)
xgb_cv.fit(X_train_transformed, y_train)

print("Train accuracy: ", accuracy_score(y_train, xgb_cv.best_estimator_.predict(X_train_transformed)))
print("Test accuracy: ", accuracy_score(y_test, xgb_cv.best_estimator_.predict(X_test_transformed)))

Train accuracy:  1.0
Test accuracy:  0.9642857142857143


In [12]:
# Candidate neighbourhood sizes for k-NN on the topological features.
parameters = {"n_neighbors": [3, 5, 7, 11]}

# Grid search scored by accuracy under 2-fold stratified, shuffled CV.
knn_cv = GridSearchCV(
    estimator=KNeighborsClassifier(n_jobs=-1),
    param_grid=parameters,
    cv=StratifiedKFold(n_splits=2, shuffle=True, random_state=42),
    scoring='accuracy',
    n_jobs=-1,
)
knn_cv.fit(X_train_transformed, y_train)

# Score the best estimator on each split.
best_knn = knn_cv.best_estimator_
for split_label, split_features, split_labels in (
    ("Train accuracy: ", X_train_transformed, y_train),
    ("Test accuracy: ", X_test_transformed, y_test),
):
    print(split_label, accuracy_score(split_labels, best_knn.predict(split_features)))

Train accuracy:  0.9642857142857143
Test accuracy:  0.9642857142857143


In [11]:
# Seed NumPy and PyTorch, and ask cuDNN for deterministic, non-autotuned kernels.
np.random.seed(0)
torch.manual_seed(0)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Autoencoder settings; the latent size is four dimensions per class.
batch_size = 32
latent_dim = 4 * num_classes
device = get_device()

In [12]:
# Standardise the features with statistics fitted on the training split only.
scale = StandardScaler().fit(X_train_transformed)


def handle_dim(x):
    """Scale ``x`` with the fitted scaler and insert a singleton axis at position 1.

    A trailing axis is appended to the scaled array and then swapped with
    axis 1, so a 2-D ``(n, f)`` input comes back as ``(n, 1, f)``.
    """
    scaled = scale.transform(x)
    return np.swapaxes(scaled[..., np.newaxis], 1, -1)


X_train_transformed_dim, X_test_transformed_dim = map(
    handle_dim, (X_train_transformed, X_test_transformed)
)

# One-hot encode the labels of both splits.
y_hot_train, y_hot_test = map(one_hot_encoding, (y_train, y_test))

dataset_train = TimeSeriesDataset(X_train_transformed_dim, y_hot_train)
dataset_test = TimeSeriesDataset(X_test_transformed_dim, y_hot_test)

# Both loaders use the default (unshuffled) order.
loader_train, loader_test = (
    DataLoader(split_dataset, batch_size=batch_size)
    for split_dataset in (dataset_train, dataset_test)
)

# All-zero tensor with the shape of the full training input.
test_data = torch.zeros(dataset_train[:][0].shape)

In [13]:
# Build the VAE and move it to the selected device in one step.
vae = VariationalAutoencoder(
    batch_size=batch_size,
    latent_dims=latent_dim,
    test_data=test_data,
).to(device)

# Count only the parameters that receive gradients.
num_params = 0
for param in vae.parameters():
    if param.requires_grad:
        num_params += param.numel()
print('Number of parameters: %d' % num_params)

Number of parameters: 19057


In [14]:
# Adam optimiser over all VAE parameters, with a small weight decay.
optimizer = torch.optim.Adam(params=vae.parameters(), lr=2e-3, weight_decay=1e-5)

# Train for 1000 epochs (the log below counts "Epoch [k / 1000]").
# NOTE(review): the logged "average reconstruction error" turns negative after a
# few epochs -- confirm the loss definition inside train_AE.
train_AE(1000, vae, loader_train, loader_test, optimizer, device)

Training ...
Epoch [1 / 1000] average reconstruction error: 265.662018
Epoch [2 / 1000] average reconstruction error: 245.229553
Epoch [3 / 1000] average reconstruction error: 223.048874
Epoch [4 / 1000] average reconstruction error: 197.468384
Epoch [5 / 1000] average reconstruction error: 165.410492
Epoch [6 / 1000] average reconstruction error: 125.906525
Epoch [7 / 1000] average reconstruction error: 77.413147
Epoch [8 / 1000] average reconstruction error: 19.322205
Epoch [9 / 1000] average reconstruction error: -49.610023
Epoch [10 / 1000] average reconstruction error: -131.845184
Epoch [11 / 1000] average reconstruction error: -229.402924
Epoch [12 / 1000] average reconstruction error: -346.140442
Epoch [13 / 1000] average reconstruction error: -486.666992
Epoch [14 / 1000] average reconstruction error: -655.054199
Epoch [15 / 1000] average reconstruction error: -857.492676
Epoch [16 / 1000] average reconstruction error: -1099.860352
Epoch [17 / 1000] average reconstruction error

Epoch [139 / 1000] average reconstruction error: -6479.620117
Epoch [140 / 1000] average reconstruction error: -6433.132812
Epoch [141 / 1000] average reconstruction error: -6471.207520
Epoch [142 / 1000] average reconstruction error: -6467.018555
Epoch [143 / 1000] average reconstruction error: -6457.442871
Epoch [144 / 1000] average reconstruction error: -6513.291016
Epoch [145 / 1000] average reconstruction error: -6446.139648
Epoch [146 / 1000] average reconstruction error: -6587.675781
Epoch [147 / 1000] average reconstruction error: -6428.706543
Epoch [148 / 1000] average reconstruction error: -6552.922852
Epoch [149 / 1000] average reconstruction error: -6483.969727
Epoch [150 / 1000] average reconstruction error: -6436.144531
Epoch [151 / 1000] average reconstruction error: -6510.110352
Epoch [152 / 1000] average reconstruction error: -6346.612793
Epoch [153 / 1000] average reconstruction error: -6606.180176
Epoch [154 / 1000] average reconstruction error: -6397.910645
Epoch [1

Epoch [275 / 1000] average reconstruction error: -6885.376953
Epoch [276 / 1000] average reconstruction error: -6886.192871
Epoch [277 / 1000] average reconstruction error: -6618.939941
Epoch [278 / 1000] average reconstruction error: -6759.805664
Epoch [279 / 1000] average reconstruction error: -6615.219727
Epoch [280 / 1000] average reconstruction error: -6763.186523
Epoch [281 / 1000] average reconstruction error: -6811.892090
Epoch [282 / 1000] average reconstruction error: -6679.028809
Epoch [283 / 1000] average reconstruction error: -6732.780762
Epoch [284 / 1000] average reconstruction error: -6539.965332
Epoch [285 / 1000] average reconstruction error: -6662.092773
Epoch [286 / 1000] average reconstruction error: -6599.944336
Epoch [287 / 1000] average reconstruction error: -6605.726562
Epoch [288 / 1000] average reconstruction error: -6683.839355
Epoch [289 / 1000] average reconstruction error: -6583.981445
Epoch [290 / 1000] average reconstruction error: -6652.666992
Epoch [2

Epoch [411 / 1000] average reconstruction error: -6728.403809
Epoch [412 / 1000] average reconstruction error: -6744.681152
Epoch [413 / 1000] average reconstruction error: -6786.955078
Epoch [414 / 1000] average reconstruction error: -6822.638184
Epoch [415 / 1000] average reconstruction error: -6818.086914
Epoch [416 / 1000] average reconstruction error: -6737.409180
Epoch [417 / 1000] average reconstruction error: -6722.125977
Epoch [418 / 1000] average reconstruction error: -6747.523438
Epoch [419 / 1000] average reconstruction error: -6773.310547
Epoch [420 / 1000] average reconstruction error: -6789.874512
Epoch [421 / 1000] average reconstruction error: -6748.058594
Epoch [422 / 1000] average reconstruction error: -6725.624023
Epoch [423 / 1000] average reconstruction error: -6683.490723
Epoch [424 / 1000] average reconstruction error: -6656.791504
Epoch [425 / 1000] average reconstruction error: -6635.212891
Epoch [426 / 1000] average reconstruction error: -6650.588867
Epoch [4

Epoch [544 / 1000] average reconstruction error: -6670.075684
Epoch [545 / 1000] average reconstruction error: -6637.847656
Epoch [546 / 1000] average reconstruction error: -6608.014160
Epoch [547 / 1000] average reconstruction error: -6646.677246
Epoch [548 / 1000] average reconstruction error: -6540.105469
Epoch [549 / 1000] average reconstruction error: -6703.511230
Epoch [550 / 1000] average reconstruction error: -6545.480469
Epoch [551 / 1000] average reconstruction error: -6759.244141
Epoch [552 / 1000] average reconstruction error: -6697.210938
Epoch [553 / 1000] average reconstruction error: -6671.153320
Epoch [554 / 1000] average reconstruction error: -6746.539551
Epoch [555 / 1000] average reconstruction error: -6597.201172
Epoch [556 / 1000] average reconstruction error: -6687.931641
Epoch [557 / 1000] average reconstruction error: -6660.604004
Epoch [558 / 1000] average reconstruction error: -6598.805176
Epoch [559 / 1000] average reconstruction error: -6663.130371
Epoch [5

Epoch [684 / 1000] average reconstruction error: -6746.377441
Epoch [685 / 1000] average reconstruction error: -6780.358398
Epoch [686 / 1000] average reconstruction error: -6717.123047
Epoch [687 / 1000] average reconstruction error: -6745.405273
Epoch [688 / 1000] average reconstruction error: -6719.088867
Epoch [689 / 1000] average reconstruction error: -6660.640625
Epoch [690 / 1000] average reconstruction error: -6710.914551
Epoch [691 / 1000] average reconstruction error: -6653.311523
Epoch [692 / 1000] average reconstruction error: -6751.981445
Epoch [693 / 1000] average reconstruction error: -6665.504883
Epoch [694 / 1000] average reconstruction error: -6800.081055
Epoch [695 / 1000] average reconstruction error: -6710.356934
Epoch [696 / 1000] average reconstruction error: -6762.470215
Epoch [697 / 1000] average reconstruction error: -6750.371582
Epoch [698 / 1000] average reconstruction error: -6691.446777
Epoch [699 / 1000] average reconstruction error: -6743.241699
Epoch [7

Epoch [823 / 1000] average reconstruction error: -6596.159180
Epoch [824 / 1000] average reconstruction error: -6714.030762
Epoch [825 / 1000] average reconstruction error: -6583.604004
Epoch [826 / 1000] average reconstruction error: -6709.463867
Epoch [827 / 1000] average reconstruction error: -6670.005859
Epoch [828 / 1000] average reconstruction error: -6637.345215
Epoch [829 / 1000] average reconstruction error: -6723.384277
Epoch [830 / 1000] average reconstruction error: -6599.851074
Epoch [831 / 1000] average reconstruction error: -6695.137207
Epoch [832 / 1000] average reconstruction error: -6641.844727
Epoch [833 / 1000] average reconstruction error: -6634.949219
Epoch [834 / 1000] average reconstruction error: -6693.048340
Epoch [835 / 1000] average reconstruction error: -6616.495117
Epoch [836 / 1000] average reconstruction error: -6693.825195
Epoch [837 / 1000] average reconstruction error: -6630.503906
Epoch [838 / 1000] average reconstruction error: -6652.104980
Epoch [8

Epoch [958 / 1000] average reconstruction error: -6591.026367
Epoch [959 / 1000] average reconstruction error: -6708.705566
Epoch [960 / 1000] average reconstruction error: -6645.272949
Epoch [961 / 1000] average reconstruction error: -6643.066895
Epoch [962 / 1000] average reconstruction error: -6681.660645
Epoch [963 / 1000] average reconstruction error: -6556.186035
Epoch [964 / 1000] average reconstruction error: -6682.799805
Epoch [965 / 1000] average reconstruction error: -6659.145020
Epoch [966 / 1000] average reconstruction error: -6630.335938
Epoch [967 / 1000] average reconstruction error: -6644.410156
Epoch [968 / 1000] average reconstruction error: -6554.061035
Epoch [969 / 1000] average reconstruction error: -6715.538086
Epoch [970 / 1000] average reconstruction error: -6702.746094
Epoch [971 / 1000] average reconstruction error: -6807.363770
Epoch [972 / 1000] average reconstruction error: -6745.267090
Epoch [973 / 1000] average reconstruction error: -6729.573730
Epoch [9

In [15]:
# Encode the full input tensor of each split with the trained VAE and convert
# the result to a NumPy array on the CPU.
z_train, z_test = (
    vae.transform(split_loader.dataset[:][0]).cpu().detach().numpy()
    for split_loader in (loader_train, loader_test)
)

In [16]:
# Hyper-parameter grid for XGBoost on the VAE latent codes.
parameters = {"max_depth": [2, 10, 15, 20, 25, 30, 35, 40, 45, 50, 70, 100, 120, 150],
              "n_estimators": [20, 50, 100, 150, 200, 250]}

# The search is stored under its own name. The original assigned it to `svc_cv`,
# which mislabelled this XGBoost model as an SVC search.
# NOTE(review): both the classifier and the grid search request n_jobs=-1;
# confirm that nested parallelism is intended.
xgb_cv = GridSearchCV(XGBClassifier(n_jobs=-1, random_state=42),
                      param_grid=parameters,
                      cv=StratifiedKFold(n_splits=2, shuffle=True, random_state=42),
                      scoring='accuracy',
                      n_jobs=-1)
xgb_cv.fit(z_train, y_train)

print('Dataset: ', random_path)
print("Train accuracy: ", accuracy_score(y_train, xgb_cv.best_estimator_.predict(z_train)))
print("Test accuracy: ", accuracy_score(y_test, xgb_cv.best_estimator_.predict(z_test)))

Dataset:  Coffee
Train accuracy:  1.0
Test accuracy:  0.9642857142857143


In [17]:
# Hyper-parameter grid for the SVM on the latent codes: C from 1e-2 to 1e4, four kernels.
parameters = {
    "C": [10**exponent for exponent in range(-2, 5)],
    "kernel": ["linear", "rbf", "sigmoid", "poly"],
}

# Grid search scored by accuracy under 2-fold stratified, shuffled CV.
svc_cv = GridSearchCV(
    estimator=SVC(random_state=42),
    param_grid=parameters,
    cv=StratifiedKFold(n_splits=2, shuffle=True, random_state=42),
    scoring='accuracy',
    n_jobs=-1,
)
svc_cv.fit(z_train, y_train)

print('Dataset: ', random_path)
# Score the best estimator on each split.
best_svc = svc_cv.best_estimator_
for split_label, split_codes, split_labels in (
    ("Train accuracy: ", z_train, y_train),
    ("Test accuracy: ", z_test, y_test),
):
    print(split_label, accuracy_score(split_labels, best_svc.predict(split_codes)))

Dataset:  Coffee
Train accuracy:  1.0
Test accuracy:  1.0


In [18]:
# Candidate neighbourhood sizes for k-NN on the latent codes.
parameters = {"n_neighbors": [1, 2, 3, 5, 7, 11]}

# Grid search scored by accuracy under 2-fold stratified, shuffled CV.
knn_cv = GridSearchCV(
    estimator=KNeighborsClassifier(n_jobs=-1),
    param_grid=parameters,
    cv=StratifiedKFold(n_splits=2, shuffle=True, random_state=42),
    scoring='accuracy',
    n_jobs=-1,
)
knn_cv.fit(z_train, y_train)

print('Dataset: ', random_path)
# Score the best estimator on each split.
best_knn = knn_cv.best_estimator_
for split_label, split_codes, split_labels in (
    ("Train accuracy: ", z_train, y_train),
    ("Test accuracy: ", z_test, y_test),
):
    print(split_label, accuracy_score(split_labels, best_knn.predict(split_codes)))

Dataset:  Coffee
Train accuracy:  1.0
Test accuracy:  0.9642857142857143
