Libs: \\
1. https://github.com/giotto-ai/giotto-tda \\
tutorial: https://github.com/giotto-ai/beetles-tda/blob/master/Beetle%20Population%20Dynamics.ipynb \\
2. https://github.com/scikit-tda/ \\

Repo: https://github.com/SamirMoustafa/Time-Series-Classification \\
Deep Learning: https://github.com/cauchyturing/UCR_Time_Series_Classification_Deep_Learning_Baseline \\



# Instalation \\
![alt text](https://drive.google.com/uc?id=1BCm2WfNRyqzANzk7bb6OgBqcG85DoeXI)



In [3]:
#!pip install giotto-learn
#!pip install giotto-tda
#!git clone https://github.com/giotto-ai/beetles-tda/ beetles_tda

In [20]:
from scipy.io import arff
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import giotto.time_series as ts
import giotto.homology as hl
from giotto.pipeline import Pipeline
from beetles_tda.features import get_amplitude, get_max_lifetime, get_mean_lifetime, get_n_rel_holes
import numpy as np

# Data preprocessing

In [None]:
directory_list = get_files_directory_list()
directory_list = sorted(directory_list)

random_index = 15
random_path = directory_list[random_index]

X_train, X_test, y_train, y_test = get_data_from_directory(random_path)
X_train = X_train.squeeze()
y_train = y_train.squeeze()
X_test = X_test.squeeze()
y_test = y_test.squeeze()

print('Dataset: ', random_path)
print('X_train shape: ', X_train.shape)
print('y_train shape: ', y_train.shape)
print('X_test shape:  ', X_test.shape)
print('y_test shape:  ', y_test.shape)

In [7]:
#EthanolLevel_TRAIN = pd.DataFrame(arff.loadarff('/content/drive/My Drive/EthanolLevel/EthanolLevel_TRAIN.arff')[0])
#EthanolLevel_TEST = pd.DataFrame(arff.loadarff('/content/drive/My Drive/EthanolLevel/EthanolLevel_TEST.arff')[0])

EthanolLevel_TRAIN = pd.DataFrame(arff.loadarff('EthanolLevel/EthanolLevel_TRAIN.arff')[0])
EthanolLevel_TEST = pd.DataFrame(arff.loadarff('EthanolLevel/EthanolLevel_TEST.arff')[0])

train_targ = EthanolLevel_TRAIN['target']
test_targ = EthanolLevel_TEST['target']

train = EthanolLevel_TRAIN.drop(columns='target')
test = EthanolLevel_TEST.drop(columns='target')

In [8]:
print(train_targ.unique())
for i, indx in zip(train_targ.unique(), [0,1,2,3]):
  train_targ[train_targ == i] = indx
  test_targ[test_targ==i] = indx
train_targ = train_targ.astype(float)
test_targ = test_targ.astype(float)
print('->', train_targ.unique())

[b'1' b'2' b'3' b'4']
-> [0. 1. 2. 3.]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [41]:
train.shape, train_targ.shape

((504, 1751), (504,))

In [10]:
train_targ.value_counts()

3.0    126
2.0    126
1.0    126
0.0    126
Name: target, dtype: int64

In [11]:
def calculate(train, test):
  clf = KNeighborsClassifier()
  clf.fit(train, train_targ)
  print('train acc: ', accuracy_score(train_targ, clf.predict(train)))
  print('test acc:', accuracy_score(test_targ, clf.predict(test)))

In [12]:
calculate(train, test)

train acc:  0.5436507936507936
test acc: 0.246


# TDA approach
Embed the time series using Takens' embedding into a higher dimensional space.\
Use the Vietoris-Rips filtration to calculate the persistence diagrams.\
Scale and filter the diagrams.

In [52]:
parameters_type = "fixed"
embedding_dimension = 2
embedding_time_delay = 3
n_jobs = 1

In [53]:
window_width = train.shape[1] - ((embedding_dimension - 1) * embedding_time_delay + 1)
window_stride = 1

metric = "euclidean"
max_edge_length = 10
homology_dimensions = [0, 1]

epsilon = 0.0

In [81]:
steps = [
    (
        "embedding",
        ts.TakensEmbedding(
            parameters_type=parameters_type,
            dimension=embedding_dimension,
            time_delay=embedding_time_delay,
            n_jobs=n_jobs,
        ),
    ),
    ("window", ts.SlidingWindow(width=window_width, stride=1)),
    (
        "diagrams",
        hl.VietorisRipsPersistence(
            metric=metric,
            max_edge_length=max_edge_length,
            homology_dimensions=homology_dimensions,
            n_jobs=n_jobs,
        ),
    ),
]
pipeline = Pipeline(steps)

In [57]:
embedding = ts.TakensEmbedding(
            parameters_type="fixed",
            dimension=embedding_dimension,
            time_delay=embedding_time_delay,
            n_jobs=-1,
        )

In [72]:
train.iloc[0].shape

(1751,)

In [73]:
r1 = embedding.fit_transform(train.iloc[0])
r1.shape

(1748, 2)

In [74]:
window = ts.SlidingWindow(width=window_width, stride=1)

In [78]:
r2 = window.fit_transform(r1)
r2.shape

(1, 1748, 2)

In [79]:
diagrams = hl.VietorisRipsPersistence(
            metric=metric,
            max_edge_length=max_edge_length,
            homology_dimensions=homology_dimensions,
            n_jobs=n_jobs,
        )

In [80]:
r3 = diagrams.fit_transform(r2)
r3.shape

(1, 2078, 3)

In [108]:
def feature_transform(X):
    X_filtered = np.zeros((1, 2078, 3))
    for i, x in enumerate(X):
        print(i)
        x_filtered = pipeline.fit_transform(X.iloc[i])
        X_filtered = np.vstack((X_filtered,x_filtered))
    return X_filtered

In [None]:
train_new = feature_transform(train)

In [None]:
train_new = feature_transform(train)
train_new =np.save('train_new.npy', train_new)
train_new = np.load('train_new.npy')

In [None]:
test_new = feature_transform(test)
test_new =np.save('test_new.npy', test_new)
test_new = np.load('test_new.npy')