# Time Series Shapelet_Motif_Classification

In [None]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tslearn.preprocessing import TimeSeriesScalerMinMax
from collections import defaultdict

In [None]:
y_train = pd.read_csv("y_train.txt", header=None, delim_whitespace=True )
body_gyro_z_train = pd.read_csv("body_gyro_z_train.txt", header=None,  delim_whitespace=True)
body_gyro_z_test = pd.read_csv("body_gyro_z_test.txt", header=None,  delim_whitespace=True)

## Shaplet Classifiers

In [None]:
#!pip install pyts

In [None]:
from pyts.transformation import ShapeletTransform
from sklearn.metrics import accuracy_score

In [None]:
X_train=body_gyro_z_train.copy(deep=True)
X_test=body_gyro_z_test.copy(deep=True)

In [None]:
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)  # Rescale time series

In [None]:
X_train_norm= scaler.fit_transform(X_train).reshape(X_train.shape[0], X_train.shape[1])
X_test_norm= scaler.fit_transform(X_test).reshape(X_test.shape[0], X_test.shape[1])
X_train_norm.shape

In [None]:
#plt.plot(X_train.T[0])
plt.plot(X_train_norm[0])
plt.show()

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
clf_1 = DecisionTreeClassifier(min_samples_split=33, min_samples_leaf =33 , random_state=42)
clf_1.fit(X_train, y_train)

y_pred_dt1 = clf_1.predict(X_test)

print('Accuracy %s' % accuracy_score(y_test, y_pred_dt1))
print('F1-score %s' % f1_score(y_test, y_pred_dt1, average=None))
print(classification_report(y_test, y_pred_dt1))

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
clf_knn_1 = KNeighborsClassifier(n_neighbors=5, weights='uniform', metric='euclidean')
clf_knn_1.fit(X_train, np.ravel(y_train))

y_pred_Knn1 = clf_knn_1.predict(X_test)

print('Accuracy %s' % accuracy_score(y_test, y_pred_Knn1))
print('F1-score %s' % f1_score(y_test, y_pred_Knn1, average=None))
print(classification_report(y_test, y_pred_Knn1))

## ShapeletModel

In [None]:
#!pip install tensorflow

In [1]:
import tensorflow as tf
from tslearn.shapelets import ShapeletModel
from tslearn.shapelets import grabocka_params_to_shapelet_size_dict
from tslearn.preprocessing import TimeSeriesScalerMinMax
from sklearn.metrics import accuracy_score, f1_score, classification_report
from tslearn.preprocessing import TimeSeriesScalerMeanVariance

In [None]:
n_ts, ts_sz = X_train.shape
n_classes = 6

shapelet_sizes = grabocka_params_to_shapelet_size_dict(n_ts=n_ts, ts_sz=ts_sz, n_classes=n_classes, l=0.1, r=1)

print('Number of time series:', n_ts)
print('Time series size:', ts_sz)
print('n_classes:', n_classes)
print('shapelet_sizes:', shapelet_sizes)

In [None]:
shp_clf = ShapeletModel(n_shapelets_per_size=shapelet_sizes,
                        optimizer="sgd",
                        weight_regularizer=.01,
                        max_iter=200,
                        verbose=0,
                        random_state=0)

shp_clf.fit(X_train, np.ravel(y_train))

In [None]:
#Location Shapelet in X_train
predicted_locations = shp_clf.locate(X_train)

ts_id = 2528
plt.figure(figsize=(10,4))
n_shapelets = sum(shapelet_sizes.values())
plt.title("Locations of shapelet matches in body_gyro_z_train[1846]") 
plt.plot(X_train.iloc[ts_id,:])
plt.xlim([0,127])
for idx_shp, shp in enumerate(shp_clf.shapelets_):
    t0 = predicted_locations[ts_id, idx_shp]
    plt.plot(np.arange(t0, t0 + len(shp)), shp, linewidth=2, label = 'Shapelet {}'.format(idx_shp))
    plt.legend(bbox_to_anchor=(1, 1), loc=2, borderaxespad=0.5)
#plt.savefig('FigXX-Shapelets.png', dpi=600,bbox_inches = 'tight') 
plt.show()

In [None]:
shp_clf.fit(X_train, np.ravel(y_train))

In [None]:
shp_clf.shapelets_

In [None]:
predicted_labels = shp_clf.predict(X_test)
print("Correct classification rate:", accuracy_score(y_test, predicted_labels))

In [None]:
predicted_locations = shp_clf.locate(X_test)

In [None]:
## ALTERNATIVA ##
# Author: Romain Tavenard
# License: BSD 3 clause

import numpy
from sklearn.metrics import accuracy_score
import tensorflow as tf
import matplotlib.pyplot as plt

from tslearn.datasets import CachedDatasets
from tslearn.preprocessing import TimeSeriesScalerMinMax
from tslearn.shapelets import LearningShapelets, \
    grabocka_params_to_shapelet_size_dict
from tslearn.utils import ts_size

# Get statistics of the dataset
n_ts, ts_sz = body_gyro_z_train.shape
n_classes = 6 # len(set(y_train))

# Set the number of shapelets per size as done in the original paper
shapelet_sizes = grabocka_params_to_shapelet_size_dict(n_ts=n_ts,
                                                       ts_sz=ts_sz,
                                                       n_classes=n_classes,
                                                       l=0.1,
                                                       r=1)

# Define the model using parameters provided by the authors (except that we
# use fewer iterations here)
shp_clf = LearningShapelets(n_shapelets_per_size=shapelet_sizes,
                            optimizer="sgd", #tf.optimizers.Adam(.01),
                            #batch_size=16,
                            weight_regularizer=.01,
                            max_iter=100,
                            random_state=42,
                            verbose=0)
shp_clf.fit(X_train, np.ravel(y_train))

# Make predictions and calculate accuracy score
pred_labels = shp_clf.predict(X_test)
print("Correct classification rate:", accuracy_score(y_test, pred_labels))

# Plot the different discovered shapelets
plt.figure()
for i, sz in enumerate(shapelet_sizes.keys()):
    plt.subplot(len(shapelet_sizes), 1, i + 1)
    plt.title("%d shapelets of size %d" % (shapelet_sizes[sz], sz))
    for shp in shp_clf.shapelets_:
        if ts_size(shp) == sz:
            plt.plot(shp.ravel())
    plt.xlim([0, max(shapelet_sizes.keys()) - 1])

plt.tight_layout()
plt.show()

# The loss history is accessible via the `model_` that is a keras model
plt.figure()
plt.plot(numpy.arange(1, shp_clf.n_iter_ + 1), shp_clf.history_["loss"])
plt.title("Evolution of cross-entropy loss during training")
plt.xlabel("Epochs")
plt.show()

## Shaplet-distances-based Classifier

In [None]:
X_train2 = shp_clf.transform(X_train)
X_train2.shape

In [None]:
X_test2 = shp_clf.transform(X_test)
X_test2.shape

In [None]:
X_train_norm2 = shp_clf.transform(X_train_norm)
X_test_norm2 = shp_clf.transform(X_test_norm)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
%%time
error_rate = []
for i in range(1,55):
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train2,np.ravel(y_train))
    pred_i = knn.predict(X_test2)
    error_rate.append(np.mean(pred_i != np.ravel(y_test)))
#print(pred_i)
plt.figure(figsize=(10,6))
plt.plot(range(1,55),error_rate,color='blue', linestyle='dashed', 
         marker='o',markerfacecolor='red', markersize=10)
plt.title('Error Rate vs. K Value')
plt.xlabel('K')
plt.ylabel('Error Rate')
print("Minimum error:-",min(error_rate),"at K =",error_rate.index(min(error_rate)))

In [None]:
# Knn: mon normalizzato
clf_knn = KNeighborsClassifier(n_neighbors=28, weights='uniform')
clf_knn.fit(X_train2, np.ravel(y_train))
y_pred_knn = clf_knn.predict(X_test2)

print('Accuracy %s' % accuracy_score(y_test, y_pred_knn))
print('F1-score %s' % f1_score(y_test, y_pred_knn, average=None))
print(classification_report(y_test, y_pred_knn))

In [None]:
# Knn: normalizzato
clf_knn_norm = KNeighborsClassifier(n_neighbors=28, weights='uniform')
clf_knn_norm.fit(X_train_norm2, np.ravel(y_train))
y_pred_knn_norm = clf_knn_norm.predict(X_test_norm2)

print('Accuracy %s' % accuracy_score(y_test, y_pred_knn_norm))
print('F1-score %s' % f1_score(y_test, y_pred_knn_norm, average=None))
print(classification_report(y_test, y_pred_knn_norm))

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

In [None]:
params = { "max_depth" : [None,2,4,8,12,16,22], 
          "min_samples_split" : np.arange(2,50,2),
          "min_samples_leaf" : np.arange(1,50,2)   
}

In [None]:
%%time
grid = GridSearchCV(DecisionTreeClassifier(), params,cv=10, scoring='accuracy', n_jobs=-1)
grid.fit(X_train2, y_train)

In [None]:
grid.best_params_

In [None]:
# Decision Tree: non normalizzato
clf_dt = DecisionTreeClassifier( max_depth=8, min_samples_leaf=15, min_samples_split=4, random_state=42)
clf_dt.fit(X_train2, y_train)

y_pred_dt = clf_dt.predict(X_test2)

print('Accuracy %s' % accuracy_score(y_test, y_pred_dt))
print('F1-score %s' % f1_score(y_test, y_pred_dt, average=None))
print(classification_report(y_test, y_pred_dt))

In [None]:
# Decision Tree: normalizzato
clf_dt_norm = DecisionTreeClassifier(min_samples_split=50, min_samples_leaf =50 , random_state=42)
clf_dt_norm.fit(X_train_norm2, y_train)

y_pred_dt_norm = clf_dt_norm.predict(X_test_norm2)

print('Accuracy %s' % accuracy_score(y_test, y_pred_dt_norm))
print('F1-score %s' % f1_score(y_test, y_pred_dt_norm, average=None))
print(classification_report(y_test, y_pred_dt_norm))

## Univariate Time Series Classifiers


In [None]:
from pyts.datasets import load_basic_motions
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from tslearn.preprocessing import TimeSeriesScalerMinMax
from sklearn.model_selection import train_test_split, cross_val_score 
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.metrics import roc_curve, auc, roc_auc_score
import numpy as np
from sklearn.metrics import classification_report
from sktime.utils.plotting import plot_series
from pyts.datasets import fetch_uea_dataset
from sktime.datatypes._panel._convert import from_3d_numpy_to_nested

In [None]:
X_train=body_gyro_z_train.copy(deep=True)
X_test=body_gyro_z_test.copy(deep=True)

In [None]:
# trasformare il dataset in una colonna sola con cella contente l'intera time series
X_train_uni=from_3d_numpy_to_nested(X_train_norm.reshape(7352,1,128))
X_test_uni=from_3d_numpy_to_nested(X_test_norm.reshape(2947,1,128))

In [None]:
clf_uni = KNeighborsTimeSeriesClassifier()
clf_uni.fit(X_train_uni, np.ravel(y_train))

In [None]:
%%time
y_pred_uni = clf_uni.predict(X_test_uni)
print(classification_report(y_test, y_pred_uni))

### ROCKET

In [None]:
from sklearn.linear_model import RidgeClassifierCV
from sktime.transformations.panel.rocket import Rocket

In [None]:
%%time
rocket_1 = Rocket()  # by default, MiniRocket uses ~10,000 kernels
rocket_1.fit(X_train_uni)
X_train_transform_ro1 = rocket.transform(X_train_uni)
clf_ridge = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
clf_ridge.fit(X_train_transform_ro1.values, y_train)

In [None]:
X_test_transform_ro1 = rocket.transform(X_test_uni)
y_pred_rocket1 = clf_ridge.predict(X_test_transform_ro1.values)
print(classification_report(y_test, y_pred_rocket1))

### MINI-ROCKET


In [None]:
from sklearn.linear_model import RidgeClassifierCV
from sktime.transformations.panel.rocket import MiniRocket

In [None]:
%%time
minirocket_mini1 = MiniRocket()  # by default, MiniRocket uses ~10,000 kernels
minirocket_mini1.fit(X_train_uni)
X_train_transform_min1 = minirocket_mini1.transform(X_train_uni)
clf_ridge_min = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
clf_ridge_min.fit(X_train_transform_min1.values, y_train)

In [None]:
X_test_transform_min1 = minirocket_mini1.transform(X_test_uni)
y_pred_min1 = clf_ridge_min.predict(X_test_transform_min1.values)
print(classification_report(y_test, y_pred_min1))

### Canonical Interval Forest (CIF)

In [None]:
from sktime.classification.interval_based import CanonicalIntervalForest

In [None]:
%%time
clf_canon = CanonicalIntervalForest(n_estimators=30)
clf_canon.fit(X_train_uni, y_train)

In [None]:
y_pred_canon = clf_canon.predict(X_test_uni)
print(classification_report(y_test, y_pred_canon))

## MULTIVARIATE

In [2]:
X_train=body_gyro_x_train.copy(deep=True)
Y_train=body_gyro_y_train.copy(deep=True)
Z_train=body_gyro_z_train.copy(deep=True)

X_test=body_gyro_x_test.copy(deep=True)
Y_test=body_gyro_y_test.copy(deep=True)
Z_test=body_gyro_z_test.copy(deep=True)

NameError: name 'body_gyro_x_train' is not defined

In [None]:
X_train_norm= scaler.fit_transform(X_train).reshape(X_train.shape[0], X_train.shape[1])
X_test_norm= scaler.fit_transform(X_test).reshape(X_test.shape[0], X_test.shape[1])

Y_train_norm= scaler.fit_transform(Y_train).reshape(Y_train.shape[0], Y_train.shape[1])
Y_test_norm= scaler.fit_transform(Y_test).reshape(Y_test.shape[0], Y_test.shape[1])

Z_train_norm= scaler.fit_transform(Z_train).reshape(Z_train.shape[0], Z_train.shape[1])
Z_test_norm= scaler.fit_transform(Z_test).reshape(Z_test.shape[0], Z_test.shape[1])

In [None]:
X_train_uni=from_3d_numpy_to_nested(X_train_norm.reshape(7352,1,128))
Y_train_uni=from_3d_numpy_to_nested(Y_train_norm.reshape(7352,1,128))
Z_train_uni=from_3d_numpy_to_nested(Z_train_norm.reshape(7352,1,128))


X_test_uni=from_3d_numpy_to_nested(X_test_norm.reshape(2947,1,128))
Y_test_uni=from_3d_numpy_to_nested(Y_test_norm.reshape(2947,1,128))
Z_test_uni=from_3d_numpy_to_nested(Z_test_norm.reshape(2947,1,128))

In [None]:
XY_train_uni=pd.merge(left=X_train_uni, right=Y_train_uni,right_index=True, left_index=True)

In [None]:
XYZ_train_uni=pd.merge(left=XY_train_uni, right=Z_train_uni,right_index=True, left_index=True)

In [None]:
XYZ_train_uni.columns

In [None]:
XY_test_uni=pd.merge(left=X_test_uni, right=Y_test_uni,right_index=True, left_index=True)

In [None]:
XYZ_test_uni=pd.merge(left=XY_test_uni, right=Z_test_uni,right_index=True, left_index=True)

In [None]:
plot_series(XYZ_train_uni['var_0_x'][0])
plot_series(XYZ_train_uni['var_0_y'][0])
plot_series(XYZ_train_uni['var_0'][0])
plt.show()

### 1-NN with DTW (baseline)

In [None]:
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier

In [None]:
%%time
clf_multi = KNeighborsTimeSeriesClassifier()
clf_multi.fit(XYZ_train_uni, np.ravel(y_train))

In [None]:
%%time
y_pred_2 = clf_multi.predict(XYZ_test_uni)
print(classification_report(y_test, y_pred_2))

### Rocket

In [None]:
from sklearn.linear_model import RidgeClassifierCV
from sktime.transformations.panel.rocket import Rocket

In [None]:
%%time
rocket_multi = Rocket()
rocket_multi.fit(XYZ_train_uni)
XYZ_train_transform_ro2 = rocket_multi.transform(XYZ_train_uni)
clf_ro2 = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
clf_ro2.fit(XYZ_train_transform_ro2.values, y_train)

In [None]:
XYZ_test_transform_ro2 = rocket_multi.transform(XYZ_test_uni)
y_pred_ro2 = clf_ro2.predict(XYZ_test_transform_ro2.values)
print(classification_report(y_test, y_pred_ro2))

### MINI-ROCKET


In [None]:
from sklearn.linear_model import RidgeClassifierCV
from sktime.transformations.panel.rocket import MiniRocketMultivariate

In [None]:
from sklearn.linear_model import RidgeClassifierCV
from sktime.transformations.panel.rocket import MiniRocketMultivariate

In [None]:
XYZ_test_transform_mini2 = minirocket_multi.transform(XYZ_test_uni)
y_pred_mini2 = clf_mini2.predict(XYZ_test_transform_mini2.values)
print(classification_report(y_test, y_pred_mini2))