# Dynamic Time Warping with TSLearn

## TSLearn imports

In [11]:
from tslearn.metrics import cdist_dtw
from tslearn.clustering import TimeSeriesKMeans, silhouette_score
from tslearn.datasets import UCR_UEA_datasets

## Metric imports

In [12]:
from sklearn.metrics.cluster import davies_bouldin_score, contingency_matrix,adjusted_rand_score,fowlkes_mallows_score,v_measure_score,adjusted_mutual_info_score

### Purity definition

In [13]:
def purity(y_true, y_pred):
    cm = contingency_matrix(y_true, y_pred)
    return np.sum(np.amax(cm, axis=0)) / np.sum(cm)

## Datasets declaration

In [14]:
datasets_names = ["ECG5000", "ECG200","ChlorineConcentration","FordA","FordB","PhalangesOutlinesCorrect","RefrigerationDevices","TwoLeadECG","TwoPatterns"]
chosen_dataset = datasets_names[8]

In [15]:
import sys
import pandas as pd
import numpy as np

def process_dataset(dataset_name):
    X_train, y_train, X_test, y_test = UCR_UEA_datasets(use_cache=True).load_dataset(dataset_name)
    print("Working on",dataset_name)
    if(not X_train.any()):
        print("Error in loading Dataset")
        return    
    
    n_clusters = len(set(y_train))
    model = TimeSeriesKMeans(metric="dtw", n_clusters=n_clusters,verbose=True)
    y_predict = model.fit_predict(X_train)
    
    return y_predict
    


results = process_dataset(chosen_dataset)

Working on TwoPatterns
18.884 --> 11.435 --> 10.400 --> 9.876 --> 9.719 --> 9.436 --> 8.551 --> 7.583 --> 7.569 --> 7.489 --> 7.479 --> 7.475 --> 7.475 --> 7.475 --> 


In [37]:
X_train, y_train, X_test, y_test = UCR_UEA_datasets(use_cache=True).load_dataset(chosen_dataset)

array([[[ 2.3037930e-01],
        [ 4.9096649e-01],
        [ 3.5883306e-01],
        [-2.3096511e-01],
        [ 9.0224785e-02],
        [-5.0147326e-01],
        [-3.3932938e-01],
        [ 6.6836482e-02],
        [ 2.3294938e-01],
        [ 2.8325503e-02],
        [ 2.3170201e-01],
        [ 1.6587095e-02],
        [ 1.4365201e-01],
        [ 2.8534305e-01],
        [ 7.1382826e-02],
        [-1.6801291e+00],
        [-1.6801291e+00],
        [-1.6801291e+00],
        [-1.6801291e+00],
        [-1.6801291e+00],
        [-1.6801291e+00],
        [-1.6801291e+00],
        [-1.6801291e+00],
        [-1.6801291e+00],
        [-1.6801291e+00],
        [-1.6801291e+00],
        [-1.6801291e+00],
        [-1.6801291e+00],
        [ 1.6600859e+00],
        [ 1.6600859e+00],
        [ 1.6600859e+00],
        [ 1.6600859e+00],
        [ 1.6600859e+00],
        [ 1.6600859e+00],
        [ 1.6600859e+00],
        [ 1.6600859e+00],
        [ 1.6600859e+00],
        [ 1.6600859e+00],
        [ 1.

In [40]:
results

['TwoPatterns',
 1000,
 4,
 0,
 0.9258843370755682,
 0.9321105127138701,
 0.974,
 12.020350188337824,
 0.9490738029420857,
 0.9256417293526799]

In [39]:
ss = silhouette_score(X_train[1],results,metric="dtw")    

ValueError: Found input variables with inconsistent numbers of samples: [128, 10]

In [24]:
vm = v_measure_score(y_train, results)
ars = adjusted_rand_score(y_train,results)
pur = purity(y_train, results)

X_train_resh = np.reshape(X_train,(X_train.shape[0],X_train.shape[1]))
db = davies_bouldin_score(X_train_resh,results)

fms = fowlkes_mallows_score(y_train,results)
amis = adjusted_mutual_info_score(y_train, results, average_method='arithmetic')

In [33]:
tuple_results = (chosen_dataset,X_train.shape[0],len(set(y_train)),0,vm,ars,pur,db,fms,amis)

DTW_results = list(tuple_results)

In [34]:
import os
dataframe_columns = ['DatasetName','NofTrainSamples','NofClasses','Shilhouette','VMesure','AdjRandIndex','Purity','DBScore','FMS','AMIS']
results_df = pd.DataFrame(columns=dataframe_columns)
results_df = results_df.append(pd.Series(DTW_results[0:10], index=dataframe_columns), ignore_index=True)

csv_directory = '../export/DTW/' + chosen_dataset+'/'
os.makedirs(csv_directory, exist_ok=True)

results_df.to_csv(path_or_buf=csv_directory+"metrics.csv", mode='w+')
results_df



Unnamed: 0,DatasetName,NofTrainSamples,NofClasses,Shilhouette,VMesure,AdjRandIndex,Purity,DBScore,FMS,AMIS
0,TwoPatterns,1000,4,0,0.925884,0.932111,0.974,12.02035,0.949074,0.925642
