# Dynamic Time Warping with TSLearn

## TSLearn imports

In [1]:
from tslearn.metrics import cdist_dtw
from tslearn.clustering import TimeSeriesKMeans, silhouette_score
from tslearn.datasets import UCR_UEA_datasets

## Metric imports

In [2]:
from sklearn.metrics.cluster import davies_bouldin_score, contingency_matrix,adjusted_rand_score,fowlkes_mallows_score,v_measure_score,adjusted_mutual_info_score

### Purity definition

In [3]:
def purity(y_true, y_pred):
    """Return the purity of a clustering against reference labels.

    A contingency matrix is built from ``y_true`` and ``y_pred``; the
    largest entry of each column (``axis=0`` maximum) is summed and
    divided by the sum of all entries of the matrix.
    """
    table = contingency_matrix(y_true, y_pred)
    column_peaks = table.max(axis=0)
    return column_peaks.sum() / table.sum()

## Datasets declaration

In [4]:
# Candidate dataset names; each one is passed to the UCR/UEA loader by name.
datasets_names = [
    "ECG5000",
    "ECG200",
    "ChlorineConcentration",
    "FordA",
    "FordB",
    "PhalangesOutlinesCorrect",
    "RefrigerationDevices",
    "TwoLeadECG",
    "TwoPatterns",
]
# Index into the list above selects the dataset used by the rest of the notebook.
chosen_dataset = datasets_names[5]

In [5]:
import sys
import pandas as pd
import numpy as np

def process_dataset(dataset_name, random_state=None):
    """Cluster the training split of a UCR/UEA dataset with DTW k-means.

    Parameters
    ----------
    dataset_name : str
        Name handed to ``UCR_UEA_datasets.load_dataset``.
    random_state : int or None, optional
        Forwarded to ``TimeSeriesKMeans`` so a run can be reproduced.
        The default (``None``) keeps the estimator's own default seeding.

    Returns
    -------
    The value of ``TimeSeriesKMeans.fit_predict`` on the training series
    (one cluster label per series), or ``None`` when the dataset could
    not be loaded.
    """
    X_train, y_train, _, _ = UCR_UEA_datasets(use_cache=True).load_dataset(dataset_name)
    print("Working on", dataset_name)
    # The loader reports failure by returning None instead of arrays, so test
    # for None first; calling .any() on None would raise AttributeError.
    if X_train is None or not X_train.any():
        print("Error in loading Dataset")
        return None

    # One cluster per distinct ground-truth label in the training split.
    n_clusters = len(set(y_train))
    model = TimeSeriesKMeans(
        metric="dtw",
        n_clusters=n_clusters,
        verbose=True,
        random_state=random_state,
    )
    return model.fit_predict(X_train)
    


# Run DTW k-means on the selected dataset; `results` holds the returned labels.
results = process_dataset(dataset_name=chosen_dataset)

Working on PhalangesOutlinesCorrect
1.984 --> 0.775 --> 0.717 --> 0.672 --> 0.638 --> 0.616 --> 0.609 --> 0.607 --> 0.607 --> 0.607 --> 0.607 --> 0.607 --> 0.607 --> 0.607 --> 0.607 --> 0.607 --> 0.607 --> 0.607 --> 0.607 --> 0.607 --> 0.607 --> 0.607 --> 0.607 --> 0.607 --> 0.607 --> 0.607 --> 0.607 --> 0.607 --> 


In [6]:
# process_dataset keeps its splits local, so load them again at notebook
# scope for the evaluation cells below.
X_train, y_train, X_test, y_test = (
    UCR_UEA_datasets(use_cache=True).load_dataset(chosen_dataset)
)

In [7]:
# Show the value returned by process_dataset (cluster labels from fit_predict).
results

array([1, 1, 1, ..., 0, 0, 0])

In [None]:
# Silhouette score of the cluster assignment, computed with the DTW metric.
# NOTE(review): this cell carries no execution count in the saved notebook,
# yet `ss` is consumed when the result record is assembled — confirm it is
# actually run on a fresh kernel before trusting the exported value.
ss = silhouette_score(X_train, results, metric="dtw")

In [10]:
# Scores that compare the predicted clusters with the ground-truth labels.
vm = v_measure_score(y_train, results)
ars = adjusted_rand_score(y_train, results)
fms = fowlkes_mallows_score(y_train, results)
amis = adjusted_mutual_info_score(y_train, results, average_method='arithmetic')
pur = purity(y_train, results)

# davies_bouldin_score is fed a 2-D matrix, so reshape the series array to
# its first two dimensions.
# NOTE(review): assumes X_train is (n_series, length, 1) — TODO confirm.
X_train_resh = X_train.reshape(X_train.shape[:2])
db = davies_bouldin_score(X_train_resh, results)

In [12]:
# One record for the chosen dataset: identifiers first, then every metric.
tuple_results = (
    chosen_dataset,
    X_train.shape[0],
    len(set(y_train)),
    ss,
    vm,
    ars,
    pur,
    db,
    fms,
    amis,
)

# Mutable copy of the record, used when building the results table.
DTW_results = [*tuple_results]

In [13]:
import os

# Column order must match the positional order of the values in DTW_results.
dataframe_columns = ['DatasetName','NofTrainSamples','NofClasses','Shilhouette','VMesure','AdjRandIndex','Purity','DBScore','FMS','AMIS']
# DataFrame.append was removed in pandas 2.0, so build the one-row frame
# directly from the record instead of appending a Series to an empty frame.
results_df = pd.DataFrame([DTW_results[0:10]], columns=dataframe_columns)

# Metrics are exported to a per-dataset directory, created on demand.
csv_directory = '../export/DTW/' + chosen_dataset+'/'
os.makedirs(csv_directory, exist_ok=True)

# Plain write mode: any previous metrics.csv for this dataset is overwritten.
results_df.to_csv(path_or_buf=csv_directory+"metrics.csv", mode='w')
results_df



Unnamed: 0,DatasetName,NofTrainSamples,NofClasses,Shilhouette,VMesure,AdjRandIndex,Purity,DBScore,FMS,AMIS
0,PhalangesOutlinesCorrect,1800,2,0,0.000295,-0.00082,0.651111,1.187164,0.522583,-0.00012
