# Dynamic Time Warping with TSLearn
## Importing Data

In [1]:
from tslearn.datasets import UCR_UEA_datasets
import numpy as np

In [2]:
X_train, y_train, X_test, y_test = UCR_UEA_datasets().load_dataset("TwoLeadECG")
print(X_train.shape)
print(type(X_train))

(23, 82, 1)
<class 'numpy.ndarray'>


## Training

In [3]:
from tslearn.metrics import cdist_dtw
from tslearn.clustering import TimeSeriesKMeans

In [4]:
n_clusters = len(set(y_train))
model = TimeSeriesKMeans(metric="dtw", n_clusters=n_clusters)
y_predict = model.fit_predict(X_train)

1.430 --> 0.829 --> 0.812 --> 0.812 --> 


## Evaluation

In [5]:
from tslearn.clustering import silhouette_score

In [6]:
ss = silhouette_score(X_train,y_predict,metric="dtw")
print("Silhouette Score",ss)
dtw_matrix = cdist_dtw(X_train)
ss_pre = silhouette_score(dtw_matrix,y_predict,metric="precomputed")
print("Silhouette Precomputed",ss_pre)
print("Metrics Match?",ss == ss_pre)

Silhouette Score 0.3047585295912364
Silhouette Precomputed 0.3047585295912364
Metrics Match? True


In [7]:
from sklearn.metrics.cluster import davies_bouldin_score, contingency_matrix,adjusted_rand_score,fowlkes_mallows_score,v_measure_score,adjusted_mutual_info_score

In [8]:
cm = contingency_matrix(y_train, y_predict)
print(cm)

[[9 3]
 [3 8]]


In [9]:
def purity(y_true, y_pred):
    cm = contingency_matrix(y_true, y_pred)
    return np.sum(np.amax(cm, axis=0)) / np.sum(cm)

In [10]:
pur = purity(y_train, y_predict)
print("Purity", pur)

Purity 0.7391304347826086


In [11]:
ars = adjusted_rand_score(y_train,y_predict)
print("Adjusted rand index", ars)

Adjusted rand index 0.1921487603305785


In [12]:
# FMS: Media geometrica di precision e recall pairwise
fms = fowlkes_mallows_score(y_train,y_predict)
print("Fowlkes-Mallows score: ", fms)

Fowlkes-Mallows score:  0.578512396694215


In [13]:
X_train_resh = np.reshape(X_train,(X_train.shape[0],X_train.shape[1]))
db = davies_bouldin_score(X_train_resh,y_predict)
print("Davies-Bouldin",db)

Davies-Bouldin 2.3838660448235367


In [14]:
vm = v_measure_score(y_train, y_predict)
print("V-Measure: ", vm)

V-Measure:  0.1712958005371039


## Automation

In [15]:
import pandas as pd
datasets_names = ["ECG5000", "ECG200","ChlorineConcentration","FordA","FordB","PhalangesOutlinesCorrect","RefrigerationDevices","TwoLeadECG","TwoPatterns"]
chosen_dataset = datasets_names[7]

In [16]:
import sys

def process_dataset(dataset_name):
    X_train, y_train, X_test, y_test = UCR_UEA_datasets(use_cache=True).load_dataset(dataset_name)
    print("Working on",dataset_name)
    sys.stdout.flush()
    if(not X_train.any()):
        print("Error in loading Dataset")
        return    
    
    n_clusters = len(set(y_train))
    model = TimeSeriesKMeans(metric="dtw", n_clusters=n_clusters,verbose=True)
    y_predict = model.fit_predict(X_train)
    
    ss = 0#silhouette_score(X_train,y_predict,metric="dtw")    
    vm = v_measure_score(y_train, y_predict)
    ars = adjusted_rand_score(y_train,y_predict)
    pur = purity(y_train, y_predict)
    
    X_train_resh = np.reshape(X_train,(X_train.shape[0],X_train.shape[1]))
    db = davies_bouldin_score(X_train_resh,y_predict)
    
    fms = fowlkes_mallows_score(y_train,y_predict)
    amis = adjusted_mutual_info_score(y_train, y_predict, average_method='arithmetic')
    
    return (dataset_name,X_train.shape[0],n_clusters,ss,vm,ars,pur,db,fms,amis)
    
    print("Fit Completed")


results = process_dataset(chosen_dataset)

Working on TwoLeadECG
2.224 --> 0.873 --> 0.845 --> 0.845 --> 


In [17]:
results = list(results)

In [18]:
import os
dataframe_columns = ['DatasetName','NofTrainSamples','NofClasses','Shilhouette','VMesure','AdjRandIndex','Purity','DBScore','FMS','AMIS']
results_df = pd.DataFrame(columns=dataframe_columns)
results_df = results_df.append(pd.Series(results[0:10], index=dataframe_columns), ignore_index=True)

csv_directory = '../export/DTW/' + chosen_dataset+'/'
os.makedirs(csv_directory, exist_ok=True)

results_df.to_csv(path_or_buf=csv_directory+"metrics.csv", mode='w+')
results_df

Unnamed: 0,DatasetName,NofTrainSamples,NofClasses,Shilhouette,VMesure,AdjRandIndex,Purity,DBScore,FMS,AMIS
0,TwoLeadECG,23,2,0,0.013609,-0.014936,0.565217,2.248334,0.548282,-0.028843
