# Dynamic Time Warping with TSLearn
## Importing Data

In [67]:
from tslearn.datasets import UCR_UEA_datasets
import numpy as np

In [68]:
# Load the TwoLeadECG train/test arrays through tslearn's UCR/UEA dataset loader.
(X_train, y_train, X_test, y_test) = UCR_UEA_datasets().load_dataset("TwoLeadECG")
# Report the training array's shape and its Python type, one per line.
print(X_train.shape, type(X_train), sep="\n")

(23, 82, 1)
<class 'numpy.ndarray'>


## Training

In [69]:
from tslearn.metrics import cdist_dtw
from tslearn.clustering import TimeSeriesKMeans

In [70]:
# One cluster per distinct class label found in the training labels.
n_clusters = len(set(y_train))
# The k-means fit is seeded explicitly so that a fresh "Restart & Run All"
# reproduces the same clustering and therefore the same metrics below.
RANDOM_SEED = 0
model = TimeSeriesKMeans(metric="dtw", n_clusters=n_clusters, random_state=RANDOM_SEED)
y_predict = model.fit_predict(X_train)

1.563 --> 1.015 --> 1.014 --> 1.014 --> 


## Evaluation

In [71]:
from tslearn.clustering import silhouette_score

In [72]:
# Silhouette score computed directly from the raw series with the DTW metric.
ss = silhouette_score(X_train, y_predict, metric="dtw")
print("Silhouette Score", ss)

# Same score, this time from an explicitly precomputed pairwise DTW matrix.
dtw_matrix = cdist_dtw(X_train)
ss_pre = silhouette_score(dtw_matrix, y_predict, metric="precomputed")
print("Silhouette Precomputed", ss_pre)

# Compare with a tolerance: the two code paths may legitimately differ in the
# last floating-point bits, so an exact `==` is a fragile equality check.
print("Metrics Match?", np.isclose(ss, ss_pre))

Silhouette Score 0.15042389203119969
Silhouette Precomputed 0.15042389203119969
Metrics Match? True


In [73]:
from sklearn.metrics.cluster import davies_bouldin_score, contingency_matrix,adjusted_rand_score,fowlkes_mallows_score,v_measure_score,adjusted_mutual_info_score

In [74]:
# Contingency table of the ground-truth labels against the predicted clusters.
cm = contingency_matrix(labels_true=y_train, labels_pred=y_predict)
print(cm)

[[ 2 10]
 [ 1 10]]


In [75]:
def purity(y_true, y_pred):
    """Return the purity of the labeling ``y_pred`` with respect to ``y_true``.

    The contingency matrix of the two labelings is built, the largest entry
    of each column is taken, and the sum of those maxima is divided by the
    sum of all entries of the matrix.
    """
    table = contingency_matrix(y_true, y_pred)
    dominant_per_column = table.max(axis=0)
    return dominant_per_column.sum() / table.sum()

In [76]:
# Purity of the predicted clusters measured against the training labels.
pur = purity(y_true=y_train, y_pred=y_predict)
print("Purity", pur)

Purity 0.5217391304347826


In [77]:
# Adjusted Rand index between the training labels and the predicted clusters.
ars = adjusted_rand_score(labels_true=y_train, labels_pred=y_predict)
print("Adjusted rand index", ars)

Adjusted rand index -0.020161290322580596


In [78]:
# FMS: geometric mean of pairwise precision and recall
fms = fowlkes_mallows_score(y_train,y_predict)
print("Fowlkes-Mallows score: ", fms)

Fowlkes-Mallows score:  0.5954839392557383


In [79]:
# davies_bouldin_score is given a 2-D array, so each series is flattened into
# one row. Letting numpy infer the second axis (-1) keeps the reshape valid
# when the series have more than one dimension, not only for shape (n, sz, 1).
X_train_resh = X_train.reshape(X_train.shape[0], -1)
db = davies_bouldin_score(X_train_resh, y_predict)
print("Davies-Bouldin", db)

Davies-Bouldin 0.9766361517906136


In [80]:
# V-measure between the training labels and the predicted clusters.
vm = v_measure_score(labels_true=y_train, labels_pred=y_predict)
print("V-Measure: ", vm)

V-Measure:  0.011935062134185988


## Automation

In [81]:
import pandas as pd
# Names of the UCR/UEA datasets the automated run is meant to cover.
datasets_names = [
    "ECG5000",
    "ECG200",
    "ChlorineConcentration",
    "FordA",
    "FordB",
    "PhalangesOutlinesCorrect",
    "RefrigerationDevices",
    "TwoLeadECG",
    "TwoPatterns",
]

In [82]:
from multiprocessing import Pool
from joblib import Parallel, delayed
def worker(x):
    """Return the square of ``x`` (toy task used to exercise the process pool)."""
    return x * x


# The guard keeps the pool from being started when this code is imported
# rather than run as the main module.
if __name__ == '__main__':
    num_processors = 3
    # Use the pool as a context manager so its worker processes are released
    # when the block exits instead of lingering for the life of the kernel.
    # `map` blocks until every result is available, so leaving the block
    # afterwards loses nothing.
    with Pool(processes=num_processors) as p:
        output = p.map(worker, range(3))
    print(output)

[0, 1, 4]


In [None]:
import sys

def process_dataset(dataset_name, compute_silhouette=False, random_state=None):
    """Cluster one UCR/UEA dataset with DTW k-means and score the clustering.

    Parameters
    ----------
    dataset_name : str
        Name passed to ``UCR_UEA_datasets(use_cache=True).load_dataset``.
    compute_silhouette : bool, optional
        When True, compute the DTW silhouette score of the clustering.
        Defaults to False, in which case the silhouette slot of the result
        holds the placeholder ``0``.
    random_state : int or None, optional
        Forwarded to ``TimeSeriesKMeans`` so that a run can be made repeatable.

    Returns
    -------
    tuple or None
        ``(dataset_name, n_train_samples, n_clusters, silhouette, v_measure,
        adjusted_rand, purity, davies_bouldin, fowlkes_mallows,
        adjusted_mutual_info)``, or ``None`` if the training data could not
        be loaded.
    """
    X_train, y_train, X_test, y_test = UCR_UEA_datasets(use_cache=True).load_dataset(dataset_name)
    print("Working on", dataset_name, flush=True)

    # A failed load is reported by the loader as None values (per the tslearn
    # docs; confirm for the installed version). Testing `X_train.any()` would
    # raise AttributeError on None and wrongly reject an all-zero dataset.
    if X_train is None or y_train is None or len(X_train) == 0:
        print("Error in loading Dataset")
        return None

    # One cluster per distinct class label in the training set.
    n_clusters = len(set(y_train))
    model = TimeSeriesKMeans(metric="dtw", n_clusters=n_clusters, verbose=True,
                             random_state=random_state)
    y_predict = model.fit_predict(X_train)
    # Printed here, right after the fit: a statement placed after the final
    # `return` would never execute.
    print("Fit Completed", flush=True)

    # Placeholder 0 unless the caller opts in to the DTW silhouette score.
    ss = silhouette_score(X_train, y_predict, metric="dtw") if compute_silhouette else 0
    vm = v_measure_score(y_train, y_predict)
    ars = adjusted_rand_score(y_train, y_predict)
    pur = purity(y_train, y_predict)

    # davies_bouldin_score is given a 2-D array: one flattened row per series.
    # Inferring the second axis (-1) keeps this valid for multivariate series.
    X_train_resh = X_train.reshape(X_train.shape[0], -1)
    db = davies_bouldin_score(X_train_resh, y_predict)

    fms = fowlkes_mallows_score(y_train, y_predict)
    amis = adjusted_mutual_info_score(y_train, y_predict, average_method='arithmetic')

    return (dataset_name, X_train.shape[0], n_clusters, ss, vm, ars, pur, db, fms, amis)


# Run the clustering/evaluation pipeline on the first dataset of the list.
results = process_dataset(dataset_name=datasets_names[0])

Working on ECG5000


In [61]:
# Unpack the values held in `results` into a new list.
results = [*results]

In [66]:
import os
# Column names are kept exactly as spelled so previously exported CSV files
# and any code reading them stay compatible.
dataframe_columns = ['DatasetName','NofTrainSamples','NofClasses','Shilhouette','VMesure','AdjRandIndex','Purity','DBScore','FMS','AMIS']
# Build the one-row frame with a single constructor call. DataFrame.append was
# deprecated in pandas 1.4 and removed in 2.0, so appending a Series to an
# empty frame does not run on current pandas.
results_df = pd.DataFrame([results[0:10]], columns=dataframe_columns)

# Export directory is specific to the dataset the metrics were computed on.
csv_directory = '../export/DTW/' + datasets_names[0]+'/'
os.makedirs(csv_directory, exist_ok=True)

results_df.to_csv(path_or_buf=csv_directory+"metrics.csv", mode='w+')
results_df

Unnamed: 0,DatasetName,NofTrainSamples,NofClasses,Shilhouette,VMesure,AdjRandIndex,Purity,DBScore,FMS,AMIS
0,ECG5000,500,5,0,0.520783,0.448145,0.92,1.954812,0.658989,0.513946
