# Dynamic Time Warping with TSLearn
## Importing Data

In [1]:
from tslearn.datasets import UCR_UEA_datasets
import numpy as np

In [2]:
# Load the train/test splits of the "TwoLeadECG" dataset.
X_train, y_train, X_test, y_test = (
    UCR_UEA_datasets().load_dataset("TwoLeadECG")
)
# Show the array shape and its Python type, one per line.
print(X_train.shape, type(X_train), sep="\n")

(23, 82, 1)
<class 'numpy.ndarray'>


## Training

In [3]:
from tslearn.metrics import cdist_dtw
from tslearn.clustering import TimeSeriesKMeans

In [4]:
# Use one cluster per distinct label found in the training set.
n_clusters = len(set(y_train))

# Pin the seed: without it the fitted clusters (and therefore every score
# computed from y_predict in the cells below) can change from run to run.
model = TimeSeriesKMeans(metric="dtw", n_clusters=n_clusters, random_state=0)
y_predict = model.fit_predict(X_train)

1.824 --> 1.010 --> 0.911 --> 0.885 --> 0.885 --> 


## Evaluation

In [5]:
from tslearn.clustering import silhouette_score

In [6]:
# Silhouette computed directly from the raw series with the DTW metric.
ss = silhouette_score(X_train, y_predict, metric="dtw")
print("Silhouette Score", ss)

# Same score, this time from an explicitly precomputed pairwise DTW matrix.
dtw_matrix = cdist_dtw(X_train)
ss_pre = silhouette_score(dtw_matrix, y_predict, metric="precomputed")
print("Silhouette Precomputed", ss_pre)

# Compare with a tolerance: the two values come from different code paths,
# so exact floating-point equality is not guaranteed even when they agree.
print("Metrics Match?", np.isclose(ss, ss_pre))

Silhouette Score 0.25730158066863873
Silhouette Precomputed 0.25730158066863873
Metrics Match? True


In [7]:
from sklearn.metrics.cluster import davies_bouldin_score, contingency_matrix,adjusted_rand_score,fowlkes_mallows_score,v_measure_score,adjusted_mutual_info_score

In [8]:
# Cross-tabulate the ground-truth labels against the predicted cluster ids.
cm = contingency_matrix(labels_true=y_train, labels_pred=y_predict)
print(cm)

[[ 0 12]
 [ 6  5]]


In [9]:
def purity(y_true, y_pred):
    """Return the purity of a clustering with respect to reference labels.

    Builds the contingency matrix of ``y_true`` against ``y_pred``, takes
    the largest count along axis 0 (one value per column), and divides the
    sum of those maxima by the total number of counted samples.

    Args:
        y_true: reference labels, one per sample.
        y_pred: predicted cluster ids, one per sample.

    Returns:
        The ratio described above.
    """
    table = contingency_matrix(y_true, y_pred)
    dominant_counts = table.max(axis=0)
    return dominant_counts.sum() / table.sum()

In [10]:
# Purity of the predicted clusters against the training labels.
pur = purity(y_true=y_train, y_pred=y_predict)
print("Purity", pur)

Purity 0.782608695652174


In [11]:
# Adjusted Rand index between the training labels and the predicted clusters.
ars = adjusted_rand_score(labels_true=y_train, labels_pred=y_predict)
print("Adjusted rand index", ars)

Adjusted rand index 0.29447852760736193


In [12]:
# FMS: geometric mean of pairwise precision and recall.
fms = fowlkes_mallows_score(labels_true=y_train, labels_pred=y_predict)
print("Fowlkes-Mallows score: ", fms)

Fowlkes-Mallows score:  0.6732249977106864


In [13]:
# Drop the trailing axis so every series becomes one flat row of a 2-D array.
# NOTE(review): this reshape only works when the last axis has size 1, and the
# score below is computed on these flattened vectors rather than on DTW
# distances; confirm that is the intended comparison.
X_train_resh = X_train.reshape((X_train.shape[0], X_train.shape[1]))
db = davies_bouldin_score(X=X_train_resh, labels=y_predict)
print("Davies-Bouldin", db)

Davies-Bouldin 3.370701299716391


In [14]:
# V-measure between the training labels and the predicted clusters.
vm = v_measure_score(labels_true=y_train, labels_pred=y_predict)
print("V-Measure: ", vm)

V-Measure:  0.38610781894301427


## Automation

In [15]:
import pandas as pd

# Names of the datasets handled by the batch run.
datasets_names = [
    "ECG5000",
    "ECG200",
    "ChlorineConcentration",
    "FordA",
    "FordB",
    "PhalangesOutlinesCorrect",
    "RefrigerationDevices",
    "TwoLeadECG",
    "TwoPatterns",
]

# Empty results table, one column per value reported for a dataset.
# NOTE(review): 'Shilhouette' and 'VMesure' look misspelled; they are kept
# as-is because other code may look these columns up by name.
results_df = pd.DataFrame(
    columns=[
        'DatasetName',
        'NofTrainSamples',
        'NofClasses',
        'Shilhouette',
        'VMesure',
        'AdjRandIndex',
        'Purity',
        'DBScore',
        'FMS',
        'AMIS',
    ]
)

In [3]:
from multiprocessing import Pool


def worker(x):
    """Return ``x`` multiplied by itself."""
    return x * x


if __name__ == '__main__':  # only start the pool when run as the main module
    num_processors = 3
    # The context manager shuts the pool down once the block exits, so the
    # worker processes are not left running after the results are collected.
    with Pool(processes=num_processors) as p:
        output = p.map(worker, range(3))
    print(output)

Button(button_style='success', description='start', style=ButtonStyle())

Button(button_style='primary', description='watch', style=ButtonStyle())

Button(description='clear log', style=ButtonStyle())

Output()

In [None]:
def process_dataset(dataset_name, random_state=None):
    """Cluster one dataset with DTW k-means and score the clustering.

    Loads the dataset, fits ``TimeSeriesKMeans`` with as many clusters as
    there are distinct training labels, and evaluates the predicted
    clusters against the training labels.

    Args:
        dataset_name: name passed to ``UCR_UEA_datasets.load_dataset``.
        random_state: optional seed forwarded to ``TimeSeriesKMeans``;
            the default ``None`` leaves the clustering unseeded.

    Returns:
        ``None`` when the dataset could not be loaded, otherwise the tuple
        ``(dataset_name, n_train_samples, n_clusters, silhouette, v_measure,
        adjusted_rand, purity, davies_bouldin, fowlkes_mallows,
        adjusted_mutual_info)``. The silhouette entry is currently always 0.
    """
    X_train, y_train, X_test, y_test = UCR_UEA_datasets(use_cache=True).load_dataset(dataset_name)
    # Report the function's own argument; the previous code read an undefined
    # name here and failed with NameError.
    print("Working on", dataset_name)

    # NOTE(review): the loader may hand back None instead of arrays when the
    # dataset cannot be fetched (confirm for the installed tslearn version);
    # guard before calling .any() so that case reaches the error branch.
    if X_train is None or not X_train.any():
        print("Error in loading Dataset")
        return None

    n_clusters = len(set(y_train))
    model = TimeSeriesKMeans(
        metric="dtw",
        n_clusters=n_clusters,
        verbose=False,
        random_state=random_state,
    )
    y_predict = model.fit_predict(X_train)
    # Previously placed after the return statement and therefore never printed.
    print("Fit Completed")

    # Silhouette is disabled; 0 is a placeholder that keeps the tuple layout.
    ss = 0  # silhouette_score(X_train, y_predict, metric="dtw")
    vm = v_measure_score(y_train, y_predict)
    ars = adjusted_rand_score(y_train, y_predict)
    pur = purity(y_train, y_predict)

    # Flatten each series to one row; -1 also copes with a trailing axis
    # larger than 1, which the fixed two-axis reshape could not.
    X_train_resh = X_train.reshape(X_train.shape[0], -1)
    db = davies_bouldin_score(X_train_resh, y_predict)

    fms = fowlkes_mallows_score(y_train, y_predict)
    amis = adjusted_mutual_info_score(y_train, y_predict, average_method='arithmetic')

    return (dataset_name, X_train.shape[0], n_clusters, ss, vm, ars, pur, db, fms, amis)


# Parallel/delayed and num_cores are not defined in any earlier visible cell,
# so bring them into scope here to let the cell run on a fresh kernel.
from multiprocessing import cpu_count

from joblib import Parallel, delayed

# One job per available CPU core.
num_cores = cpu_count()

# Process every dataset in its own job; results keep the order of datasets_names.
results = Parallel(n_jobs=num_cores)(delayed(process_dataset)(ds) for ds in datasets_names)
print(results)
    
    