In [3]:
import os
import csv
import time
import numpy as np
from sklearn.metrics import rand_score, normalized_mutual_info_score, adjusted_rand_score

# Read Data

In [4]:
class ClusterDataLoader:
    """Load labelled time series stored as comma-separated text files.

    A sub-dataset named ``X`` is read from ``<dataset_path>/X/X_TRAIN`` and
    ``<dataset_path>/X/X_TEST``. In every row the first column is the class
    label and the remaining columns are the series values.
    """

    def __init__(self, dataset_path):
        """Store the directory that contains the sub-dataset folders."""
        self.path = dataset_path

    def load(self, sub_dataset_name):
        """Read the TRAIN and TEST splits of a sub-dataset and concatenate them.

        Args:
            sub_dataset_name: Folder name of the sub-dataset; it is also the
                prefix of the ``_TRAIN`` / ``_TEST`` file names.

        Returns:
            A tuple ``(ts, labels, num_clusters)``: ``ts`` is a NumPy array
            built from the series values, ``labels`` is a NumPy integer array
            of class labels, and ``num_clusters`` is the number of distinct
            labels as a plain ``int``.

        Raises:
            ValueError: If the two files contain no data rows at all.
        """
        ts, labels = [], []
        for mode in ['_TRAIN', '_TEST']:
            file_path = os.path.join(self.path, sub_dataset_name, sub_dataset_name + mode)
            # newline='' is what the csv module asks for when reading files.
            with open(file_path, newline='') as csv_file:
                for line in csv.reader(csv_file, delimiter=','):
                    if not line:
                        # csv.reader yields [] for blank lines; nothing to parse.
                        continue
                    raw_label = line[0]
                    try:
                        label = int(raw_label)
                    except ValueError:
                        # Accept labels written in float form such as "1.0".
                        label = int(float(raw_label))
                    ts.append([float(x) for x in line[1:]])
                    # Files are assumed to be 1-based, so shift towards 0-based.
                    labels.append(label - 1)

        if not labels:
            raise ValueError(
                'No data rows found for sub-dataset {!r} under {!r}'.format(
                    sub_dataset_name, self.path))

        # Convert before doing arithmetic: subtracting an int from a Python
        # list raises TypeError, which made the re-basing below unusable.
        labels = np.asarray(labels, dtype=int)
        lowest = labels.min()
        if lowest == 1:
            # Source labels started at 2.
            labels = labels - 1
        elif lowest == -1:
            # Source labels were already 0-based; undo the shift applied above.
            labels = labels + 1

        return np.array(ts), labels, int(np.unique(labels).size)

In [5]:
# Root directory holding one sub-folder per dataset (relative to the notebook).
DATASET_PATH = '../data/univariate_example/'
# Sub-dataset to benchmark; the loader uses it as folder name and file prefix.
DATASET_NAME = 'Crop'

# Loader bound to DATASET_PATH.
dataloder = ClusterDataLoader(DATASET_PATH)

In [6]:
# Read both splits of the selected dataset: series values, class labels,
# and the number of distinct labels (used below as the cluster count).
ts, labels, num_clusters = dataloder.load(DATASET_NAME)

# CPU Benchmark

In [5]:
from kshape.core import kshape
from kshape.core import KShapeClusteringCPU

In [6]:
# Time five independent fits of the CPU k-Shape implementation.
cpu_times = []
for i in range(5):
    # perf_counter() is a monotonic, high-resolution clock meant for
    # measuring short durations; time.time() can jump if the wall clock
    # is adjusted during a run.
    start_time = time.perf_counter()

    ksc = KShapeClusteringCPU(n_clusters=num_clusters,max_iter=100,n_jobs=-1)
    # The model is fitted on the series with a trailing axis of size 1 added.
    ksc.fit(np.expand_dims(ts, axis=2))

    # Sample the clock once so the printed and the recorded durations are
    # the same number and the print call itself is not part of the timing.
    elapsed = time.perf_counter() - start_time
    print(f'Iteration {i}: time: {elapsed}')

    cpu_times.append(elapsed)

Iteration 0: time: 995.6358451843262
Iteration 1: time: 832.5988781452179
Iteration 2: time: 997.8731744289398
Iteration 3: time: 506.3470458984375
Iteration 4: time: 915.2455561161041


In [7]:
# Average of the fit durations collected in cpu_times.
print('Mean CPU Benchmark for 5 Runs:', np.mean(cpu_times))

Mean CPU Benchmark for 5 Runs: 849.5401813030243


In [8]:
# Cluster assignments of the CPU model left over from the last benchmark run.
predictions = ksc.labels_

# Copy every centroid into one dense (num_clusters, series_length, 1) array.
cluster_centers = np.zeros((num_clusters, ts.shape[1], 1))
for cluster_idx in range(num_clusters):
    cluster_centers[cluster_idx] = ksc.centroids_[cluster_idx]

In [9]:
# Score the CPU clustering against the ground-truth labels.
# scikit-learn documents these metrics as (labels_true, labels_pred); passing
# the arrays by keyword puts each one in its documented role instead of
# relying on positional order.
ri_ks = rand_score(labels_true=labels, labels_pred=predictions)
print('Rand Score:', ri_ks)
ari_ks = adjusted_rand_score(labels_true=labels, labels_pred=predictions)
print('Adjusted Rand Score:', ari_ks)
nmi_ks = normalized_mutual_info_score(labels_true=labels, labels_pred=predictions)
print('Normalized Mutual Information:', nmi_ks)

Rand Score: 0.9258831895773435
Adjusted Rand Score: 0.24634633355841157
Normalized Mutual Information: 0.43663386420321454


# GPU Benchmark

In [7]:
from kshape.core_gpu import kshape
from kshape.core_gpu import KShapeClusteringGPU

In [None]:
# Time five independent fits of the GPU k-Shape implementation.
gpu_times = []
for i in range(5):
    # perf_counter() is a monotonic, high-resolution clock meant for
    # measuring short durations; time.time() can jump if the wall clock
    # is adjusted during a run.
    start_time = time.perf_counter()
    ksg = KShapeClusteringGPU(n_clusters=num_clusters,max_iter=100)
    # The model is fitted on the series with a trailing axis of size 1 added.
    ksg.fit(np.expand_dims(ts, axis=2))

    # Sample the clock once so the printed and the recorded durations are
    # the same number and the print call itself is not part of the timing.
    elapsed = time.perf_counter() - start_time
    print(f'Iteration {i}: time: {elapsed}')

    gpu_times.append(elapsed)

In [None]:
# Average of the fit durations collected in gpu_times.
print('Mean GPU Benchmark for 5 Runs:', np.mean(gpu_times))

In [None]:
# Cluster assignments of the GPU model left over from the last benchmark run.
predictions = ksg.labels_

# Copy every centroid into one dense (num_clusters, series_length, 1) array;
# each centroid is detached and moved to the CPU before being stored.
cluster_centers = np.zeros((num_clusters, ts.shape[1], 1))
for cluster_idx in range(num_clusters):
    cluster_centers[cluster_idx] = ksg.centroids_[cluster_idx].detach().cpu()

In [None]:
# Score the GPU clustering against the ground-truth labels.
# scikit-learn documents these metrics as (labels_true, labels_pred); passing
# the arrays by keyword puts each one in its documented role instead of
# relying on positional order.
ri_ks = rand_score(labels_true=labels, labels_pred=predictions)
print('Rand Score:', ri_ks)
ari_ks = adjusted_rand_score(labels_true=labels, labels_pred=predictions)
print('Adjusted Rand Score:', ari_ks)
nmi_ks = normalized_mutual_info_score(labels_true=labels, labels_pred=predictions)
print('Normalized Mutual Information:', nmi_ks)