In [1]:
import os
import csv
import time
import numpy as np
from sklearn.metrics import rand_score, normalized_mutual_info_score, adjusted_rand_score

# Read Data

In [2]:
class ClusterDataLoader:
    """Load a labelled time-series dataset stored as two CSV files.

    The files are read from ``<dataset_path>/<name>/<name>_TRAIN`` and
    ``<dataset_path>/<name>/<name>_TEST``. Every row holds one series: the
    first column is parsed as an integer class label and the remaining
    columns as float values.
    """

    def __init__(self, dataset_path):
        """Store the root directory that contains one sub-directory per dataset."""
        self.path = dataset_path

    def load(self, sub_dataset_name):
        """Read the TRAIN and TEST files of one dataset and concatenate them.

        Args:
            sub_dataset_name: Name of the dataset sub-directory; also used as
                the prefix of the ``_TRAIN`` / ``_TEST`` file names.

        Returns:
            A tuple ``(ts, labels, num_clusters)``: ``ts`` is the array built
            from the per-row value lists, ``labels`` is the integer label
            array (TRAIN rows first, then TEST rows) and ``num_clusters`` is
            the number of distinct labels.

        Raises:
            ValueError: If neither file contains a data row.
        """
        ts, labels = [], []
        for mode in ['_TRAIN', '_TEST']:
            file_path = os.path.join(self.path, sub_dataset_name, sub_dataset_name + mode)
            # newline='' is the csv module's recommended way to open input files.
            with open(file_path, newline='') as csv_file:
                for line in csv.reader(csv_file, delimiter=','):
                    if not line:
                        # csv.reader yields [] for blank lines (e.g. a trailing newline).
                        continue
                    ts.append([float(x) for x in line[1:]])
                    labels.append(int(line[0]) - 1)

        if not labels:
            raise ValueError(
                'no rows found for dataset %r under %r' % (sub_dataset_name, self.path)
            )

        # Convert before re-basing: subtracting an int from a Python list
        # raises TypeError, which made both branches below unusable.
        labels = np.array(labels)
        if labels.min() == 1:
            labels = labels - 1
        if labels.min() == -1:
            labels = labels + 1

        return np.array(ts), labels, int(len(np.unique(labels)))

In [3]:
# Benchmark configuration: root directory of the datasets and the one to cluster.
DATASET_PATH, DATASET_NAME = '../data/univariate_example/', 'Crop'

# Loader rooted at DATASET_PATH; no file is read until `load` is called.
dataloder = ClusterDataLoader(dataset_path=DATASET_PATH)

In [4]:
# Read the TRAIN and TEST files of the selected dataset as one combined set.
ts, labels, num_clusters = dataloder.load(sub_dataset_name=DATASET_NAME)

# CPU Benchmark

In [5]:
from kshape.core import kshape

In [6]:
# Bind the CPU implementation under its own alias. The GPU section rebinds the
# bare name `kshape`, so re-running this cell after that import would
# otherwise time the GPU implementation by mistake.
from kshape.core import kshape as cpu_kshape

# Time 5 independent end-to-end clustering runs; the model from the last run
# is kept in `cpu_kshape_model` for the evaluation cells below.
cpu_times = []
for i in range(5):
    # perf_counter is monotonic and meant for measuring short durations,
    # whereas time.time() follows the (adjustable) wall clock.
    start_time = time.perf_counter()
    cpu_kshape_model = cpu_kshape(np.expand_dims(ts, axis=2), num_clusters)
    cpu_times.append(time.perf_counter() - start_time)

In [7]:
# Report the run count from the data so the message cannot drift from the loop.
print(f'Mean CPU Benchmark for {len(cpu_times)} Runs:', np.mean(cpu_times))

Mean CPU Benchmark for 5 Runs: 2527.477051258087


In [9]:
# Turn the per-cluster index sets into one integer label per series.
# Start from -1 (not a valid cluster id) instead of 0 so that a series the
# model never lists cannot be silently counted as a member of cluster 0.
predictions = np.full(ts.shape[0], -1, dtype=int)
for i in range(num_clusters):
    # Element [1] of each model entry is used as the indices of the series in cluster i.
    predictions[cpu_kshape_model[i][1]] = i
assert (predictions >= 0).all(), 'some series were not assigned to any cluster'

# Stack element [0] of each model entry (presumably the cluster centroid —
# confirm against the kshape documentation) into a (num_clusters, length, 1) array.
cluster_centers = np.zeros((num_clusters, ts.shape[1], 1))
for k in range(num_clusters):
    cluster_centers[k, :, :] = cpu_kshape_model[k][0]

In [11]:
# Compare the CPU clustering with the ground-truth classes.
# scikit-learn declares these metrics as (labels_true, labels_pred); all three
# are symmetric, so passing the ground truth first only aligns the calls with
# the documented signature and does not change the scores.
ri_ks = rand_score(labels, predictions)
print('Rand Score:', ri_ks)
ari_ks = adjusted_rand_score(labels, predictions)
print('Adjusted Rand Score:', ari_ks)
nmi_ks = normalized_mutual_info_score(labels, predictions)
print('Normalized Mutual Information:', nmi_ks)

Rand Score: 0.9241423149575677
Adjusted Rand Score: 0.2446950839815576
Normalized Mutual Information: 0.4313775426755777


# GPU Benchmark

In [6]:
from kshape.core_gpu import kshape

In [7]:
# Bind the GPU implementation under its own alias so this cell does not depend
# on which `kshape` import (CPU or GPU) was executed most recently.
from kshape.core_gpu import kshape as gpu_kshape

# Time 5 independent end-to-end clustering runs; the model from the last run
# is kept in `gpu_kshape_model` for the evaluation cells below.
# NOTE(review): if core_gpu returns before queued device work has finished,
# the clock must be read only after a device synchronisation — confirm.
gpu_times = []
for i in range(5):
    # perf_counter is monotonic and meant for measuring short durations,
    # whereas time.time() follows the (adjustable) wall clock.
    start_time = time.perf_counter()
    gpu_kshape_model = gpu_kshape(np.expand_dims(ts, axis=2), num_clusters)
    gpu_times.append(time.perf_counter() - start_time)

In [8]:
# Report the run count from the data so the message cannot drift from the loop.
print(f'Mean GPU Benchmark for {len(gpu_times)} Runs:', np.mean(gpu_times))

Mean GPU Benchmark for 5 Runs: 33078.67823410034


In [9]:
# Turn the per-cluster index sets into one integer label per series.
# Start from -1 (not a valid cluster id) instead of 0 so that a series the
# model never lists cannot be silently counted as a member of cluster 0.
predictions = np.full(ts.shape[0], -1, dtype=int)
for i in range(num_clusters):
    # Element [1] of each model entry is used as the indices of the series in cluster i.
    predictions[gpu_kshape_model[i][1]] = i
assert (predictions >= 0).all(), 'some series were not assigned to any cluster'

# Stack element [0] of each model entry (presumably the cluster centroid —
# confirm against the kshape documentation) into a (num_clusters, length, 1)
# array; it is detached and moved to host memory before the copy.
cluster_centers = np.zeros((num_clusters, ts.shape[1], 1))
for k in range(num_clusters):
    cluster_centers[k, :, :] = gpu_kshape_model[k][0].detach().cpu()

In [10]:
# Compare the GPU clustering with the ground-truth classes.
# scikit-learn declares these metrics as (labels_true, labels_pred); all three
# are symmetric, so passing the ground truth first only aligns the calls with
# the documented signature and does not change the scores.
ri_ks = rand_score(labels, predictions)
print('Rand Score:', ri_ks)
ari_ks = adjusted_rand_score(labels, predictions)
print('Adjusted Rand Score:', ari_ks)
nmi_ks = normalized_mutual_info_score(labels, predictions)
print('Normalized Mutual Information:', nmi_ks)

Rand Score: 0.9350468352848035
Adjusted Rand Score: 0.24933303905728887
Normalized Mutual Information: 0.4350312365117842
