In [1]:
import stumpy
import pickle
import numpy as np
import sklearn.metrics as metrics

from pipelinetools import *
from multiprocessing import Pool
from sklearn.ensemble import RandomForestClassifier

In [11]:
# LOAD ORIGINALS OF TRANSFORMER DATASET AND CONVERT TO OUR FORMAT (ONLY NEED TO RUN ONCE)

In [33]:
# Read the original train and test splits of the transformer dataset
# (train is loaded first, then test).
traces_train, traces_test = map(
    load_traces, ('transformer/train', 'transformer/test')
)

In [34]:
def reformat_transformer(traces):
    """Regroup a column-style trace collection into per-label lists.

    Parameters
    ----------
    traces : mapping
        Must provide the keys 'data', 'time' and 'label', each an indexable
        sequence of the same length. Entry ``i`` of each describes one trace.
        NOTE(review): 'data' is treated as the packet direction here
        (presumably +1/-1 per packet) -- confirm against the dataset.

    Returns
    -------
    dict
        Maps every distinct label to a list of float64 arrays, one per trace
        with that label, in the order the traces appear in the input. Each
        array is the element-wise product of the trace's 'time' and 'data'.
    """
    # One empty bucket per distinct label.
    grouped = {label: [] for label in set(traces['label'])}

    n_traces = len(traces['data'])
    for idx in tqdm(range(n_traces)):
        # Fold the direction into the timestamp so one array carries both.
        signed_times = np.multiply(traces['time'][idx], traces['data'][idx])
        grouped[traces['label'][idx]].append(signed_times.astype('float64'))

    return grouped

In [35]:
# Convert both splits to the per-label layout (train first, then test), then
# derive the 'kfp' traces from the converted training split only.
# NOTE: this cell rebinds traces_train / traces_test, so re-running it without
# first re-running the raw load cell feeds already-converted data back in.
traces_train, traces_test = map(reformat_transformer, (traces_train, traces_test))
traces_kfp = process_traces(traces_train, 'kfp')

100%|████████████████████████████████████| 8000/8000 [00:00<00:00, 65819.72it/s]
100%|███████████████████████████████████| 2000/2000 [00:00<00:00, 116865.53it/s]
100%|███████████████████████████████████████████| 51/51 [05:42<00:00,  6.72s/it]


In [38]:
# Persist the converted splits and the 'kfp' traces, one pickle file each,
# written in the order listed.
pickle_targets = (
    ("../datasets/transformer_train", traces_train),
    ("../datasets/transformer_test", traces_test),
    ("../datasets/transformer_kfp", traces_kfp),
)
for pickle_path, payload in pickle_targets:
    with open(pickle_path, 'wb') as f:
        pickle.dump(payload, f)

In [None]:
# LOAD AND TRAIN THE MODEL

In [2]:
# Reload the three converted trace collections.
# NOTE(review): presumably these are the pickles written under ../datasets by
# the one-off conversion cells -- confirm how load_traces resolves names.
traces_train, traces_test, traces_kfp = map(
    load_traces, ('transformer_train', 'transformer_test', 'transformer_kfp')
)

# Remove the entry at key/index -1 from every collection before training.
# NOTE(review): presumably -1 is a label that should be excluded -- confirm.
for trace_group in (traces_train, traces_test, traces_kfp):
    del trace_group[-1]

# File-name lists for the 'mean' distance features, one per split.
filenames_train, filenames_test = (
    make_name_list({
        'data': [dataset_name],
        'centroid_id': list(range(2)),
        'distance': ['mean']
    })
    for dataset_name in ('transformer_train', 'transformer_test')
)

In [3]:
# Build the shapelets from the training and 'kfp' traces, then store them
# under the training file names.
# NOTE(review): the literal 2 presumably has to match the two centroid ids
# used to build filenames_train -- confirm against generate_cluster_shapelets.
shapelet_inputs = (traces_train, traces_kfp, 2)
shapelets = generate_cluster_shapelets(*shapelet_inputs)
save_shapelets(shapelets, filenames_train)

100%|███████████████████████████████████████████| 50/50 [00:01<00:00, 45.12it/s]

Saving shapelets as ../results/shapelets/data=transformer_traincentroid_id=0distance=mean
Saving shapelets as ../results/shapelets/data=transformer_traincentroid_id=1distance=mean





In [4]:
# Flatten the training traces into X / y once; every work item reuses them.
X, y = traces_to_xy(traces_train)

# One work item per output file name, paired with the shapelet set that has
# the same position in `shapelets`.
parameter_list = [
    [output_name, X, y, shapelets[idx], "stumpy_mean"]
    for idx, output_name in enumerate(filenames_train)
]

# Run the work items in two worker processes; the return value of map is
# discarded here.
with Pool(2) as pool:
    pool.map(compute_shapelet_distances_mp, parameter_list)

100%|██████████| 4000/4000 [06:16<00:00, 10.62it/s]
100%|██████████| 4000/4000 [06:30<00:00, 10.24it/s]


Saving X as ../results/data/X/data=transformer_traincentroid_id=1distance=mean
Saving y as ../results/data/y/data=transformer_traincentroid_id=1distance=mean


In [5]:
# Flatten the test traces into X / y once; every work item reuses them.
X, y = traces_to_xy(traces_test)

# One work item per test output file name, reusing the shapelet set at the
# same position in `shapelets`.
parameter_list = [
    [output_name, X, y, shapelets[idx], "stumpy_mean"]
    for idx, output_name in enumerate(filenames_test)
]

# Sanity check: number of work items about to be dispatched.
print(len(parameter_list))

# Run the work items in two worker processes; the return value of map is
# discarded here.
with Pool(2) as pool:
    pool.map(compute_shapelet_distances_mp, parameter_list)

2


100%|██████████| 1000/1000 [01:33<00:00, 10.70it/s]
100%|██████████| 1000/1000 [01:36<00:00, 10.31it/s]


In [23]:
# File-name lists for a second ("min") feature set, one per split.
# NOTE(review): the training name is 'transformer' (no '_train' suffix) while
# the test name is 'transformer_test'; no cell visible in this part of the
# notebook writes files under these names -- confirm where they come from.
filenames_min_train, filenames_min_test = (
    make_name_list({
        'dataset': [dataset_name],
        'centroid_id': list(range(2)),
    })
    for dataset_name in ('transformer', 'transformer_test')
)

In [24]:
# Load the "mean" and "min" feature sets for both splits. Each load also
# returns its own label vector; keep them apart instead of overwriting so they
# can be compared below.
X_train_mean, y_train_mean = load_xy(filenames_train, True)
X_test_mean, y_test_mean = load_xy(filenames_test, True)

X_train_min, y_train_min = load_xy(filenames_min_train, True)
X_test_min, y_test_min = load_xy(filenames_min_test, True)

# The two feature sets are joined column-wise, which is only meaningful when
# row i describes the same sample in both. Fail loudly if the labels disagree
# rather than training on misaligned rows.
assert np.array_equal(y_train_mean, y_train_min), \
    "train labels of the 'mean' and 'min' feature sets differ; rows are not aligned"
assert np.array_equal(y_test_mean, y_test_min), \
    "test labels of the 'mean' and 'min' feature sets differ; rows are not aligned"
y_train, y_test = y_train_min, y_test_min

# Final design matrices: 'mean' columns first, then 'min' columns.
X_train = np.concatenate((X_train_mean, X_train_min), axis=1)
X_test = np.concatenate((X_test_mean, X_test_min), axis=1)

Loading X from ../results/data/X/data=transformer_traincentroid_id=0distance=mean
Loading X from ../results/data/X/data=transformer_traincentroid_id=1distance=mean
Loading y from ../results/data/y/data=transformer_traincentroid_id=0distance=mean
Loading X from ../results/data/X/data=transformer_testcentroid_id=0distance=mean
Loading X from ../results/data/X/data=transformer_testcentroid_id=1distance=mean
Loading y from ../results/data/y/data=transformer_testcentroid_id=0distance=mean
Loading X from ../results/data/X/dataset=transformercentroid_id=0
Loading X from ../results/data/X/dataset=transformercentroid_id=1
Loading y from ../results/data/y/dataset=transformercentroid_id=0
Loading X from ../results/data/X/dataset=transformer_testcentroid_id=0
Loading X from ../results/data/X/dataset=transformer_testcentroid_id=1
Loading y from ../results/data/y/dataset=transformer_testcentroid_id=0


In [25]:
# Show (n_samples, n_features) of the combined training matrix.
print(np.shape(X_train))

(4000, 200)


In [30]:
# Fit a random forest on the combined features and score it on the test split.
# A fixed random_state makes the bootstrap samples and feature sub-sampling
# repeatable, so the reported accuracy is the same on every re-run.
clf = RandomForestClassifier(random_state=0)

clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# `scores` holds a single value: the fraction of correctly classified test samples.
scores = metrics.accuracy_score(y_test, y_pred)

In [31]:
# Report the test-set accuracy computed in the previous cell.
print(str(scores))

0.953


In [8]:
# (1) LOAD ORIGINALS OF BIGENOUGH DATASET AND CONVERT TO OUR FORMAT (ONLY RUN ONCE)