In [1]:
import stumpy
import pickle
import numpy as np
import sklearn.metrics as metrics

from pipelinetools import *
from multiprocessing import Pool
from sklearn.ensemble import RandomForestClassifier

In [2]:
# LOAD AND TRAIN THE MODEL

In [3]:
# Load the three trace collections used below (train split, test split, kfp).
traces_train, traces_test, traces_kfp = [
    load_traces(dataset_name)
    for dataset_name in ('transformer_train', 'transformer_test', 'transformer_kfp')
]

# Drop the entry stored under -1 from every collection before further processing.
# NOTE(review): -1 is presumably a label that must be excluded from training — confirm.
del traces_train[-1], traces_test[-1], traces_kfp[-1]

# Build the output names for the "neg" artefacts: one name per
# (split, centroid_id) combination, with two centroid ids per split.
filenames_train, filenames_test = [
    make_name_list({
        'data': [split],
        'centroid_id': list(range(2)),
        'type': ['neg']
    })
    for split in ('train', 'test')
]

In [4]:
# Run both splits through process_traces in "n" mode, rebinding the same names.
# NOTE(review): because the inputs are overwritten, re-running this cell without
# re-running the load cell would process already-processed traces.
traces_train, traces_test = [
    process_traces(split_traces, "n")
    for split_traces in (traces_train, traces_test)
]

100%|███████████████████████████████████████████| 50/50 [00:04<00:00, 10.38it/s]
100%|███████████████████████████████████████████| 50/50 [00:01<00:00, 42.21it/s]


In [5]:
# Derive shapelets from the processed training traces together with the kfp
# traces, then save them under the names in filenames_train.
# NOTE(review): the literal 2 presumably is the number of clusters/centroids and
# has to match the two centroid ids used to build filenames_train — confirm
# against generate_cluster_shapelets.
shapelets = generate_cluster_shapelets(traces_train, traces_kfp, 2)
save_shapelets(shapelets, filenames_train)

100%|███████████████████████████████████████████| 50/50 [00:00<00:00, 67.97it/s]

Saving shapelets as ../results/shapelets/data=traincentroid_id=0type=neg
Saving shapelets as ../results/shapelets/data=traincentroid_id=1type=neg





In [6]:
# Convert both splits into X / y form for the distance computation.
X_train, y_train = traces_to_xy(traces_train)
X_test, y_test = traces_to_xy(traces_test)

# One job per (split, centroid) pair. Each job is the positional argument list
# handed to compute_shapelet_distances_mp: output name, X, y, the shapelets of
# the matching centroid, and the distance mode. Train jobs come first, then
# test jobs; shapelets are looked up by the position of the name in its list.
parameter_list = [
    [filename, X, y, shapelets[centroid], "stumpy_mean"]
    for filenames, X, y in (
        (filenames_train, X_train, y_train),
        (filenames_test, X_test, y_test),
    )
    for centroid, filename in enumerate(filenames)
]

print(len(parameter_list))

# Fan the jobs out over four worker processes; the helper's return values are
# discarded, so its results are expected to reach disk from inside the workers.
with Pool(4) as pool:
    pool.map(compute_shapelet_distances_mp, parameter_list)

4


100%|██████████| 1000/1000 [01:37<00:00, 10.24it/s]
100%|██████████| 1000/1000 [01:39<00:00, 10.09it/s]
100%|██████████| 4000/4000 [06:20<00:00, 10.51it/s]
100%|██████████| 4000/4000 [06:25<00:00, 10.38it/s]


Saving X as ../results/data/X/data=testcentroid_id=1type=neg
Saving y as ../results/data/y/data=testcentroid_id=1type=neg
Saving X as ../results/data/X/data=testcentroid_id=0type=neg
Saving y as ../results/data/y/data=testcentroid_id=0type=neg
Saving X as ../results/data/X/data=traincentroid_id=1type=neg
Saving y as ../results/data/y/data=traincentroid_id=1type=neg


In [11]:
# Names of the stored feature files for every (type, split) combination, with
# two centroid ids each. The unpacking order follows the loop nesting below:
# pos/train, pos/test, neg/train, neg/test.
(
    filenames_train_pos,
    filenames_test_pos,
    filenames_train_neg,
    filenames_test_neg,
) = [
    make_name_list({
        'data': [split],
        'centroid_id': list(range(2)),
        'type': [shapelet_type]
    })
    for shapelet_type in ('pos', 'neg')
    for split in ('train', 'test')
]

In [12]:
# Load the stored feature matrices and labels for the "pos" and "neg" variants
# of each split.
# NOTE(review): the second argument (True) presumably merges the per-centroid
# files into a single matrix — confirm against load_xy.
X_train_pos, y_train_pos = load_xy(filenames_train_pos, True)
X_test_pos, y_test_pos = load_xy(filenames_test_pos, True)

X_train_neg, y_train_neg = load_xy(filenames_train_neg, True)
X_test_neg, y_test_neg = load_xy(filenames_test_neg, True)

# The pos and neg features are joined column-wise, which is only meaningful when
# row i describes the same sample in both matrices. Verify this through the
# stored labels instead of assuming it.
if not np.array_equal(y_train_pos, y_train_neg):
    raise ValueError("train labels of the 'pos' and 'neg' feature files differ")
if not np.array_equal(y_test_pos, y_test_neg):
    raise ValueError("test labels of the 'pos' and 'neg' feature files differ")

X_train = np.concatenate((X_train_pos, X_train_neg), axis=1)
X_test = np.concatenate((X_test_pos, X_test_neg), axis=1)

# Take the labels from the same files as the features. Previously the loaded
# labels were discarded and later cells used whatever y_train / y_test an earlier
# cell happened to leave in the kernel.
y_train = y_train_pos
y_test = y_test_pos

Loading X from ../results/data/X/data=traincentroid_id=0type=pos
Loading X from ../results/data/X/data=traincentroid_id=1type=pos
Loading y from ../results/data/y/data=traincentroid_id=0type=pos
Loading X from ../results/data/X/data=testcentroid_id=0type=pos
Loading X from ../results/data/X/data=testcentroid_id=1type=pos
Loading y from ../results/data/y/data=testcentroid_id=0type=pos
Loading X from ../results/data/X/data=traincentroid_id=0type=neg
Loading X from ../results/data/X/data=traincentroid_id=1type=neg
Loading y from ../results/data/y/data=traincentroid_id=0type=neg
Loading X from ../results/data/X/data=testcentroid_id=0type=neg
Loading X from ../results/data/X/data=testcentroid_id=1type=neg
Loading y from ../results/data/y/data=testcentroid_id=0type=neg


In [13]:
# Show the shape of the combined training feature matrix as a sanity check.
print(X_train.shape)

(4000, 200)


In [14]:
# Random forests are stochastic (bootstrap sampling and feature sub-sampling),
# so fix the seed to make the reported accuracy reproducible across runs.
clf = RandomForestClassifier(random_state=0)

# Fit on the training features/labels and predict the test split.
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Fraction of test samples whose predicted label matches the true label.
scores = metrics.accuracy_score(y_test, y_pred)

In [15]:
# Report the test-set accuracy returned by metrics.accuracy_score.
print(scores)

0.962


In [8]:
# (1) LOAD ORIGINALS OF THE BIGENOUGH DATASET AND CONVERT TO OUR FORMAT (ONLY RUN ONCE)

In [33]:
# Load the original (unconverted) train and test splits.
# NOTE(review): this rebinds traces_train / traces_test, replacing any processed
# traces that earlier cells left in the kernel.
traces_train, traces_test = [
    load_traces(dataset_name)
    for dataset_name in ('transformer/train', 'transformer/test')
]

In [34]:
def reformat_transformer(traces):
    """Regroup a column-oriented trace container into a dict keyed by label.

    Parameters
    ----------
    traces : mapping
        Must provide index-aligned sequences under the keys 'time', 'data'
        and 'label'; entry i of each sequence describes the same trace.
        The number of traces is taken from len(traces['data']).

    Returns
    -------
    dict
        Maps every distinct label to a list of float64 arrays, one per trace
        with that label, in their original order. Each array is the
        element-wise product of the trace's 'time' and 'data' entries
        (presumably timestamps signed by packet direction — confirm).
    """
    labels = traces['label']
    # Pre-create an empty bucket for every distinct label.
    grouped = {label: [] for label in set(labels)}

    for idx in tqdm(range(len(traces['data']))):
        signed_times = np.multiply(traces['time'][idx], traces['data'][idx])
        grouped[labels[idx]].append(signed_times.astype('float64'))

    return grouped

In [35]:
# Regroup both original splits by label (train first, then test).
traces_train, traces_test = [
    reformat_transformer(raw_traces)
    for raw_traces in (traces_train, traces_test)
]

# Derive the 'kfp' variant from the reformatted training split.
traces_kfp = process_traces(traces_train, 'kfp')

100%|████████████████████████████████████| 8000/8000 [00:00<00:00, 65819.72it/s]
100%|███████████████████████████████████| 2000/2000 [00:00<00:00, 116865.53it/s]
100%|███████████████████████████████████████████| 51/51 [05:42<00:00,  6.72s/it]


In [38]:
# Persist the three converted collections as pickles so this conversion
# section only has to be run once.
for dataset_path, dataset in (
    ("../datasets/transformer_train", traces_train),
    ("../datasets/transformer_test", traces_test),
    ("../datasets/transformer_kfp", traces_kfp),
):
    with open(dataset_path, 'wb') as handle:
        pickle.dump(dataset, handle)