In [1]:
from pipelinetools import *
import stumpy
import numpy as np
from multiprocessing import Pool

import sklearn.metrics as metrics
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [8]:
# (1) LOADING AND PRE-PROCESSING OF TRACE FILES

In [9]:
# Load the two files that make up the ds19 dataset. `traces` feeds every later
# step; `traces_kfp` is only handed to generate_cluster_shapelets further down.
traces, traces_kfp = (
    load_traces(trace_file) for trace_file in ('ds19.npy', 'ds19_kfp.npy')
)

In [10]:
# Peek at the layout of `traces` through the first trace stored under index 4.
print(len(traces))          # size of the outer container

demo_class = traces[4]
print(len(demo_class))      # number of traces stored under index 4

demo_trace = demo_class[0]
print(len(demo_trace))      # length of one individual trace
print(demo_trace)

In [11]:
# Run process_traces once per supported mode and show the same sample trace
# (index 4, first trace) after each transformation.
processing_modes = (
    'z',    # remove zeros from all traces
    'n',    # only negative-valued packets
    'p',    # only positive-valued packets
    'ipt',  # inter-packet timing
    'kfp',  # k-fingerprinting features
)

for mode in processing_modes:
    # Always start from the untouched `traces`, so the modes do not stack.
    demo_traces = process_traces(traces, mode)
    print(demo_traces[4][0])

In [12]:
# (2) GENERATING, SAVING AND LOADING SHAPELETS TO/FROM FILES

In [13]:
def _save_then_reload(generated, filenames):
    """Print a sample length, save the shapelets, load them back and print again.

    `generated` is indexed as generated[0][10] for the printed sample, the same
    element the reloaded data is checked against. Returns the reloaded shapelets.
    """
    print(len(generated[0][10]))
    save_shapelets(generated, filenames)

    # loading from file
    reloaded = load_shapelets(filenames)
    print(len(reloaded[0][10]))
    return reloaded


print(len(traces[10][0])) # sample trace from class #10

# generating shapelets randomly and saving to file
shapelet_filenames = make_name_list({'num':list(range(4))})
shapelets = _save_then_reload(
    generate_random_shapelets(traces, 4),
    shapelet_filenames,
)

# generating shapelets as cluster centers and saving to file
shapelet_filenames = make_name_list({'num':[0], 'centroid_id':list(range(3))})
shapelets = _save_then_reload(
    generate_cluster_shapelets(traces, traces_kfp, 3),
    shapelet_filenames,
)

In [None]:
# (3) SINGLE-THREAD PIPELINE

# Convert the trace collection into the (X, y) pair used for classification
# and report the size of each.
X, y = traces_to_xy(traces)

for part in (X, y):
    print(len(part))

# NOTE(review): load_shapelets is given a list from make_name_list elsewhere in
# this notebook but a bare string here - confirm both forms are supported.
shapelets = load_shapelets('num=0')
print(len(shapelets))

# "custom" distance between shapelet and trace function
def stumpy_distance(shapelet, trace):
    """Return the smallest MASS distance between a shapelet and a trace.

    ``stumpy.mass(Q, T)`` computes the distance profile of the query ``Q``
    against the series ``T`` and rejects a query that is longer than the
    series. The shorter of the two inputs is therefore used as the query,
    regardless of which argument it was passed as.

    Parameters
    ----------
    shapelet, trace
        Sequences supporting ``len()`` that ``stumpy.mass`` accepts.

    Returns
    -------
    The minimum value of the distance profile.

    Raises
    ------
    Any exception raised by ``stumpy.mass`` is propagated unchanged.
    """
    # Pick the argument order up front instead of catching ValueError and
    # retrying: a blanket ``except ValueError`` also swallowed unrelated
    # validation errors and replaced them with the error of the swapped call.
    if len(shapelet) > len(trace):
        query, series = trace, shapelet
    else:
        query, series = shapelet, trace

    return stumpy.mass(query, series).min()

# Turn the traces in X into shapelet-distance features, using stumpy_distance
# as the per-(shapelet, trace) distance function.
X = compute_shapelet_distances(X, shapelets, stumpy_distance)

# Both the train/test split and the random forest are stochastic. Seeding them
# makes the printed accuracy reproducible from one run to the next.
RANDOM_SEED = 42

clf = RandomForestClassifier(random_state=RANDOM_SEED)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=RANDOM_SEED
)

clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Accuracy on the held-out 10% of the samples.
scores = metrics.accuracy_score(y_test, y_pred)

print(scores)

In [2]:
# (4) MULTI-THREAD PIPELINE

In [4]:
def _stumpy_distance(shapelet, trace):
    """Return the smallest MASS distance between a shapelet and a trace.

    ``stumpy.mass(Q, T)`` computes the distance profile of the query ``Q``
    against the series ``T`` and rejects a query that is longer than the
    series. The shorter of the two inputs is therefore used as the query,
    regardless of which argument it was passed as.

    Nothing in this cell calls this function: the parameter sets built below
    select the distance through the string "stumpy" instead.

    Returns the minimum value of the distance profile; any exception raised by
    ``stumpy.mass`` is propagated unchanged.
    """
    # Decide the argument order explicitly rather than catching ValueError and
    # retrying, which also hid unrelated validation errors behind the error of
    # the swapped call.
    if len(shapelet) > len(trace):
        query, series = trace, shapelet
    else:
        query, series = shapelet, trace

    return stumpy.mass(query, series).min()

# One parameter set per saved shapelet file:
#   [filename, X, y, shapelets for that file, name of the distance function]
shapelet_filenames = make_name_list({'num':list(range(4))})
shapelets = load_shapelets(shapelet_filenames)

parameter_list = []
for idx, filename in enumerate(shapelet_filenames):
    # Rebuilt on every iteration so each parameter set gets its own X and y.
    # NOTE(review): could be hoisted out of the loop if
    # compute_shapelet_distances_mp never modifies its inputs - confirm first.
    X, y = traces_to_xy(traces)

    parameter_list.append([filename, X, y, shapelets[idx], "stumpy"])

print(len(parameter_list))

# unfortunately, multiprocessing is annoying (as always)

# multiprocessing pickles everything it sends to a worker, and a function defined inside
# the notebook is not reliably picklable, so the distance function is not passed as an argument.
# We need to copy-paste the distance function into compute_shapelet_distances_mp for stuff to work

# the return values of the workers are not used to pass information back in this pipeline
# the results of the shapelet distance computations will be automatically saved into x and y files

# Hand the parameter sets to a pool of four worker processes. The return value
# of map() is ignored on purpose: the workers save their results to files.
with Pool(processes=4) as worker_pool:
    worker_pool.map(compute_shapelet_distances_mp, parameter_list)

# Read the saved results back in.
# NOTE(review): the meaning of the second argument (True) is not visible here.
X, y = load_xy(shapelet_filenames, True)

X.shape