In [1]:
import numpy as np
import pickle
import re
import os
import random


from tqdm import tqdm
from modified_features import extract_features

In [2]:
# Folder that holds the DS-19 ".cell" trace files.
data_folder = "../ds19/"
# traces[k] is a list of 100 slots for id k; slots that are never filled stay None.
traces = {trace_id: [None] * 100 for trace_id in range(100)}
# indices[k] is the next slot of traces[k] to be written.
indices = {trace_id: 0 for trace_id in range(100)}
# Filename patterns "<id>-<instance>.cell", ordered by digit counts (2,2), (2,1), (1,2), (1,1).
# Raw strings keep "\d" from being treated as an (invalid) string escape, and "\."
# matches a literal dot instead of any character.
patterns = [
    re.compile(r"^\d{2}-\d{2}\.cell"),
    re.compile(r"^\d{2}-\d{1}\.cell"),
    re.compile(r"^\d{1}-\d{2}\.cell"),
    re.compile(r"^\d{1}-\d{1}\.cell"),
]

In [None]:
# process the data by converting DS-19 to our format

In [47]:
def process(filename, trace_id):
    """Load one trace file and store it in ``traces`` as a 1-D array.

    The file is parsed with ``np.loadtxt``; for every row the first two columns
    are multiplied together, giving one value per row. The result is written to
    the next free slot of ``traces[trace_id]`` and ``indices[trace_id]`` is
    advanced by one.

    Args:
        filename: Name of the file inside ``data_folder``.
        trace_id: Key into the module-level ``traces`` and ``indices`` dicts.

    Raises:
        IndexError: If ``indices[trace_id]`` is already past the last slot of
            ``traces[trace_id]``.
    """
    with open(os.path.join(data_folder, filename)) as f:
        # ndmin=2 keeps a single-row file two-dimensional; without it loadtxt
        # returns a 1-D array and per-row column indexing fails.
        raw_file = np.loadtxt(f, ndmin=2)

    if raw_file.size == 0:
        # An empty file yields an empty trace rather than an indexing error.
        converted_file = np.empty(0)
    else:
        # Column-wise product replaces the per-row Python loop.
        converted_file = raw_file[:, 0] * raw_file[:, 1]

    slot = indices[trace_id]
    traces[trace_id][slot] = converted_file
    indices[trace_id] = slot + 1

In [50]:
# Each pattern is paired with the number of leading characters that hold the
# trace id: two digits for the first two patterns, one digit for the last two.
for filename in os.listdir(data_folder):
    for pattern, id_width in zip(patterns, (2, 2, 1, 1)):
        if pattern.match(filename):
            process(filename, int(filename[:id_width]))
            break

# The dict is written with pickle, even though the file carries a .npy extension.
with open('../ds19.npy', 'wb') as out_file:
    pickle.dump(traces, out_file)

In [57]:
# random shapelet selection
# NOTE: this cell is disabled. The code below is wrapped in a triple-quoted
# string, so none of it executes. As written it would pick one random entry from
# traces[i] for each i in 0..99 and pickle the resulting list.

'''
shapelets = [None] * 100

for i in range(100):
    shapelets[i] = random.choice(traces[i])

with open('../results/shapelets/num=0size=0', 'wb') as f:
    pickle.dump(shapelets, f)
'''

In [4]:
# process the data by extracting k-fp features from the dataset

In [96]:
# Folder that holds the DS-19 ".cell" trace files.
data_folder = "../ds19/"
# Fresh containers: traces[k] is a list of 100 slots for id k (unfilled slots stay
# None) and indices[k] is the next slot of traces[k] to be written.
traces = {trace_id: [None] * 100 for trace_id in range(100)}
indices = {trace_id: 0 for trace_id in range(100)}
# Filename patterns "<id>-<instance>.cell", ordered by digit counts (2,2), (2,1), (1,2), (1,1).
# Raw strings keep "\d" from being treated as an (invalid) string escape, and "\."
# matches a literal dot instead of any character.
patterns = [
    re.compile(r"^\d{2}-\d{2}\.cell"),
    re.compile(r"^\d{2}-\d{1}\.cell"),
    re.compile(r"^\d{1}-\d{2}\.cell"),
    re.compile(r"^\d{1}-\d{1}\.cell"),
]

In [97]:
def process_kfp(filename, trace_id):
    """Load one tab-separated trace file and store it in ``traces`` as a list of tuples.

    Every non-blank line is split on tabs and each field is converted to
    ``float``, giving one tuple per line. The list is written to the next free
    slot of ``traces[trace_id]`` and ``indices[trace_id]`` is advanced by one.

    Args:
        filename: Name of the file inside ``data_folder``.
        trace_id: Key into the module-level ``traces`` and ``indices`` dicts.

    Raises:
        ValueError: If a field on a non-blank line cannot be parsed as a float.
        IndexError: If ``indices[trace_id]`` is already past the last slot of
            ``traces[trace_id]``.
    """
    with open(os.path.join(data_folder, filename)) as f:
        converted_file = [
            tuple(float(field) for field in line.rstrip().split('\t'))
            for line in f
            # Blank lines (e.g. a trailing newline) are skipped; float('') would
            # raise, whereas np.loadtxt in `process` tolerates such lines.
            if line.strip()
        ]

    slot = indices[trace_id]
    traces[trace_id][slot] = converted_file
    indices[trace_id] = slot + 1

In [98]:
# load the raw traces, extract their features, and save the result to a file

In [99]:
# Each pattern is paired with the number of leading characters that hold the
# trace id: two digits for the first two patterns, one digit for the last two.
for filename in tqdm(os.listdir(data_folder)):
    for pattern, id_width in zip(patterns, (2, 2, 1, 1)):
        if pattern.match(filename):
            process_kfp(filename, int(filename[:id_width]))
            break

100%|████████████████████████████████████| 20000/20000 [00:35<00:00, 556.85it/s]


In [100]:
# Overwrite every list in `traces` with the extract_features output for each of
# its entries (existing keys only, so the dict size is unchanged while iterating).
for key, raw_traces in tqdm(traces.items()):
    traces[key] = list(map(extract_features, raw_traces))

100%|█████████████████████████████████████████| 100/100 [05:03<00:00,  3.03s/it]


In [None]:
# Spot-check a handful of entries of `traces`.
for outer, inner in ((0, 0), (0, 50), (50, 50), (99, 99)):
    print(traces[outer][inner])

In [102]:
# The dict is written with pickle, even though the file carries a .npy extension.
with open('../ds19_kfp.npy', 'wb') as out_file:
    pickle.dump(traces, out_file)

In [103]:
# convert our format to the same one as ds-19, then extract k-fp features

In [3]:
# Rebinds `traces` to the pickled object stored in this file.
# NOTE(review): pickle.load can execute arbitrary code, so only point this at a
# file you produced yourself.
with open('../nonzero_traces.npy', 'rb') as in_file:
    traces = pickle.load(in_file)

In [4]:
def convert_format(trace):
    """Split each signed value of ``trace`` into a ``(magnitude, sign)`` pair.

    Args:
        trace: Iterable of numeric values (for example a list or a 1-D array).

    Returns:
        A list with one ``(abs(value), sign(value))`` tuple per element, in the
        original order. The sign is -1, 0 or 1 as defined by ``np.sign``.
    """
    if not isinstance(trace, np.ndarray):
        # Materialise arbitrary iterables so np.asarray sees a sequence.
        trace = list(trace)
    values = np.asarray(trace)
    # abs/sign are computed once over the whole trace instead of invoking a
    # NumPy ufunc for every single element.
    return list(zip(np.abs(values), np.sign(values)))

In [5]:
# Overwrite every list in `traces` with the convert_format output for each of its
# entries (existing keys only, so the dict size is unchanged while iterating).
for key, signed_traces in tqdm(traces.items()):
    traces[key] = list(map(convert_format, signed_traces))

100%|█████████████████████████████████████████| 100/100 [16:51<00:00, 10.11s/it]


In [6]:
# Overwrite every list in `traces` with the extract_features output for each of
# its entries (existing keys only, so the dict size is unchanged while iterating).
for key, converted_traces in tqdm(traces.items()):
    traces[key] = list(map(extract_features, converted_traces))

100%|█████████████████████████████████████| 100/100 [18:43:06<00:00, 673.87s/it]


In [7]:
# The dict is written with pickle, even though the file carries a .npy extension.
with open('../nonzero_kfp.npy', 'wb') as out_file:
    pickle.dump(traces, out_file)