In [90]:
import numpy as np
import pickle
import re
import os
import random


from tqdm import tqdm
from modified_features import extract_features

In [None]:
# SECTION 1
# process the data by converting DS-19 to our format

In [2]:
data_folder = "../ds19/"
# Trace ids 0-99 each get 100 pre-allocated slots, plus a per-id counter of the next free slot.
traces = {trace_id: [None] * 100 for trace_id in range(100)}
indices = {trace_id: 0 for trace_id in range(100)}
# File names have the form "<1-2 digits>-<1-2 digits>.cell".
# Raw strings keep "\d" a regex escape instead of an invalid string escape, and "\."
# matches only a literal dot (a bare "." would accept any character before "cell").
patterns = [
    re.compile(r"^\d{2}-\d{2}\.cell"),
    re.compile(r"^\d{2}-\d{1}\.cell"),
    re.compile(r"^\d{1}-\d{2}\.cell"),
    re.compile(r"^\d{1}-\d{1}\.cell"),
]

In [47]:
def process(filename, trace_id):
    """Load one file from ``data_folder`` and store it as a 1-D array in ``traces``.

    Each row of the file is reduced to the product of its first two columns.
    The resulting array is written to the next free slot of ``traces[trace_id]``
    and ``indices[trace_id]`` is advanced by one.

    Parameters
    ----------
    filename : str
        Name of the file inside ``data_folder``.
    trace_id : int
        Key into ``traces`` / ``indices`` under which the result is stored.

    Raises
    ------
    IndexError
        If ``traces[trace_id]`` has no free slot left, or a row has fewer than
        two columns.
    """
    # os.path.join works whether or not data_folder ends with a path separator.
    with open(os.path.join(data_folder, filename)) as f:
        # ndmin=2 keeps a one-line file as a single row; without it loadtxt
        # returns a 1-D array and indexing row[0] on its scalars fails.
        raw_file = np.loadtxt(f, ndmin=2)

    converted_file = np.asarray([row[0] * row[1] for row in raw_file])

    traces[trace_id][indices[trace_id]] = converted_file
    indices[trace_id] += 1

In [50]:
# os.listdir returns names in arbitrary order; sorting makes the slot each trace
# lands in reproducible from run to run.
for filename in sorted(os.listdir(data_folder)):
    # Every pattern starts with a 1- or 2-digit number followed by a dash, so the
    # text before the first dash is the trace id whichever pattern matched.
    if any(pattern.match(filename) for pattern in patterns):
        process(filename, int(filename.split('-', 1)[0]))

# The output is a pickle of the traces dict, despite the .npy extension.
with open('../ds19.npy', 'wb') as f:
    pickle.dump(traces, f)

In [2]:
# SECTION 2
# extract randomly selected shapelets from dataset

In [88]:
# The file is a pickle (not a numpy array) despite its .npy suffix.
# pickle.load can execute arbitrary code, so only read files you produced yourself.
with open("../datasets/ds19_ipt.npy", 'rb') as ipt_file:
    traces = pickle.load(ipt_file)

In [89]:
# random shapelet selection
NUM_SHAPELET_SETS = 6   # one output file per set: num=0 ... num=5
NUM_TRACE_IDS = 100     # trace ids 0-99 are sampled
SHAPELET_SEED = 0       # fixed seed so re-running the cell writes the same sets

# A dedicated generator keeps the selection reproducible without touching the
# global random state used elsewhere.
shapelet_rng = random.Random(SHAPELET_SEED)

for set_index in range(NUM_SHAPELET_SETS):
    # One randomly chosen trace per trace id, in id order.
    shapelets = [shapelet_rng.choice(traces[trace_id]) for trace_id in range(NUM_TRACE_IDS)]

    with open('../results/shapelets/num=' + str(set_index), 'wb') as f:
        pickle.dump(shapelets, f)

In [4]:
# SECTION 3
# process the data by extracting k-fp features from the dataset

In [96]:
data_folder = "../ds19/"
# Trace ids 0-99 each get 100 pre-allocated slots, plus a per-id counter of the next free slot.
traces = {trace_id: [None] * 100 for trace_id in range(100)}
indices = {trace_id: 0 for trace_id in range(100)}
# File names have the form "<1-2 digits>-<1-2 digits>.cell".
# Raw strings keep "\d" a regex escape instead of an invalid string escape, and "\."
# matches only a literal dot (a bare "." would accept any character before "cell").
patterns = [
    re.compile(r"^\d{2}-\d{2}\.cell"),
    re.compile(r"^\d{2}-\d{1}\.cell"),
    re.compile(r"^\d{1}-\d{2}\.cell"),
    re.compile(r"^\d{1}-\d{1}\.cell"),
]

In [97]:
def process_kfp(filename, trace_id):
    """Load one tab-separated file from ``data_folder`` into ``traces``.

    Every non-blank line becomes a tuple of floats (one float per tab-separated
    field); the file as a whole is stored as a list of those tuples in the next
    free slot of ``traces[trace_id]``, and ``indices[trace_id]`` is advanced.

    Parameters
    ----------
    filename : str
        Name of the file inside ``data_folder``.
    trace_id : int
        Key into ``traces`` / ``indices`` under which the result is stored.

    Raises
    ------
    ValueError
        If a field cannot be parsed as a float.
    IndexError
        If ``traces[trace_id]`` has no free slot left.
    """
    converted_file = []
    # os.path.join works whether or not data_folder ends with a path separator.
    with open(os.path.join(data_folder, filename)) as f:
        for line in f:
            fields = line.rstrip()
            # Skip blank lines (e.g. a trailing newline at the end of the file);
            # float('') would otherwise raise ValueError.
            if not fields:
                continue
            converted_file.append(tuple(float(field) for field in fields.split('\t')))

    traces[trace_id][indices[trace_id]] = converted_file
    indices[trace_id] += 1

In [99]:
# os.listdir returns names in arbitrary order; sorting makes the slot each trace
# lands in reproducible from run to run.
for filename in tqdm(sorted(os.listdir(data_folder))):
    # Every pattern starts with a 1- or 2-digit number followed by a dash, so the
    # text before the first dash is the trace id whichever pattern matched.
    if any(pattern.match(filename) for pattern in patterns):
        process_kfp(filename, int(filename.split('-', 1)[0]))

100%|████████████████████████████████████| 20000/20000 [00:35<00:00, 556.85it/s]


In [100]:
# Replace every stored trace with its extract_features() result, one trace id at a time.
for trace_id in tqdm(traces):
    traces[trace_id] = list(map(extract_features, traces[trace_id]))

# The output is a pickle of the traces dict, despite the .npy extension.
with open('../ds19_kfp.npy', 'wb') as kfp_file:
    pickle.dump(traces, kfp_file)

100%|█████████████████████████████████████████| 100/100 [05:03<00:00,  3.03s/it]


In [103]:
# SECTION 4
# convert our format (from 450k dataset) to the same one as ds-19, then extract k-fp features

In [3]:
# The file is a pickle (not a numpy array) despite its .npy suffix.
# pickle.load can execute arbitrary code, so only read files you produced yourself.
with open('../nonzero_traces.npy', 'rb') as nonzero_file:
    traces = pickle.load(nonzero_file)

In [4]:
def convert_format(trace):
    """Split every packet value into an ``(absolute value, sign)`` pair.

    Parameters
    ----------
    trace : iterable of numbers
        Signed packet values.

    Returns
    -------
    list of tuple
        ``(abs(packet), np.sign(packet))`` for each packet, in input order.
        A packet equal to 0 yields a sign of 0.
    """
    pairs = []
    for packet in trace:
        magnitude = abs(packet)
        direction = np.sign(packet)
        pairs.append((magnitude, direction))
    return pairs

In [5]:
# Pass 1: rewrite every trace as (absolute value, sign) pairs via convert_format.
for trace_id in tqdm(traces):
    traces[trace_id] = list(map(convert_format, traces[trace_id]))

# Pass 2: replace each converted trace with its extract_features() result.
for trace_id in tqdm(traces):
    traces[trace_id] = list(map(extract_features, traces[trace_id]))

100%|█████████████████████████████████████████| 100/100 [16:51<00:00, 10.11s/it]


In [7]:
# The output is a pickle of the traces dict, despite the .npy extension.
with open('../nonzero_kfp.npy', 'wb') as kfp_file:
    pickle.dump(traces, kfp_file)

In [2]:
# SECTION 5
# save traces with low average distances to file for later

In [23]:
filenames = ["num=0","num=1","num=2","num=3","num=4","num=5"]

for name in filenames:
    # pickle.load can execute arbitrary code; only read files you produced yourself.
    with open('../results/data/X/' + name, 'rb') as x_file:
        X = pickle.load(x_file)
    with open('../results/data/y/' + name, 'rb') as y_file:
        y = pickle.load(y_file)

    # Mean over each consecutive block of 100 entries of X (100 blocks in total).
    average_distances = []
    for block_start in range(0, 10000, 100):
        block = X[block_start:block_start + 100]
        average_distances.append(np.mean(block))

    # Block indices ordered from smallest to largest mean.
    distance_ids = np.argsort(average_distances)
    lowest_ids = distance_ids[:50]
    remaining_ids = distance_ids[50:]

    # "min=1" receives the 50 blocks with the smallest means, "min=0" the rest.
    with open("../results/data/distances/" + name + "min=1", 'wb') as low_file:
        pickle.dump(lowest_ids, low_file)
    with open("../results/data/distances/" + name + "min=0", 'wb') as rest_file:
        pickle.dump(remaining_ids, rest_file)

In [22]:
# SECTION 6

# convert ds19 to inter-packet timing

# The file is a pickle (not a numpy array) despite its .npy suffix.
# pickle.load can execute arbitrary code, so only read files you produced yourself.
with open("../datasets/ds19.npy", 'rb') as ds19_file:
    traces = pickle.load(ds19_file)

In [23]:
# Print the size of the dict, then of its first entry, then of that entry's first trace.
level = traces
print(len(level))
for _ in range(2):
    level = level[0]
    print(len(level))

100
100
1214


In [24]:
def convert_to_ipt(trace):
    """Convert a trace of signed values into signed gaps between consecutive magnitudes.

    Element ``i`` of the result is ``sign(trace[i]) * (|trace[i]| - |trace[i-1]|)``.
    The first element has no predecessor, so its gap is measured from 0, i.e. it
    is ``sign(trace[0]) * |trace[0]|``. (Indexing ``trace[i-1]`` at ``i == 0``
    would wrap around to the last element and give ``|first| - |last|``.)

    Parameters
    ----------
    trace : array-like of numbers
        Signed values; presumably the magnitude is a timestamp and the sign a
        packet direction -- confirm against the code that builds the traces.

    Returns
    -------
    numpy.ndarray
        1-D ``float32`` array with the same length as ``trace``. An input value
        of exactly 0 has sign 0 and therefore always yields 0.
    """
    values = np.asarray(trace, dtype=np.float64)
    magnitudes = np.abs(values)
    # prepend=0.0 supplies the "previous" magnitude for the first element.
    gaps = np.diff(magnitudes, prepend=0.0)
    return (np.sign(values) * gaps).astype(np.float32)

In [17]:
# Replace every stored trace with its convert_to_ipt() result, one trace id at a time.
for trace_id in traces:
    traces[trace_id] = list(map(convert_to_ipt, traces[trace_id]))

In [20]:
# The output is a pickle of the traces dict, despite the .npy extension.
with open("../datasets/ds19_ipt.npy", 'wb') as ipt_file:
    pickle.dump(traces, ipt_file)

In [None]:
# testing

In [91]:
# The file is a pickle (not a numpy array) despite its .npy suffix.
# pickle.load can execute arbitrary code, so only read files you produced yourself.
with open("../datasets/ds19.npy", 'rb') as ds19_file:
    traces = pickle.load(ds19_file)

In [92]:
def merge_traces(traces, overlap_factor):
    signs = [np.sign(trace) for trace in traces]

    overlap_val_1 = abs(traces[0][round(len(traces[0]) * (1-overlap_factor))])
    overlap_val_2 = abs(traces[1][round(len(traces[1]) * (1-overlap_factor))])
    
    traces[1] = [(abs(traces[1][i]) + overlap_val_1) * signs[1][i] for i in range(len(traces[1]))]
    traces[2] = [(abs(traces[2][i]) + overlap_val_1 + overlap_val_2) * signs[2][i] for i in range(len(traces[2]))]
    
    combined = np.concatenate((traces[0], traces[1], traces[2]))
    
    return sorted(combined, key=abs)