In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive (Colab-only helper); the dataset and
# output paths used later in this notebook all live under this mount point.
drive.mount('/content/drive')

import pickle
import numpy as np

# Burst Feature Functions

def extract_bursts(timestamps, directions):
    """Split a packet trace into bursts of consecutive same-direction packets.

    Args:
        timestamps: Sequence of per-packet times, indexable by position.
        directions: Sequence of per-packet direction values. Only the sign
            matters: values > 0 map to +1, everything else (including 0)
            maps to -1.

    Returns:
        A list of ``(sign, start_time, end_time)`` tuples, one per burst, in
        trace order. ``start_time``/``end_time`` are the timestamps of the
        first and last packet of the burst. An empty trace yields ``[]``.
    """
    total = len(timestamps)
    polarity = np.where(np.array(directions) > 0, 1, -1)

    runs = []
    head = 0  # index of the first packet of the burst currently being scanned
    for pos in range(1, total + 1):
        # A burst closes when the trace ends or the direction flips.
        if pos == total or polarity[pos] != polarity[head]:
            runs.append((polarity[head], timestamps[head], timestamps[pos - 1]))
            head = pos
    return runs


def extract_burst_features_for_trace(timestamps, directions, max_first_bursts=10):
    """Build a fixed-length burst feature vector for one trace.

    Layout of the returned vector:
        index 0      -> total number of bursts in the trace
        index 1..k   -> duration (end time - start time) of each of the
                        first ``max_first_bursts`` bursts, in trace order

    Traces with fewer than ``max_first_bursts`` bursts are right-padded with
    ``0.0`` so the vector has ``1 + max_first_bursts`` entries.

    Args:
        timestamps: Per-packet times, forwarded to ``extract_bursts``.
        directions: Per-packet direction values, forwarded to ``extract_bursts``.
        max_first_bursts: How many leading bursts contribute a duration.

    Returns:
        A 1-D ``np.ndarray`` holding the features described above.
    """
    runs = extract_bursts(timestamps, directions)
    durations = [stop - begin for _, begin, stop in runs[:max_first_bursts]]

    row = [len(runs)] + durations
    # Pad with zeros up to the fixed width (a non-positive count adds nothing).
    row.extend([0.0] * (1 + max_first_bursts - len(row)))
    return np.array(row)



Mounted at /content/drive


### Monitored Dataset

In [None]:

MON_PATH = '/content/drive/MyDrive/ML_Dataset/mon_standard.pkl'
print("Loading monitored dataset...")
# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load pickles that come from a trusted source.
with open(MON_PATH, 'rb') as f:
    mon_data = pickle.load(f)

TOTAL_URLS = 950      # mon_data is indexed by URL id 0 .. TOTAL_URLS - 1
URL_PER_SITE = 10     # consecutive URL ids that collapse into one site label
USE_SUBLABEL = False  # True: label = URL id; False: label = URL id // URL_PER_SITE

features_burst_mon = []
labels_mon = []

for i in range(TOTAL_URLS):
    label = i if USE_SUBLABEL else i // URL_PER_SITE
    for trace in mon_data[i]:
        # Each trace value is read as: magnitude -> time, sign -> direction.
        time_seq = [abs(c) for c in trace]
        # NOTE(review): 512 is presumably a fixed cell size - confirm; a value
        # of exactly 0 is treated as the negative direction.
        size_seq = [(1 if c > 0 else -1) * 512 for c in trace]

        # Burst feature
        burst_feat = extract_burst_features_for_trace(time_seq, size_seq, max_first_bursts=10)
        features_burst_mon.append(burst_feat)
        labels_mon.append(label)

features_burst_mon = np.array(features_burst_mon)
labels_mon = np.array(labels_mon)

# The emoji are written as escape sequences so the source stays pure ASCII;
# the original literals had been corrupted into mojibake by a bad re-encoding.
print("\u2705 Monitored burst feature shape:", features_burst_mon.shape)
np.save('/content/drive/MyDrive/ML_Dataset/features_burst_mon.npy', features_burst_mon)
np.save('/content/drive/MyDrive/ML_Dataset/labels_mon.npy', labels_mon)
print("\U0001F4BE Saved monitored burst features!")



Loading monitored dataset...
✅ Monitored burst feature shape: (19000, 11)
💾 Saved monitored burst features!


### Unmonitored Dataset

In [None]:

UNMON_PATH = '/content/drive/MyDrive/ML_Dataset/unmon_standard10_3000.pkl'
print("Loading unmonitored dataset...")
# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load pickles that come from a trusted source.
with open(UNMON_PATH, 'rb') as f:
    unmon_data = pickle.load(f)

features_burst_unmon = []
labels_unmon = []

for trace in unmon_data:
    # Each trace value is read as: magnitude -> time, sign -> direction.
    time_seq = [abs(c) for c in trace]
    # NOTE(review): 512 is presumably a fixed cell size - confirm; a value
    # of exactly 0 is treated as the negative direction.
    size_seq = [(1 if c > 0 else -1) * 512 for c in trace]

    burst_feat = extract_burst_features_for_trace(time_seq, size_seq, max_first_bursts=10)
    features_burst_unmon.append(burst_feat)
    # Every unmonitored trace shares the single label -1.
    labels_unmon.append(-1)

features_burst_unmon = np.array(features_burst_unmon)
labels_unmon = np.array(labels_unmon)

# The emoji are written as escape sequences so the source stays pure ASCII;
# the original literals had been corrupted into mojibake by a bad re-encoding.
print("\u2705 Unmonitored burst feature shape:", features_burst_unmon.shape)
np.save('/content/drive/MyDrive/ML_Dataset/features_burst_unmon.npy', features_burst_unmon)
np.save('/content/drive/MyDrive/ML_Dataset/labels_unmon.npy', labels_unmon)
print("\U0001F4BE Saved unmonitored burst features!")


Loading unmonitored dataset...
✅ Unmonitored burst feature shape: (3000, 11)
💾 Saved unmonitored burst features!
