In [None]:

from google.colab import drive
# Mount Google Drive at /content/drive; the dataset paths used in the cells
# below all live under this mount point.
drive.mount('/content/drive')

import pickle
import numpy as np

# Feature Extraction Function
def extract_cumulative_features(packet_sequence, max_points=10):
    """
    Build a fixed-length feature vector from a trace's running packet-size total.

    Parameters
    ----------
    packet_sequence : sequence of numbers
        Per-packet signed sizes (direction x size; callers in this notebook
        pass +/-512 values).
    max_points : int, default 10
        Length of the returned vector, i.e. how many positions of the
        cumulative curve are sampled.

    Returns
    -------
    numpy.ndarray
        `max_points` values taken at evenly spaced positions of the cumulative
        sum after it has been scaled by its largest absolute value. An empty
        input yields a vector of zeros.
    """
    # Nothing to accumulate: hand back an all-zero feature vector.
    if len(packet_sequence) == 0:
        return np.zeros(max_points)

    running_total = np.cumsum(packet_sequence)

    # Scale by the peak magnitude; the small epsilon keeps the division
    # defined when the running total is zero everywhere.
    peak = np.abs(running_total).max() + 1e-6
    normalized = running_total / peak

    # Evenly spaced positions from the first to the last sample. Fractional
    # positions are truncated, so traces shorter than max_points repeat entries.
    last_index = len(running_total) - 1
    positions = np.linspace(0, last_index, max_points).astype(int)
    return normalized[positions]


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Monitored Dataset

In [None]:
# ---- Load the monitored traces from Drive ----
MON_PATH = '/content/drive/MyDrive/ML_Dataset/mon_standard.pkl'
print("Loading monitored dataset...")
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open(MON_PATH, 'rb') as f:
    mon_data = pickle.load(f)

# ---- Labelling configuration ----
TOTAL_URLS = 950      # entries of mon_data that are read (indices 0 .. TOTAL_URLS - 1)
URL_PER_SITE = 10     # consecutive URL indices that share one site label
USE_SUBLABEL = False  # True: label = URL index; False: label = URL index // URL_PER_SITE

# Parallel per-trace lists: X1 holds magnitudes, X2 holds signed sizes, y the labels.
X1_mon, X2_mon, y_mon = [], [], []
features_cum_mon = []

for i in range(TOTAL_URLS):
    if USE_SUBLABEL:
        label = i
    else:
        label = i // URL_PER_SITE

    for trace in mon_data[i]:
        # Positive entries become +512; everything else (zero included) becomes -512.
        size_seq = [512 if c > 0 else -512 for c in trace]
        # Magnitude of every entry (presumably the timestamp -- TODO confirm).
        time_seq = list(map(abs, trace))

        y_mon.append(label)
        X1_mon.append(time_seq)
        X2_mon.append(size_seq)

        # 10-point cumulative-size feature for this trace.
        cum_feat = extract_cumulative_features(size_seq, max_points=10)
        features_cum_mon.append(cum_feat)

# Stack the per-trace vectors into one (n_traces, 10) array.
features_cum_mon = np.array(features_cum_mon)
print("âœ… Monitored cumulative feature shape:", features_cum_mon.shape)
print("âœ… Monitored labels shape:", len(y_mon))



Loading monitored dataset...
âœ… Monitored cumulative feature shape: (19000, 10)
âœ… Monitored labels shape: 19000


### Unmonitored Dataset

In [None]:
# ---- Load the unmonitored traces from Drive ----
UNMON_PATH = '/content/drive/MyDrive/ML_Dataset/unmon_standard10_3000.pkl'
print("Loading unmonitored dataset...")
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open(UNMON_PATH, 'rb') as f:
    unmon_data = pickle.load(f)

# Parallel per-trace lists: X1 holds magnitudes, X2 holds signed sizes.
X1_unmon, X2_unmon = [], []
features_cum_unmon = []

for trace in unmon_data:
    # Positive entries become +512; everything else (zero included) becomes -512.
    size_seq = [512 if c > 0 else -512 for c in trace]
    # Magnitude of every entry (presumably the timestamp -- TODO confirm).
    time_seq = list(map(abs, trace))

    X1_unmon.append(time_seq)
    X2_unmon.append(size_seq)

    # 10-point cumulative-size feature for this trace.
    cum_feat = extract_cumulative_features(size_seq, max_points=10)
    features_cum_unmon.append(cum_feat)

# Stack the per-trace vectors into one (n_traces, 10) array.
features_cum_unmon = np.array(features_cum_unmon)
# Every unmonitored trace gets the single label -1.
labels_unmon = np.full(len(X1_unmon), -1)
print("âœ… Unmonitored cumulative feature shape:", features_cum_unmon.shape)
print("âœ… Unmonitored labels shape:", labels_unmon.shape)


Loading unmonitored dataset...
âœ… Unmonitored cumulative feature shape: (3000, 10)
âœ… Unmonitored labels shape: (3000,)


### Save Extracted Features

In [None]:
# Write both feature matrices to Drive as .npy files, monitored first.
for out_path, feature_matrix in (
    ('/content/drive/MyDrive/ML_Dataset/features_cum_mon.npy', features_cum_mon),
    ('/content/drive/MyDrive/ML_Dataset/features_cum_unmon.npy', features_cum_unmon),
):
    np.save(out_path, feature_matrix)
print("ðŸ’¾ Saved cumulative features to Drive!")

ðŸ’¾ Saved cumulative features to Drive!
