In [None]:

# Colab-only step: mount Google Drive at /content/drive so that the dataset
# files referenced later in this notebook (under /content/drive/MyDrive/...)
# can be opened like local files.
from google.colab import drive
drive.mount('/content/drive')

import pickle
import numpy as np

# In/Out Feature Function

def extract_inout_features(packet_sequence):
    """Summarise the direction mix of a signed packet sequence.

    Negative entries are counted as incoming and positive entries as
    outgoing. Zero entries belong to neither group but still count towards
    the total used as the denominator of the fractions.

    Args:
        packet_sequence: Sized iterable of signed numbers, one per packet.

    Returns:
        A tuple ``(incoming_count, outgoing_count, incoming_fraction,
        outgoing_fraction)``. Both fractions are ``0`` when the sequence
        is empty.
    """
    n_in = 0
    n_out = 0
    for value in packet_sequence:
        if value < 0:
            n_in += 1
        elif value > 0:
            n_out += 1

    n_total = len(packet_sequence)
    if n_total > 0:
        frac_in = n_in / n_total
        frac_out = n_out / n_total
    else:
        # Empty trace: avoid dividing by zero and report zero fractions.
        frac_in = 0
        frac_out = 0

    return n_in, n_out, frac_in, frac_out




Mounted at /content/drive


### Monitored Dataset

In [None]:

# Build in/out direction features for the monitored dataset and save them
# (features + labels) as .npy files next to the source pickle.
DATA_DIR = '/content/drive/MyDrive/ML_Dataset'
MON_PATH = DATA_DIR + '/mon_standard.pkl'
print("Loading monitored dataset...")
# NOTE: pickle.load can execute arbitrary code while deserialising --
# only load a file whose origin you trust.
with open(MON_PATH, 'rb') as f:
    mon_data = pickle.load(f)

TOTAL_URLS = 950      # entries mon_data[0] .. mon_data[949] are read
URL_PER_SITE = 10     # consecutive URL indices that share one label
USE_SUBLABEL = False  # True: label = URL index; False: label = URL index // URL_PER_SITE

features_inout_mon = []
labels_mon = []

for i in range(TOTAL_URLS):
    label = i if USE_SUBLABEL else i // URL_PER_SITE
    for trace in mon_data[i]:
        # Only the sign of each entry is used: positive entries become +512,
        # everything else (including 0) becomes -512.
        # NOTE(review): 512 is presumably a fixed per-packet size -- confirm.
        size_seq = [(1 if c > 0 else -1) * 512 for c in trace]

        # In/Out feature: one row per trace, columns are
        # [incoming count, outgoing count, incoming fraction, outgoing fraction].
        in_count, out_count, in_frac, out_frac = extract_inout_features(size_seq)
        features_inout_mon.append([in_count, out_count, in_frac, out_frac])
        labels_mon.append(label)

features_inout_mon = np.array(features_inout_mon)
labels_mon = np.array(labels_mon)

print("✅ Monitored in/out feature shape:", features_inout_mon.shape)
np.save(DATA_DIR + '/features_inout_mon.npy', features_inout_mon)
np.save(DATA_DIR + '/labels_mon.npy', labels_mon)
print("💾 Saved monitored in/out features!")



Loading monitored dataset...
✅ Monitored in/out feature shape: (19000, 4)
💾 Saved monitored in/out features!


### Unmonitored Dataset

In [None]:

# Build in/out direction features for the unmonitored dataset and save them
# (features + labels) as .npy files next to the source pickle.
DATA_DIR = '/content/drive/MyDrive/ML_Dataset'
UNMON_PATH = DATA_DIR + '/unmon_standard10_3000.pkl'
print("Loading unmonitored dataset...")
# NOTE: pickle.load can execute arbitrary code while deserialising --
# only load a file whose origin you trust.
with open(UNMON_PATH, 'rb') as f:
    unmon_data = pickle.load(f)

features_inout_unmon = []
labels_unmon = []

for trace in unmon_data:
    # Only the sign of each entry is used: positive entries become +512,
    # everything else (including 0) becomes -512.
    # NOTE(review): 512 is presumably a fixed per-packet size -- confirm.
    size_seq = [(1 if c > 0 else -1) * 512 for c in trace]
    # One row per trace, columns are
    # [incoming count, outgoing count, incoming fraction, outgoing fraction].
    in_count, out_count, in_frac, out_frac = extract_inout_features(size_seq)
    features_inout_unmon.append([in_count, out_count, in_frac, out_frac])
    # Every unmonitored trace gets the same label, -1.
    labels_unmon.append(-1)

features_inout_unmon = np.array(features_inout_unmon)
labels_unmon = np.array(labels_unmon)

print("✅ Unmonitored in/out feature shape:", features_inout_unmon.shape)
np.save(DATA_DIR + '/features_inout_unmon.npy', features_inout_unmon)
np.save(DATA_DIR + '/labels_unmon.npy', labels_unmon)
print("💾 Saved unmonitored in/out features!")

Loading unmonitored dataset...
✅ Unmonitored in/out feature shape: (3000, 4)
💾 Saved unmonitored in/out features!
