In [2]:
import csv
from sklearn.metrics import classification_report, accuracy_score


In [None]:
def load_true_labels(test_file):
    """Read the ground-truth labels from a tab-separated file.

    The file is parsed with ``csv.DictReader`` using a tab delimiter, so its
    first row provides the column names.

    Args:
        test_file: Path of the TSV file; it must contain a ``label`` column.

    Returns:
        A list with ``int(row['label'])`` for every data row, in file order.

    Raises:
        KeyError: If the file has no ``label`` column.
        ValueError: If a ``label`` value cannot be parsed as an integer.
    """
    labels = []
    with open(test_file, newline='') as handle:
        for record in csv.DictReader(handle, delimiter='\t'):
            labels.append(int(record['label']))
    return labels

def load_predicted_labels(pred_file):
    """Read predicted labels from a file holding one label per line.

    Each line is stripped of surrounding whitespace. Lines that are not made
    up purely of decimal digits (for example a header line, blank lines, or
    negative numbers) are skipped rather than reported.

    Args:
        pred_file: Path of the prediction file.

    Returns:
        A list of the integer labels found, in file order.
    """
    labels = []
    with open(pred_file, 'r') as f:
        for line in f:
            token = line.strip()
            # isdecimal() instead of isdigit(): isdigit() is also true for
            # characters such as superscript digits ("²") that int() cannot
            # parse, which would turn one stray line into a ValueError.
            if token.isdecimal():
                labels.append(int(token))
    return labels

def main(test_path="./test_dataset.tsv", pred_path="./prediction.tsv"):
    """Print accuracy and a per-class report for a prediction file.

    The defaults are the files this function previously read unconditionally.
    Other datasets can be evaluated by passing their paths, e.g.
    ``datasets/cstnet-tls1.3/packet/test_dataset.tsv`` and
    ``datasets/cstnet-tls1.3/packet/prediction.tsv``.

    Args:
        test_path: TSV file with a ``label`` column holding the true labels.
        pred_path: File with one predicted label per line.

    Raises:
        AssertionError: If the two files yield a different number of labels.
    """
    y_true = load_true_labels(test_path)
    y_pred = load_predicted_labels(pred_path)

    # load_predicted_labels skips lines that are not plain digits, so a
    # mismatch here can also mean that some prediction lines were dropped.
    assert len(y_true) == len(y_pred), "Mismatch in number of samples!"

    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("\nDetailed Report:\n")
    print(classification_report(y_true, y_pred, digits=4))


# Run the evaluation using main()'s built-in file paths.
main()

In [1]:
import pandas as pd

In [3]:
# Load the test split and (judging by the file name) its label-free
# counterpart. read_table parses tab-separated files by default.
df = pd.read_table("datasets/cstnet-tls1.3/packet/test_dataset.tsv")
df_no_label = pd.read_table("datasets/cstnet-tls1.3/packet/nolabel_test_dataset.tsv")



In [4]:
# Preview the first rows of the text_a column.
df["text_a"].head()

0    cbb8 b854 5421 21c3 c3de de8d 8d32 3264 64e1 e...
1    e2b3 b3af af8a 8a62 6263 6361 61cc cc73 734a 4...
2    01bb bbfc fc8e 8e3c 3c82 8250 50de de59 59f7 f...
3    480b 0b21 211f 1f37 374d 4d2f 2fb3 b341 41e6 e...
4    01bb bb55 5529 294f 4f3f 3f77 7787 8744 4402 0...
Name: text_a, dtype: object

In [10]:
# add random bigrams to the text_a column
import random
import string




def append_k_random_bigrams(text, k=32, rng=None):
    """Extend a space-separated token string with ``k`` random bigrams.

    Every appended token is built from the last two characters of the
    previous token followed by a freshly drawn random byte written as two
    lowercase hex digits, so consecutive tokens overlap by one byte. This
    assumes tokens are 4 hex characters (two bytes), as in the displayed
    ``text_a`` values.

    Args:
        text: Whitespace-separated tokens. Surrounding whitespace is ignored
            and inner runs of whitespace collapse to single spaces in the
            result.
        k: Number of bigrams to append; ``0`` or a negative value appends
            nothing.
        rng: Optional object with a ``randint(a, b)`` method, e.g.
            ``random.Random(seed)``, for reproducible output. Defaults to the
            module-level ``random`` generator.

    Returns:
        The original tokens followed by the ``k`` new ones, joined by single
        spaces.
    """
    if rng is None:
        rng = random

    tokens = text.strip().split()

    if tokens:
        # The trailing byte (last two hex characters) of the final token
        # becomes the leading byte of the first appended bigram.
        prev_byte = tokens[-1][-2:]
    else:
        # Empty input used to fail with IndexError; with nothing to chain
        # from, start the sequence from a random byte instead.
        prev_byte = f"{rng.randint(0, 255):02x}"

    for _ in range(k):
        new_byte = f"{rng.randint(0, 255):02x}"
        tokens.append(prev_byte + new_byte)
        prev_byte = new_byte

    return ' '.join(tokens)


# Extend text_a in both frames with the default number of random bigrams.
# NOTE: each column is overwritten with its own transformed values, so
# running this cell again appends another batch on top of the first.
for _frame in (df, df_no_label):
    _frame["text_a"] = _frame["text_a"].apply(append_k_random_bigrams)


In [8]:
# Keep at most the first 140 characters of every text_a string. The cut is
# made by character, not by token, so the final token may be left partial.
df['text_a'] = df['text_a'].str.slice(stop=140)
df_no_label['text_a'] = df_no_label['text_a'].str.slice(stop=140)



In [9]:
# Inspect the first 40 text_a values.
df["text_a"].head(40)

0     cbb8 b854 5421 21c3 c3de de8d 8d32 3264 64e1 e...
1     e2b3 b3af af8a 8a62 6263 6361 61cc cc73 734a 4...
2     01bb bbfc fc8e 8e3c 3c82 8250 50de de59 59f7 f...
3     480b 0b21 211f 1f37 374d 4d2f 2fb3 b341 41e6 e...
4     01bb bb55 5529 294f 4f3f 3f77 7787 8744 4402 0...
5     9a52 52ad adcb cb96 9690 90b8 b8ce ceb1 b1cf c...
6     9262 62e3 e3c1 c1f3 f3b8 b8cc cc9c 9c87 87d3 d...
7     e69e 9ead adad ad32 3242 4279 7998 981e 1ec4 c...
8     f4d5 d57f 7f05 05de de74 7409 090c 0c5f 5fa4 a...
9     ceda da4d 4da0 a0ef ef3f 3f66 6695 9567 675b 5...
10    01bb bbc0 c0a2 a2b2 b2ce ce04 04c3 c3de de5e 5...
11    c5dc dc93 93e5 e50b 0b57 57d6 d6f2 f2e4 e42f 2...
12    cad0 d0d7 d78a 8a18 180a 0ac8 c8c9 c9a7 a773 7...
13    d4b4 b476 760c 0c99 9927 2706 06e9 e96f 6fa6 a...
14    abd9 d900 00a6 a658 5835 35ad ad5f 5f28 2869 6...
15    dc42 42a4 a46d 6d81 8196 9610 10b4 b4e8 e889 8...
16    f1d2 d22a 2a3e 3e0d 0df0 f0d8 d89b 9b0f 0f92 9...
17    84bc bcaf af61 6181 816d 6dfd fd3d 3d99 99

In [5]:
# Write both frames as tab-separated files without the index column.
# The paths are relative, so the files land in the current working directory.
df.to_csv(path_or_buf="test_dataset_noTCP.tsv", index=False, sep='\t')
df_no_label.to_csv(path_or_buf="nolabel_test_dataset_noTCP.tsv", index=False, sep='\t')