# **GPT Settings**

In [103]:
!pip install openai==0.28



In [104]:
import openai
import os

# **Extract Handshake Output**

In [105]:
with open('/content/handshake_hex.txt', 'r') as f:  # Adjust path if needed
    hex_dump = f.read()

**Producing clean hex:**

In [106]:
import re
# Extract hex bytes using regex
hex_lines = []
for line in hex_dump.strip().split('\n'):
    # Extract only valid 2-digit hex groups
    hex_bytes = re.findall(r'\b[0-9a-fA-F]{2}\b', line)
    if hex_bytes:
        hex_lines.append(' '.join(hex_bytes))

# Join all hex lines into one full hex stream
clean_hex_output = '\n'.join(hex_lines)
print(clean_hex_output)

02 00 00 00 45 00 00 34 6d 36 40 00 80 06 00 00
7f 00 00 01 7f 00 00 01 ca 3b 30 39 38 25 3e 79
00 00 00 00 80 02 ff ff 05 d7 00 00 02 04 ff d7
01 03 03 08 01 01 04 02
02 00 00 00 45 00 00 34 6d 37 40 00 80 06 00 00
7f 00 00 01 7f 00 00 01 30 39 ca 3b 57 dc 1f 22 09
38 25 3e 7a 80 12 ff ff 8e c7 00 00 02 04 ff d7
01 03 03 08 01 01 04 02
02 00 00 00 45 00 00 28 6d 38 40 00 80 06 00 00
7f 00 00 01 7f 00 00 01 ca 3b 30 39 38 25 3e 7a
57 dc 1f 23 50 10 00 ff c8 bf 00 00
02 00 00 00 45 00 00 35 6d 39 40 00 80 06 00 00
7f 00 00 01 7f 00 00 01 ca 3b 30 39 38 25 3e 7a
57 dc 1f 23 50 18 00 ff 58 6a 00 00 48 65 6c 6c
6f 20 53 65 72 76 65 72 21
02 00 00 00 45 00 00 28 6d 3a 40 00 80 06 00 00
7f 00 00 01 7f 00 00 01 30 39 ca 3b 57 dc 1f 23 09
38 25 3e 87 50 10 00 ff c8 b2 00 00
02 00 00 00 45 00 00 35 6d 3b 40 00 80 06 00 00
7f 00 00 01 7f 00 00 01 30 39 ca 3b 57 dc 1f 23 09
38 25 3e 87 50 18 00 ff 68 65 00 00 48 65 6c 6c
6f 20 43 6c 69 65 6e 74 21
02 00 00 00 45 00 00 28 6d 3c 40 00 80 06 00 00
7

**Getting rid of spaces from HEX data**

In [107]:
import pandas as pd
from google.colab import files

# 1. Paste your full hex dump inside the triple quotes
raw_hex = clean_hex_output

# 2. Clean lines and split into packets
lines = [l.strip() for l in raw_hex.splitlines() if l.strip()]

packets_with_spaces = []
current = []

for line in lines:
    # New packet starts when we see the loopback+IPv4 header pattern
    if line.startswith("02 00 00 00 45 00"):
        if current:
            packets_with_spaces.append(" ".join(current))
            current = []
    current.append(line)

# Add the last packet
if current:
    packets_with_spaces.append(" ".join(current))

# 3. Also create a version with no spaces (continuous hex string)
packets_clean = [p.replace(" ", "") for p in packets_with_spaces]

# 4. Build DataFrame
df = pd.DataFrame({
    "packet_id": range(1, len(packets_clean) + 1),
    "hex_with_spaces": packets_with_spaces,
    "hex_clean": packets_clean,
})

# 5. Save to CSV in Colab
csv_name = "tcp_packets_hex.csv"
df.to_csv(csv_name, index=False)
print(f"Saved {len(df)} packets to {csv_name}")

# 6. Offer CSV for download
files.download(csv_name)


Saved 221 packets to tcp_packets_hex.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

**Loading Wireshark (non hex) output from CMD**

In [108]:
import pandas as pd

col_names = [
    "packet_id",
    "src_port",
    "dst_port",
    "seq",
    "ack",
    "tcp_hdr_len",
    "tcp_flags",
    "tcp_flag_syn",
    "tcp_flag_ack",
    "window_size",
    "checksum",
    "urgent_pointer",
    "tcp_options",
    "tcp_payload",
]

df = pd.read_csv("/content/handshake_packets.txt",
                 header=None,
                 names=col_names)

df.head()


Unnamed: 0,packet_id,src_port,dst_port,seq,ack,tcp_hdr_len,tcp_flags,tcp_flag_syn,tcp_flag_ack,window_size,checksum,urgent_pointer,tcp_options,tcp_payload
0,1,56039,12345,0,0,32,0x0002,True,False,65535,0x953f,0,0204ffd70103030801010402,
1,2,12345,56039,0,1,32,0x0012,True,True,65535,0x6c5e,0,0204ffd70103030801010402,
2,3,56039,12345,1,1,20,0x0010,False,True,255,0xa656,0,,
3,4,56039,12345,1,1,20,0x0018,False,True,255,0x3601,0,,48656c6c6f2053657276657221
4,5,12345,56039,1,14,20,0x0010,False,True,255,0xa649,0,,


**Decoding each TCP flag from flags column**

In [109]:
def decode_flags(flags_hex):
    # flags_hex like '0x0018'
    flags = int(str(flags_hex), 16)
    return pd.Series({
        "flag_FIN": bool(flags & 0x01),
        "flag_SYN": bool(flags & 0x02),
        "flag_RST": bool(flags & 0x04),
        "flag_PSH": bool(flags & 0x08),
        "flag_ACK": bool(flags & 0x10),
        "flag_URG": bool(flags & 0x20),
        "flag_ECE": bool(flags & 0x40),
        "flag_CWR": bool(flags & 0x80),
    })

df_flags = df["tcp_flags"].apply(decode_flags)
df = pd.concat([df, df_flags], axis=1)

df.head()


Unnamed: 0,packet_id,src_port,dst_port,seq,ack,tcp_hdr_len,tcp_flags,tcp_flag_syn,tcp_flag_ack,window_size,...,tcp_options,tcp_payload,flag_FIN,flag_SYN,flag_RST,flag_PSH,flag_ACK,flag_URG,flag_ECE,flag_CWR
0,1,56039,12345,0,0,32,0x0002,True,False,65535,...,0204ffd70103030801010402,,False,True,False,False,False,False,False,False
1,2,12345,56039,0,1,32,0x0012,True,True,65535,...,0204ffd70103030801010402,,False,True,False,False,True,False,False,False
2,3,56039,12345,1,1,20,0x0010,False,True,255,...,,,False,False,False,False,True,False,False,False
3,4,56039,12345,1,1,20,0x0018,False,True,255,...,,48656c6c6f2053657276657221,False,False,False,True,True,False,False,False
4,5,12345,56039,1,14,20,0x0010,False,True,255,...,,,False,False,False,False,True,False,False,False


**Categorizing PAcket Types**

In [110]:
def classify_pkt(row):
    syn = row["flag_SYN"]
    ack = row["flag_ACK"]
    fin = row["flag_FIN"]
    rst = row["flag_RST"]

    if syn and not ack:
        return "SYN"
    if syn and ack:
        return "SYN-ACK"
    if fin and ack:
        return "FIN-ACK"
    if fin and not ack:
        return "FIN"
    if rst:
        return "RST"
    if ack:
        return "ACK"
    return "OTHER"

df["pkt_type"] = df.apply(classify_pkt, axis=1)

df[["packet_id", "src_port", "dst_port", "tcp_flags", "pkt_type"]].head(20)


Unnamed: 0,packet_id,src_port,dst_port,tcp_flags,pkt_type
0,1,56039,12345,0x0002,SYN
1,2,12345,56039,0x0012,SYN-ACK
2,3,56039,12345,0x0010,ACK
3,4,56039,12345,0x0018,ACK
4,5,12345,56039,0x0010,ACK
5,6,12345,56039,0x0018,ACK
6,7,56039,12345,0x0010,ACK
7,8,12345,56039,0x0011,FIN-ACK
8,9,56039,12345,0x0010,ACK
9,10,56039,12345,0x0011,FIN-ACK


**Merging Two dataframes for capturing all TCP header fields with Values**

In [111]:
df_hex = pd.read_csv("/content/tcp_packets_hex.csv")  # packet_id, hex_with_spaces, hex_clean

df_full = df_hex.merge(df, on="packet_id", how="left")

df_full.to_csv("/content/tcp_handshake_full_tcp_headers.csv", index=False)


**Adding Flow Connections**

In [112]:
import pandas as pd

df = pd.read_csv("/content/tcp_handshake_full_tcp_headers.csv")

# loopback, so we can define a flow just by ports (smallest-biggest)
df["flow_id"] = df.apply(
    lambda r: f"{min(r.src_port, r.dst_port)}-{max(r.src_port, r.dst_port)}",
    axis=1
)
df.head()


Unnamed: 0,packet_id,hex_with_spaces,hex_clean,src_port,dst_port,seq,ack,tcp_hdr_len,tcp_flags,tcp_flag_syn,...,flag_FIN,flag_SYN,flag_RST,flag_PSH,flag_ACK,flag_URG,flag_ECE,flag_CWR,pkt_type,flow_id
0,1,02 00 00 00 45 00 00 34 6d 36 40 00 80 06 00 0...,02000000450000346d364000800600007f0000017f0000...,56039,12345,0,0,32,0x0002,True,...,False,True,False,False,False,False,False,False,SYN,12345-56039
1,2,02 00 00 00 45 00 00 34 6d 37 40 00 80 06 00 0...,02000000450000346d374000800600007f0000017f0000...,12345,56039,0,1,32,0x0012,True,...,False,True,False,False,True,False,False,False,SYN-ACK,12345-56039
2,3,02 00 00 00 45 00 00 28 6d 38 40 00 80 06 00 0...,02000000450000286d384000800600007f0000017f0000...,56039,12345,1,1,20,0x0010,False,...,False,False,False,False,True,False,False,False,ACK,12345-56039
3,4,02 00 00 00 45 00 00 35 6d 39 40 00 80 06 00 0...,02000000450000356d394000800600007f0000017f0000...,56039,12345,1,1,20,0x0018,False,...,False,False,False,True,True,False,False,False,ACK,12345-56039
4,5,02 00 00 00 45 00 00 28 6d 3a 40 00 80 06 00 0...,02000000450000286d3a4000800600007f0000017f0000...,12345,56039,1,14,20,0x0010,False,...,False,False,False,False,True,False,False,False,ACK,12345-56039


**Getting individual handshake outputs**

In [113]:
handshakes = []

for flow_id, g in df.groupby("flow_id"):
    g = g.sort_values("packet_id")

    syn = g[g["pkt_type"] == "SYN"].head(1)
    if syn.empty:
        continue

    syn_pkt = syn.iloc[0]

    syn_ack = g[(g["pkt_type"] == "SYN-ACK") &
                (g["packet_id"] > syn_pkt["packet_id"])].head(1)
    if syn_ack.empty:
        continue
    syn_ack_pkt = syn_ack.iloc[0]

    ack = g[(g["pkt_type"] == "ACK") &
            (g["packet_id"] > syn_ack_pkt["packet_id"])].head(1)
    if ack.empty:
        continue
    ack_pkt = ack.iloc[0]

    handshakes.append({
        "flow_id": flow_id,
        "syn_id": int(syn_pkt["packet_id"]),
        "syn_ack_id": int(syn_ack_pkt["packet_id"]),
        "ack_id": int(ack_pkt["packet_id"]),
    })

df_hs = pd.DataFrame(handshakes)
df_hs.head()


Unnamed: 0,flow_id,syn_id,syn_ack_id,ack_id
0,12345-56039,1,2,3
1,12345-56040,12,13,14
2,12345-56041,23,24,25
3,12345-56042,33,34,35
4,12345-56043,44,45,46


In [114]:
import pandas as pd

# All packets with headers + pkt_type + hex
df = pd.read_csv("/content/tcp_handshake_full_tcp_headers.csv")

# The handshake summary you just printed
df_hs = pd.DataFrame(handshakes)
  # or re-create from earlier code if needed
print(df_hs.head())


       flow_id  syn_id  syn_ack_id  ack_id
0  12345-56039       1           2       3
1  12345-56040      12          13      14
2  12345-56041      23          24      25
3  12345-56042      33          34      35
4  12345-56043      44          45      46


#  **1. Ground Truth**

In [115]:
# We'll grab only the fields we need for now
cols_keep = [
    "packet_id",
    "hex_clean",
    "pkt_type",
    "seq",
    "ack",
    "window_size",
    "tcp_flags",
]

df_small = df[cols_keep].set_index("packet_id")

handshake_rows = []

for _, row in df_hs.iterrows():
    syn     = df_small.loc[row["syn_id"]]
    synack  = df_small.loc[row["syn_ack_id"]]
    ackpkt  = df_small.loc[row["ack_id"]]

    handshake_rows.append({
        "flow_id":   row["flow_id"],

        "syn_id":    int(row["syn_id"]),
        "syn_type":  syn["pkt_type"],
        "syn_seq":   syn["seq"],
        "syn_ack":   syn["ack"],
        "syn_wss":   syn["window_size"],
        "syn_flags": syn["tcp_flags"],
        "syn_hex":   syn["hex_clean"],

        "synack_id":    int(row["syn_ack_id"]),
        "synack_type":  synack["pkt_type"],
        "synack_seq":   synack["seq"],
        "synack_ack":   synack["ack"],
        "synack_wss":   synack["window_size"],
        "synack_flags": synack["tcp_flags"],
        "synack_hex":   synack["hex_clean"],

        "ack_id":    int(row["ack_id"]),
        "ack_type":  ackpkt["pkt_type"],
        "ack_seq":   ackpkt["seq"],
        "ack_ack":   ackpkt["ack"],
        "ack_wss":   ackpkt["window_size"],
        "ack_flags": ackpkt["tcp_flags"],
        "ack_hex":   ackpkt["hex_clean"],
    })

df_handshake = pd.DataFrame(handshake_rows)
df_handshake.head()


Unnamed: 0,flow_id,syn_id,syn_type,syn_seq,syn_ack,syn_wss,syn_flags,syn_hex,synack_id,synack_type,...,synack_wss,synack_flags,synack_hex,ack_id,ack_type,ack_seq,ack_ack,ack_wss,ack_flags,ack_hex
0,12345-56039,1,SYN,0,0,65535,0x0002,02000000450000346d364000800600007f0000017f0000...,2,SYN-ACK,...,65535,0x0012,02000000450000346d374000800600007f0000017f0000...,3,ACK,1,1,255,0x0010,02000000450000286d384000800600007f0000017f0000...
1,12345-56040,12,SYN,0,0,65535,0x0002,02000000450000346d414000800600007f0000017f0000...,13,SYN-ACK,...,65535,0x0012,02000000450000346d424000800600007f0000017f0000...,14,ACK,1,1,255,0x0010,02000000450000286d434000800600007f0000017f0000...
2,12345-56041,23,SYN,0,0,65535,0x0002,02000000450000346d4c4000800600007f0000017f0000...,24,SYN-ACK,...,65535,0x0012,02000000450000346d4d4000800600007f0000017f0000...,25,ACK,1,1,255,0x0010,02000000450000286d4e4000800600007f0000017f0000...
3,12345-56042,33,SYN,0,0,65535,0x0002,02000000450000286d564000800600007f0000017f0000...,34,SYN-ACK,...,65535,0x0012,02000000450000346d574000800600007f0000017f0000...,35,ACK,1,1,255,0x0010,02000000450000346d584000800600007f0000017f0000...
4,12345-56043,44,SYN,0,0,65535,0x0002,02000000450000286d614000800600007f0000017f0000...,45,SYN-ACK,...,65535,0x0012,02000000450000346d624000800600007f0000017f0000...,46,ACK,1,1,255,0x0010,02000000450000346d634000800600007f0000017f0000...


In [116]:
df_handshake.to_csv("/content/tcp_handshakes_ground_truth.csv", index=False)
print("Saved tcp_handshakes_ground_truth.csv")


Saved tcp_handshakes_ground_truth.csv


In [117]:
import pandas as pd
import random

df_handshake = pd.read_csv("/content/tcp_handshakes_ground_truth.csv")
df_handshake.head()


Unnamed: 0,flow_id,syn_id,syn_type,syn_seq,syn_ack,syn_wss,syn_flags,syn_hex,synack_id,synack_type,...,synack_wss,synack_flags,synack_hex,ack_id,ack_type,ack_seq,ack_ack,ack_wss,ack_flags,ack_hex
0,12345-56039,1,SYN,0,0,65535,0x0002,02000000450000346d364000800600007f0000017f0000...,2,SYN-ACK,...,65535,0x0012,02000000450000346d374000800600007f0000017f0000...,3,ACK,1,1,255,0x0010,02000000450000286d384000800600007f0000017f0000...
1,12345-56040,12,SYN,0,0,65535,0x0002,02000000450000346d414000800600007f0000017f0000...,13,SYN-ACK,...,65535,0x0012,02000000450000346d424000800600007f0000017f0000...,14,ACK,1,1,255,0x0010,02000000450000286d434000800600007f0000017f0000...
2,12345-56041,23,SYN,0,0,65535,0x0002,02000000450000346d4c4000800600007f0000017f0000...,24,SYN-ACK,...,65535,0x0012,02000000450000346d4d4000800600007f0000017f0000...,25,ACK,1,1,255,0x0010,02000000450000286d4e4000800600007f0000017f0000...
3,12345-56042,33,SYN,0,0,65535,0x0002,02000000450000286d564000800600007f0000017f0000...,34,SYN-ACK,...,65535,0x0012,02000000450000346d574000800600007f0000017f0000...,35,ACK,1,1,255,0x0010,02000000450000346d584000800600007f0000017f0000...
4,12345-56043,44,SYN,0,0,65535,0x0002,02000000450000286d614000800600007f0000017f0000...,45,SYN-ACK,...,65535,0x0012,02000000450000346d624000800600007f0000017f0000...,46,ACK,1,1,255,0x0010,02000000450000346d634000800600007f0000017f0000...


# **2. Fuzzing**

**Initial Seed**

In [118]:
cols_for_seed = [
    "flow_id",

    "syn_hex",
    "syn_type",
    "syn_seq",
    "syn_ack",
    "syn_wss",
    "syn_flags",

    "synack_hex",
    "synack_type",
    "synack_seq",
    "synack_ack",
    "synack_wss",
    "synack_flags",

    "ack_hex",      # ground truth output for LLM
    "ack_type",
    "ack_seq",
    "ack_ack",
    "ack_wss",
    "ack_flags",
]

df_task2 = df_handshake[cols_for_seed].copy()
df_task2.head()


Unnamed: 0,flow_id,syn_hex,syn_type,syn_seq,syn_ack,syn_wss,syn_flags,synack_hex,synack_type,synack_seq,synack_ack,synack_wss,synack_flags,ack_hex,ack_type,ack_seq,ack_ack,ack_wss,ack_flags
0,12345-56039,02000000450000346d364000800600007f0000017f0000...,SYN,0,0,65535,0x0002,02000000450000346d374000800600007f0000017f0000...,SYN-ACK,0,1,65535,0x0012,02000000450000286d384000800600007f0000017f0000...,ACK,1,1,255,0x0010
1,12345-56040,02000000450000346d414000800600007f0000017f0000...,SYN,0,0,65535,0x0002,02000000450000346d424000800600007f0000017f0000...,SYN-ACK,0,1,65535,0x0012,02000000450000286d434000800600007f0000017f0000...,ACK,1,1,255,0x0010
2,12345-56041,02000000450000346d4c4000800600007f0000017f0000...,SYN,0,0,65535,0x0002,02000000450000346d4d4000800600007f0000017f0000...,SYN-ACK,0,1,65535,0x0012,02000000450000286d4e4000800600007f0000017f0000...,ACK,1,1,255,0x0010
3,12345-56042,02000000450000286d564000800600007f0000017f0000...,SYN,0,0,65535,0x0002,02000000450000346d574000800600007f0000017f0000...,SYN-ACK,0,1,65535,0x0012,02000000450000346d584000800600007f0000017f0000...,ACK,1,1,255,0x0010
4,12345-56043,02000000450000286d614000800600007f0000017f0000...,SYN,0,0,65535,0x0002,02000000450000346d624000800600007f0000017f0000...,SYN-ACK,0,1,65535,0x0012,02000000450000346d634000800600007f0000017f0000...,ACK,1,1,255,0x0010


In [119]:
import pandas as pd
df_gt = pd.read_csv("/content/tcp_handshakes_ground_truth.csv")
print(df_gt.columns.tolist())


['flow_id', 'syn_id', 'syn_type', 'syn_seq', 'syn_ack', 'syn_wss', 'syn_flags', 'syn_hex', 'synack_id', 'synack_type', 'synack_seq', 'synack_ack', 'synack_wss', 'synack_flags', 'synack_hex', 'ack_id', 'ack_type', 'ack_seq', 'ack_ack', 'ack_wss', 'ack_flags', 'ack_hex']


**HEX Mutation**

In [120]:
def mutate_hex(hex_str, num_mutations=2):
    """
    hex_str: string like '0200000045000034...'
    Randomly changes num_mutations hex digits.
    """
    hex_str = str(hex_str)
    hex_list = list(hex_str)

    for _ in range(num_mutations):
        i = random.randrange(len(hex_list))
        hex_list[i] = random.choice("0123456789abcdef")

    return "".join(hex_list)


**SYN and SYN ACK Mutation**

In [121]:
import random

def mutate_hex(hex_str, num_flips=2):
    b = bytearray.fromhex(hex_str)
    for _ in range(num_flips):
        i = random.randrange(len(b))
        b[i] ^= 0xFF
    return b.hex()

df_task2 = df_gt.copy()

# mutate ONLY the seeds you feed the LLM (SYN + SYN-ACK)
df_task2["syn_hex_mut"] = df_task2["syn_hex"].apply(lambda x: mutate_hex(x, num_flips=2))
df_task2["synack_hex_mut"] = df_task2["synack_hex"].apply(lambda x: mutate_hex(x, num_flips=2))



# **3. LLM Prompting (GPT 4.1)**

**3a. Build prompt using persona + template engineering**

In [None]:

df2_task2 = df_task2

In [None]:
import openai
openai.api_key = "sk-............"

In [123]:
def build_prompt(row):
    return f"""You are analyzing a TCP 3-way handshake on localhost.

Packet 1 (possibly mutated SYN):
  Type: {row['syn_type']}
  Flags: {row['syn_flags']}
  Seq: {row['syn_seq']}
  Ack: {row['syn_ack']}
  Window: {row['syn_wss']}
  Hex: {row['syn_hex_mut']}

Packet 2 (possibly mutated SYN-ACK):
  Type: {row['synack_type']}
  Flags: {row['synack_flags']}
  Seq: {row['synack_seq']}
  Ack: {row['synack_ack']}
  Window: {row['synack_wss']}
  Hex: {row['synack_hex_mut']}

Some bytes may be corrupted due to fuzzing.

Task:
Generate the third packet of this handshake (the final ACK) as a HEX STRING ONLY,
with no spaces and no explanation.
"""

df_task2["prompt"] = df_task2.apply(build_prompt, axis=1)

print("Has prompt?", "prompt" in df_task2.columns)
df_task2[["flow_id","prompt"]].head(1)


Has prompt? True


Unnamed: 0,flow_id,prompt
0,12345-56039,You are analyzing a TCP 3-way handshake on loc...


In [124]:
print(df_task2.columns.tolist())


['flow_id', 'syn_id', 'syn_type', 'syn_seq', 'syn_ack', 'syn_wss', 'syn_flags', 'syn_hex', 'synack_id', 'synack_type', 'synack_seq', 'synack_ack', 'synack_wss', 'synack_flags', 'synack_hex', 'ack_id', 'ack_type', 'ack_seq', 'ack_ack', 'ack_wss', 'ack_flags', 'ack_hex', 'syn_hex_mut', 'synack_hex_mut', 'prompt']


**3b. Run prompt 20 times**

In [125]:
from tqdm import trange

runs = []

for run_id in trange(20):
    for _, row in df_task2.iterrows():
        response = openai.ChatCompletion.create(
            model="gpt-4.1",
            messages=[
                {"role": "system", "content": "You are a TCP packet generator."},
                {"role": "user", "content": row["prompt"]}
            ]
        )

        llm_hex = response["choices"][0]["message"]["content"].strip()

        runs.append({
            "run_id": run_id,
            "flow_id": row["flow_id"],
            "ack_groundtruth": row["ack_hex"],
            "ack_llm": llm_hex
        })

df_runs = pd.DataFrame(runs)
df_runs.head()


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 20/20 [07:10<00:00, 21.51s/it]


Unnamed: 0,run_id,flow_id,ack_groundtruth,ack_llm
0,0,12345-56039,02000000450000286d384000800600007f0000017f0000...,02000000450000346d384100800600007f0000017f0000...
1,0,12345-56040,02000000450000286d434000800600007f0000017f0000...,02000000450000286d4340008006ffff7f0000017f0000...
2,0,12345-56041,02000000450000286d4e4000800600007f0000017f0000...,02000000450000346d4e4000800600007f0000017f0000...
3,0,12345-56042,02000000450000346d584000800600007f0000017f0000...,02000000450000286d58400080060000800000017f0000...
4,0,12345-56043,02000000450000346d634000800600007f0000017f0000...,02000000450000286d614000801100007f0000017f0000...


In [126]:
df_runs.to_csv("/content/tcp_baseline_gpt4.1_20.csv", index=False)
print("Saved tcp_baseline_gpt4.1_task2_20.csv")


Saved tcp_baseline_gpt4.1_task2_20.csv


**3c. Build prompt using persona + template engineering + One shot + Chain of Thought**

In [127]:
print(df_task2.columns.tolist())


['flow_id', 'syn_id', 'syn_type', 'syn_seq', 'syn_ack', 'syn_wss', 'syn_flags', 'syn_hex', 'synack_id', 'synack_type', 'synack_seq', 'synack_ack', 'synack_wss', 'synack_flags', 'synack_hex', 'ack_id', 'ack_type', 'ack_seq', 'ack_ack', 'ack_wss', 'ack_flags', 'ack_hex', 'syn_hex_mut', 'synack_hex_mut', 'prompt']


In [128]:
import pandas as pd

df_gt = pd.read_csv("/content/tcp_handshakes_ground_truth.csv")

# Use the FIRST real handshake as one-shot example
ex = df_gt.iloc[0]

ONE_SHOT = f"""
ONE-SHOT EXAMPLE (REAL HANDSHAKE)

Packet 1 (SYN):
Hex: {ex['syn_hex']}

Packet 2 (SYN-ACK):
Hex: {ex['synack_hex']}

Correct Packet 3 (ACK):
Hex: {ex['ack_hex']}
""".strip()

print(ONE_SHOT[:500])


ONE-SHOT EXAMPLE (REAL HANDSHAKE)

Packet 1 (SYN):
Hex: 02000000450000346d364000800600007f0000017f000001ca3b303938253e79000000008002ffff05d700000204ffd70103030801010402

Packet 2 (SYN-ACK):
Hex: 02000000450000346d374000800600007f0000017f0000013039ca3b57dc1f220938253e7a8012ffff8ec700000204ffd70103030801010402

Correct Packet 3 (ACK):
Hex: 02000000450000286d384000800600007f0000017f000001ca3b303938253e7a57dc1f23501000ffc8bf0000


In [129]:
def build_prompt(row):
    return f"""You are analyzing TCP 3-way handshakes on localhost.

{ONE_SHOT}

NOW SOLVE THIS CASE:

Packet 1 (possibly mutated SYN):
Hex: {row['syn_hex_mut']}

Packet 2 (possibly mutated SYN-ACK):
Hex: {row['synack_hex_mut']}

Instructions:
- Think step-by-step silently to ensure correctness.
- Output ONLY the final ACK packet as a HEX STRING.
- No spaces, no explanation, no extra text.
"""


In [130]:
df_task2["prompt"] = df_task2.apply(build_prompt, axis=1)


In [131]:
print(df_task2["prompt"].iloc[0][:400])


You are analyzing TCP 3-way handshakes on localhost.

ONE-SHOT EXAMPLE (REAL HANDSHAKE)

Packet 1 (SYN):
Hex: 02000000450000346d364000800600007f0000017f000001ca3b303938253e79000000008002ffff05d700000204ffd70103030801010402

Packet 2 (SYN-ACK):
Hex: 02000000450000346d374000800600007f0000017f0000013039ca3b57dc1f220938253e7a8012ffff8ec700000204ffd70103030801010402

Correct Packet 3 (ACK):
Hex: 020000


**3d. Run prompt 20 times**

In [None]:
import openai
import os
from tqdm import trange

openai.api_key = "sk-..............."

ROWS = []

for run_id in trange(20, desc="20 LLM runs"):
    for _, row in df_task2.iterrows():

        response = openai.ChatCompletion.create(
            model="gpt-4.1",
            temperature=0.2,
            messages=[
                {"role": "system", "content": "You generate valid TCP packets."},
                {"role": "user", "content": row["prompt"]}
            ]
        )

        ack_llm = response["choices"][0]["message"]["content"].strip()

        ROWS.append({
            "run_id": run_id,
            "flow_id": row["flow_id"],
            "syn_hex_mut": row["syn_hex_mut"],
            "synack_hex_mut": row["synack_hex_mut"],
            "ack_hex_gt": row["ack_hex"],
            "ack_hex_llm": ack_llm
        })


20 LLM runs: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 20/20 [06:42<00:00, 20.12s/it]


In [133]:
df_20 = pd.DataFrame(ROWS)
df_20.to_csv("tcp_oneshot_gpt4.1_20.csv", index=False)

print("Saved tcp_oneshot_gpt4.1_20.csv")
df_20.head()


Saved tcp_oneshot_gpt4.1_20.csv


Unnamed: 0,run_id,flow_id,syn_hex_mut,synack_hex_mut,ack_hex_gt,ack_hex_llm
0,0,12345-56039,02000000450000346d36bf00800600007f0000017f0000...,02000000450000346d374000800600007f0000017fff00...,02000000450000286d384000800600007f0000017f0000...,02000000450000286d384000800600007f0000017f0000...
1,0,12345-56040,02000000450000346d41400080f900007f0000017f0000...,02000000450000346d4240008006ffff7f0000017f0000...,02000000450000286d434000800600007f0000017f0000...,02000000450000286d434000800600007f0000017f0000...
2,0,12345-56041,02000000450000346d4c4000800600007f00ff017f0000...,02000000450000346d4d400080f900007f0000017f0000...,02000000450000286d4e4000800600007f0000017f0000...,02000000450000286d4e4000800600007f00ff017f0000...
3,0,12345-56042,02000000450000286d56400080060000800000017f0000...,02000000450000346d574000800600007f0000017f0000...,02000000450000346d584000800600007f0000017f0000...,02000000450000286d584000800600007f0000017f0000...
4,0,12345-56043,02000000ba0000286d614000800600007f0000017fff00...,02000000450000346d62400080f900007f0000017f0000...,02000000450000346d634000800600007f0000017f0000...,02000000450000286d634000800600007f0000017fff00...


# **3. LLM Prompting (GPT 3.5)**

**3a. Build prompt using persona + template engineering**

In [None]:
import openai
openai.api_key = "sk-.............."

In [135]:
def build_prompt(row):
    return f"""You are analyzing a TCP 3-way handshake on localhost.

Packet 1 (possibly mutated SYN):
  Type: {row['syn_type']}
  Flags: {row['syn_flags']}
  Seq: {row['syn_seq']}
  Ack: {row['syn_ack']}
  Window: {row['syn_wss']}
  Hex: {row['syn_hex_mut']}

Packet 2 (possibly mutated SYN-ACK):
  Type: {row['synack_type']}
  Flags: {row['synack_flags']}
  Seq: {row['synack_seq']}
  Ack: {row['synack_ack']}
  Window: {row['synack_wss']}
  Hex: {row['synack_hex_mut']}

Some bytes may be corrupted due to fuzzing.

Task:
Generate the third packet of this handshake (the final ACK) as a HEX STRING ONLY,
with no spaces and no explanation.
"""

df_task2["prompt"] = df_task2.apply(build_prompt, axis=1)

print("Has prompt?", "prompt" in df_task2.columns)
df_task2[["flow_id","prompt"]].head(1)


Has prompt? True


Unnamed: 0,flow_id,prompt
0,12345-56039,You are analyzing a TCP 3-way handshake on loc...


**3b. Run prompt 20 times**

In [136]:
from tqdm import trange

runs = []

for run_id in trange(20):
    for _, row in df_task2.iterrows():
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a TCP packet generator."},
                {"role": "user", "content": row["prompt"]}
            ]
        )

        llm_hex = response["choices"][0]["message"]["content"].strip()

        runs.append({
            "run_id": run_id,
            "flow_id": row["flow_id"],
            "ack_groundtruth": row["ack_hex"],
            "ack_llm": llm_hex
        })

df_runs = pd.DataFrame(runs)
df_runs.head()


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 20/20 [04:14<00:00, 12.72s/it]


Unnamed: 0,run_id,flow_id,ack_groundtruth,ack_llm
0,0,12345-56039,02000000450000286d384000800600007f0000017f0000...,02000000450000346d376100800600007f0000017f0000...
1,0,12345-56040,02000000450000286d434000800600007f0000017f0000...,02000000450000346d4340008006ffff7f0000017f0000...
2,0,12345-56041,02000000450000286d4e4000800600007f0000017f0000...,0201405a00000102cb6c5a24bbfe00516d4c19cbff0050...
3,0,12345-56042,02000000450000346d584000800600007f0000017f0000...,0602125045000028000001060000007f0000017f000001...
4,0,12345-56043,02000000450000346d634000800600007f0000017f0000...,020000006d9cbea8ca3f30390200000050028010ffcfa6...


In [137]:
df_runs.to_csv("/content/tcp_baseline_gpt3.5_20.csv", index=False)
print("Saved tcp_baseline_gpt3.5_20.csv")


Saved tcp_baseline_gpt3.5_20.csv


**3c. Build prompt using persona + template engineering + One shot + Chain of Thought**

In [138]:
import pandas as pd

df_gt = pd.read_csv("/content/tcp_handshakes_ground_truth.csv")

# Use the FIRST real handshake as one-shot example
ex = df_gt.iloc[0]

ONE_SHOT = f"""
ONE-SHOT EXAMPLE (REAL HANDSHAKE)

Packet 1 (SYN):
Hex: {ex['syn_hex']}

Packet 2 (SYN-ACK):
Hex: {ex['synack_hex']}

Correct Packet 3 (ACK):
Hex: {ex['ack_hex']}
""".strip()

print(ONE_SHOT[:500])


ONE-SHOT EXAMPLE (REAL HANDSHAKE)

Packet 1 (SYN):
Hex: 02000000450000346d364000800600007f0000017f000001ca3b303938253e79000000008002ffff05d700000204ffd70103030801010402

Packet 2 (SYN-ACK):
Hex: 02000000450000346d374000800600007f0000017f0000013039ca3b57dc1f220938253e7a8012ffff8ec700000204ffd70103030801010402

Correct Packet 3 (ACK):
Hex: 02000000450000286d384000800600007f0000017f000001ca3b303938253e7a57dc1f23501000ffc8bf0000


In [139]:
def build_prompt(row):
    return f"""You are analyzing TCP 3-way handshakes on localhost.

{ONE_SHOT}

NOW SOLVE THIS CASE:

Packet 1 (possibly mutated SYN):
Hex: {row['syn_hex_mut']}

Packet 2 (possibly mutated SYN-ACK):
Hex: {row['synack_hex_mut']}

Instructions:
- Think step-by-step silently to ensure correctness.
- Output ONLY the final ACK packet as a HEX STRING.
- No spaces, no explanation, no extra text.
"""


In [140]:
df_task2["prompt"] = df_task2.apply(build_prompt, axis=1)


**3b. Run prompt 20 times**

In [141]:
from tqdm import trange

runs = []

for run_id in trange(20):
    for _, row in df_task2.iterrows():
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a TCP packet generator."},
                {"role": "user", "content": row["prompt"]}
            ]
        )

        llm_hex = response["choices"][0]["message"]["content"].strip()

        runs.append({
            "run_id": run_id,
            "flow_id": row["flow_id"],
            "ack_groundtruth": row["ack_hex"],
            "ack_llm": llm_hex
        })

df_runs = pd.DataFrame(runs)
df_runs.head()


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 20/20 [04:22<00:00, 13.14s/it]


Unnamed: 0,run_id,flow_id,ack_groundtruth,ack_llm
0,0,12345-56039,02000000450000286d384000800600007f0000017f0000...,02000000450000286d384000800600007f0000017f0000...
1,0,12345-56040,02000000450000286d434000800600007f0000017f0000...,02000000450000346d414000800600007f0000017f0000...
2,0,12345-56041,02000000450000286d4e4000800600007f0000017f0000...,02000000450000286d4e4000800600007f00ff017f0000...
3,0,12345-56042,02000000450000346d584000800600007f0000017f0000...,02000000450000286d384000800600007f0000017f0000...
4,0,12345-56043,02000000450000346d634000800600007f0000017f0000...,02000000450000286d634000800600007f0000017f0000...


In [142]:
df_runs.to_csv("/content/tcp_oneshot_gpt3.5_20.csv", index=False)
print("Saved tcp_oneshot_gpt3.5_20.csv")


Saved tcp_oneshot_gpt3.5_20.csv


# **4a. Evaluation (GPT 4.1)**

**4a. For Baseline (persona + template prompting)**

In [143]:
import pandas as pd

df = pd.read_csv("/content/tcp_baseline_gpt4.1_20.csv")
print(df.columns)
df.head()


Index(['run_id', 'flow_id', 'ack_groundtruth', 'ack_llm'], dtype='object')


Unnamed: 0,run_id,flow_id,ack_groundtruth,ack_llm
0,0,12345-56039,02000000450000286d384000800600007f0000017f0000...,02000000450000346d384100800600007f0000017f0000...
1,0,12345-56040,02000000450000286d434000800600007f0000017f0000...,02000000450000286d4340008006ffff7f0000017f0000...
2,0,12345-56041,02000000450000286d4e4000800600007f0000017f0000...,02000000450000346d4e4000800600007f0000017f0000...
3,0,12345-56042,02000000450000346d584000800600007f0000017f0000...,02000000450000286d58400080060000800000017f0000...
4,0,12345-56043,02000000450000346d634000800600007f0000017f0000...,02000000450000286d614000801100007f0000017f0000...


**Classify PAcket type**

In [144]:
def classify_tcp_type(hex_str):
    try:
        b = bytes.fromhex(hex_str)

        # Find TCP flags byte (robust enough for your captures)
        flags = b[-7]

        syn = flags & 0x02
        ack = flags & 0x10
        fin = flags & 0x01
        rst = flags & 0x04

        if syn and ack:
            return "SYN-ACK"
        if syn:
            return "SYN"
        if ack and not fin and not rst:
            return "ACK"
        if fin:
            return "FIN"
        if rst:
            return "RST"
        return "OTHER"
    except Exception:
        return "INVALID"


In [145]:
# IMPORTANT: adjust column name if needed
df["pkt_type"] = df["ack_llm"].apply(classify_tcp_type)


In [146]:
print(df[["ack_llm", "pkt_type"]].head())


                                             ack_llm pkt_type
0  02000000450000346d384100800600007f0000017f0000...      SYN
1  02000000450000286d4340008006ffff7f0000017f0000...    OTHER
2  02000000450000346d4e4000800600007f0000017f0000...      ACK
3  02000000450000286d58400080060000800000017f0000...    OTHER
4  02000000450000286d614000801100007f0000017f0000...      ACK


In [147]:
df["is_valid"] = df["pkt_type"].isin(
    ["ACK", "SYN", "SYN-ACK", "FIN", "RST"]
)

df["is_ack"] = df["pkt_type"] == "ACK"

df["is_hallucination"] = ~df["is_valid"]


In [148]:
grouped = df.groupby("flow_id")


In [149]:
missing_rate = grouped["is_ack"].apply(lambda x: 1 - x.mean()).mean()


In [150]:
hallucination_rate = grouped["is_hallucination"].mean().mean()


In [151]:
valid_unique_types = (
    df[df["is_valid"]]
    .groupby("flow_id")["pkt_type"]
    .nunique()
    .mean()
)


In [152]:
relative_position_score = grouped["is_ack"].mean().mean()


In [153]:
results = pd.DataFrame([{
    "Model": "gpt-4.1",
    "Prompting": "Baseline",
    "Average Packet Types after Enrichment": 1.0,
    "Valid Unique Packet Types": valid_unique_types,
    "Missing Packet Type Rate": missing_rate,
    "Hallucination Rate": hallucination_rate,
    "Average Relative Position Score": relative_position_score
}])
results


Unnamed: 0,Model,Prompting,Average Packet Types after Enrichment,Valid Unique Packet Types,Missing Packet Type Rate,Hallucination Rate,Average Relative Position Score
0,gpt-4.1,Baseline,1.0,1.95,0.4725,0.37,0.5275


**4b. For  baseline + One shot + CoT**

In [154]:
import pandas as pd

df = pd.read_csv("/content/tcp_oneshot_gpt4.1_20.csv")
print(df.columns)
df.head()


Index(['run_id', 'flow_id', 'syn_hex_mut', 'synack_hex_mut', 'ack_hex_gt',
       'ack_hex_llm'],
      dtype='object')


Unnamed: 0,run_id,flow_id,syn_hex_mut,synack_hex_mut,ack_hex_gt,ack_hex_llm
0,0,12345-56039,02000000450000346d36bf00800600007f0000017f0000...,02000000450000346d374000800600007f0000017fff00...,02000000450000286d384000800600007f0000017f0000...,02000000450000286d384000800600007f0000017f0000...
1,0,12345-56040,02000000450000346d41400080f900007f0000017f0000...,02000000450000346d4240008006ffff7f0000017f0000...,02000000450000286d434000800600007f0000017f0000...,02000000450000286d434000800600007f0000017f0000...
2,0,12345-56041,02000000450000346d4c4000800600007f00ff017f0000...,02000000450000346d4d400080f900007f0000017f0000...,02000000450000286d4e4000800600007f0000017f0000...,02000000450000286d4e4000800600007f00ff017f0000...
3,0,12345-56042,02000000450000286d56400080060000800000017f0000...,02000000450000346d574000800600007f0000017f0000...,02000000450000346d584000800600007f0000017f0000...,02000000450000286d584000800600007f0000017f0000...
4,0,12345-56043,02000000ba0000286d614000800600007f0000017fff00...,02000000450000346d62400080f900007f0000017f0000...,02000000450000346d634000800600007f0000017f0000...,02000000450000286d634000800600007f0000017fff00...


In [155]:
def classify_tcp_type(hex_str):
    try:
        b = bytes.fromhex(hex_str)

        # Find TCP flags byte (robust enough for your captures)
        flags = b[-7]

        syn = flags & 0x02
        ack = flags & 0x10
        fin = flags & 0x01
        rst = flags & 0x04

        if syn and ack:
            return "SYN-ACK"
        if syn:
            return "SYN"
        if ack and not fin and not rst:
            return "ACK"
        if fin:
            return "FIN"
        if rst:
            return "RST"
        return "OTHER"
    except Exception:
        return "INVALID"


In [156]:
# IMPORTANT: adjust column name if needed
df["pkt_type"] = df["ack_hex_llm"].apply(classify_tcp_type)


In [157]:
print(df[["ack_hex_llm", "pkt_type"]].head())


                                         ack_hex_llm pkt_type
0  02000000450000286d384000800600007f0000017f0000...      ACK
1  02000000450000286d434000800600007f0000017f0000...      ACK
2  02000000450000286d4e4000800600007f00ff017f0000...      FIN
3  02000000450000286d584000800600007f0000017f0000...      ACK
4  02000000450000286d634000800600007f0000017fff00...      ACK


In [158]:
df["is_valid"] = df["pkt_type"].isin(
    ["ACK", "SYN", "SYN-ACK", "FIN", "RST"]
)

df["is_ack"] = df["pkt_type"] == "ACK"

df["is_hallucination"] = ~df["is_valid"]


In [159]:
grouped = df.groupby("flow_id")


In [160]:
missing_rate = grouped["is_ack"].apply(lambda x: 1 - x.mean()).mean()


In [161]:
hallucination_rate = grouped["is_hallucination"].mean().mean()



In [162]:
valid_unique_types = (
    df[df["is_valid"]]
    .groupby("flow_id")["pkt_type"]
    .nunique()
    .mean()
)


In [163]:
relative_position_score = grouped["is_ack"].mean().mean()


In [164]:
results2 = pd.DataFrame([{
    "Model": "gpt-4.1",
    "Prompting": "one-shot + CoT",
    "Average Packet Types after Enrichment": 1.0,
    "Valid Unique Packet Types": valid_unique_types,
    "Missing Packet Type Rate": missing_rate,
    "Hallucination Rate": hallucination_rate,
    "Average Relative Position Score": relative_position_score
}])
results2


Unnamed: 0,Model,Prompting,Average Packet Types after Enrichment,Valid Unique Packet Types,Missing Packet Type Rate,Hallucination Rate,Average Relative Position Score
0,gpt-4.1,one-shot + CoT,1.0,1.210526,0.435,0.3875,0.565


In [165]:
# Stack rows
df_merged = pd.concat([results, results2], ignore_index=True)

df_merged

Unnamed: 0,Model,Prompting,Average Packet Types after Enrichment,Valid Unique Packet Types,Missing Packet Type Rate,Hallucination Rate,Average Relative Position Score
0,gpt-4.1,Baseline,1.0,1.95,0.4725,0.37,0.5275
1,gpt-4.1,one-shot + CoT,1.0,1.210526,0.435,0.3875,0.565


# **4a. Evaluation (GPT 3.5)**

**4a. For Baseline (persona + template prompting)**

In [166]:
import pandas as pd

df = pd.read_csv("/content/tcp_baseline_gpt3.5_20.csv")
print(df.columns)
df.head()


Index(['run_id', 'flow_id', 'ack_groundtruth', 'ack_llm'], dtype='object')


Unnamed: 0,run_id,flow_id,ack_groundtruth,ack_llm
0,0,12345-56039,02000000450000286d384000800600007f0000017f0000...,02000000450000346d376100800600007f0000017f0000...
1,0,12345-56040,02000000450000286d434000800600007f0000017f0000...,02000000450000346d4340008006ffff7f0000017f0000...
2,0,12345-56041,02000000450000286d4e4000800600007f0000017f0000...,0201405a00000102cb6c5a24bbfe00516d4c19cbff0050...
3,0,12345-56042,02000000450000346d584000800600007f0000017f0000...,0602125045000028000001060000007f0000017f000001...
4,0,12345-56043,02000000450000346d634000800600007f0000017f0000...,020000006d9cbea8ca3f30390200000050028010ffcfa6...


In [167]:
def classify_tcp_type(hex_str):
    try:
        b = bytes.fromhex(hex_str)

        # Find TCP flags byte (robust enough for your captures)
        flags = b[-7]

        syn = flags & 0x02
        ack = flags & 0x10
        fin = flags & 0x01
        rst = flags & 0x04

        if syn and ack:
            return "SYN-ACK"
        if syn:
            return "SYN"
        if ack and not fin and not rst:
            return "ACK"
        if fin:
            return "FIN"
        if rst:
            return "RST"
        return "OTHER"
    except Exception:
        return "INVALID"


In [168]:
# IMPORTANT: adjust column name if needed
df["pkt_type"] = df["ack_llm"].apply(classify_tcp_type)


In [169]:
print(df[["ack_llm", "pkt_type"]].head())


                                             ack_llm pkt_type
0  02000000450000346d376100800600007f0000017f0000...      RST
1  02000000450000346d4340008006ffff7f0000017f0000...  INVALID
2  0201405a00000102cb6c5a24bbfe00516d4c19cbff0050...  INVALID
3  0602125045000028000001060000007f0000017f000001...  INVALID
4  020000006d9cbea8ca3f30390200000050028010ffcfa6...  INVALID


In [170]:
df["is_valid"] = df["pkt_type"].isin(
    ["ACK", "SYN", "SYN-ACK", "FIN", "RST"]
)

df["is_ack"] = df["pkt_type"] == "ACK"

df["is_hallucination"] = ~df["is_valid"]


In [171]:
grouped = df.groupby("flow_id")


In [172]:
missing_rate = grouped["is_ack"].apply(lambda x: 1 - x.mean()).mean()


In [173]:
hallucination_rate = grouped["is_hallucination"].mean().mean()



In [174]:
valid_unique_types = (
    df[df["is_valid"]]
    .groupby("flow_id")["pkt_type"]
    .nunique()
    .mean()
)


In [175]:
relative_position_score = grouped["is_ack"].mean().mean()


In [176]:
results3 = pd.DataFrame([{
    "Model": "gpt-3.5",
    "Prompting": "baseline",
    "Average Packet Types after Enrichment": 1.0,
    "Valid Unique Packet Types": valid_unique_types,
    "Missing Packet Type Rate": missing_rate,
    "Hallucination Rate": hallucination_rate,
    "Average Relative Position Score": relative_position_score
}])
results3


Unnamed: 0,Model,Prompting,Average Packet Types after Enrichment,Valid Unique Packet Types,Missing Packet Type Rate,Hallucination Rate,Average Relative Position Score
0,gpt-3.5,baseline,1.0,3.7,0.865,0.5575,0.135


In [177]:
# Stack rows
df_merged = pd.concat([df_merged, results3], ignore_index=True)

df_merged

Unnamed: 0,Model,Prompting,Average Packet Types after Enrichment,Valid Unique Packet Types,Missing Packet Type Rate,Hallucination Rate,Average Relative Position Score
0,gpt-4.1,Baseline,1.0,1.95,0.4725,0.37,0.5275
1,gpt-4.1,one-shot + CoT,1.0,1.210526,0.435,0.3875,0.565
2,gpt-3.5,baseline,1.0,3.7,0.865,0.5575,0.135


**4b. For  baseline + One shot + CoT**

In [178]:
import pandas as pd

df = pd.read_csv("/content/tcp_oneshot_gpt3.5_20.csv")
print(df.columns)
df.head()


Index(['run_id', 'flow_id', 'ack_groundtruth', 'ack_llm'], dtype='object')


Unnamed: 0,run_id,flow_id,ack_groundtruth,ack_llm
0,0,12345-56039,02000000450000286d384000800600007f0000017f0000...,02000000450000286d384000800600007f0000017f0000...
1,0,12345-56040,02000000450000286d434000800600007f0000017f0000...,02000000450000346d414000800600007f0000017f0000...
2,0,12345-56041,02000000450000286d4e4000800600007f0000017f0000...,02000000450000286d4e4000800600007f00ff017f0000...
3,0,12345-56042,02000000450000346d584000800600007f0000017f0000...,02000000450000286d384000800600007f0000017f0000...
4,0,12345-56043,02000000450000346d634000800600007f0000017f0000...,02000000450000286d634000800600007f0000017f0000...


In [179]:
def classify_tcp_type(hex_str):
    try:
        b = bytes.fromhex(hex_str)

        # Find TCP flags byte (robust enough for your captures)
        flags = b[-7]

        syn = flags & 0x02
        ack = flags & 0x10
        fin = flags & 0x01
        rst = flags & 0x04

        if syn and ack:
            return "SYN-ACK"
        if syn:
            return "SYN"
        if ack and not fin and not rst:
            return "ACK"
        if fin:
            return "FIN"
        if rst:
            return "RST"
        return "OTHER"
    except Exception:
        return "INVALID"


In [180]:
# IMPORTANT: adjust column name if needed
df["pkt_type"] = df["ack_llm"].apply(classify_tcp_type)


In [181]:
print(df[["ack_llm", "pkt_type"]].head())


                                             ack_llm pkt_type
0  02000000450000286d384000800600007f0000017f0000...      FIN
1  02000000450000346d414000800600007f0000017f0000...  INVALID
2  02000000450000286d4e4000800600007f00ff017f0000...  INVALID
3  02000000450000286d384000800600007f0000017f0000...    OTHER
4  02000000450000286d634000800600007f0000017f0000...  INVALID


In [182]:
df["is_valid"] = df["pkt_type"].isin(
    ["ACK", "SYN", "SYN-ACK", "FIN", "RST"]
)

df["is_ack"] = df["pkt_type"] == "ACK"

df["is_hallucination"] = ~df["is_valid"]


In [183]:
grouped = df.groupby("flow_id")


In [184]:
missing_rate = grouped["is_ack"].apply(lambda x: 1 - x.mean()).mean()


In [185]:
hallucination_rate = grouped["is_hallucination"].mean().mean()



In [186]:
valid_unique_types = (
    df[df["is_valid"]]
    .groupby("flow_id")["pkt_type"]
    .nunique()
    .mean()
)


In [187]:
relative_position_score = grouped["is_ack"].mean().mean()


In [188]:
results4 = pd.DataFrame([{
    "Model": "gpt-3.5",
    "Prompting": "one-shot+CoT",
    "Average Packet Types after Enrichment": 1.0,
    "Valid Unique Packet Types": valid_unique_types,
    "Missing Packet Type Rate": missing_rate,
    "Hallucination Rate": hallucination_rate,
    "Average Relative Position Score": relative_position_score
}])
results4


Unnamed: 0,Model,Prompting,Average Packet Types after Enrichment,Valid Unique Packet Types,Missing Packet Type Rate,Hallucination Rate,Average Relative Position Score
0,gpt-3.5,one-shot+CoT,1.0,2.5,0.675,0.5475,0.325


In [189]:
# Stack rows
df_merged = pd.concat([df_merged, results4], ignore_index=True)

df_merged

Unnamed: 0,Model,Prompting,Average Packet Types after Enrichment,Valid Unique Packet Types,Missing Packet Type Rate,Hallucination Rate,Average Relative Position Score
0,gpt-4.1,Baseline,1.0,1.95,0.4725,0.37,0.5275
1,gpt-4.1,one-shot + CoT,1.0,1.210526,0.435,0.3875,0.565
2,gpt-3.5,baseline,1.0,3.7,0.865,0.5575,0.135
3,gpt-3.5,one-shot+CoT,1.0,2.5,0.675,0.5475,0.325


# **Final Output**

In [190]:
df_merged.to_csv("/content/tcp_task2_results.csv", index=False)
print("Saved tcp_task2_results.csv")


Saved tcp_task2_results.csv
