# **GPT Settings**

In [1]:
!pip install openai==0.28

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 2.9.0
    Uninstalling openai-2.9.0:
      Successfully uninstalled openai-2.9.0
Successfully installed openai-0.28.0


In [2]:
import openai
import os

In [None]:


# Read the API key from the environment instead of embedding it in the notebook,
# so the credential is not leaked when the notebook is shared or committed.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError("OPENAI_API_KEY is not set; define it in the environment before running this notebook.")

# **1. Ground Truth Grammars**

The ground-truth grammars for SYN, SYN-ACK, and ACK packets (JSON files) are in the `ground_truth_grammar` folder.

# **2. Evaluation**

In [None]:
import os
import re
import time
import pandas as pd
import openai

# ==========================
# CONFIG
# ==========================

# Read the API key from the environment rather than hardcoding it here: a key
# pasted into the notebook is leaked as soon as the notebook is shared or committed.
# In Colab, set it interactively beforehand, e.g.:
#   os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError("OPENAI_API_KEY is not set; define it in the environment before running this notebook.")

MODELS = ["gpt-4.1", "gpt-3.5-turbo"]
PROMPTING_MODES = ["baseline", "oneshot_cot"]
N_REPETITIONS = 20  # runs per (packet type, model, prompting mode) combination

# Ground truth grammar files, keyed by packet type
GROUND_TRUTH_FILES = {
    "SYN":     "ground_truth_grammar/SYN.json",
    "SYN-ACK": "ground_truth_grammar/SYNACK.json",
    "ACK":     "ground_truth_grammar/ACK.json",
}

# One-shot example: the SYN ground-truth grammar file
ONE_SHOT_SYN_PATH = "ground_truth_grammar/SYN.json"


# ==========================
# HELPERS
# ==========================

def normalize_quotes(text: str) -> str:
    """Return ``text`` with typographic quotes replaced by plain ASCII quotes.

    Left/right curly double quotes become ``"`` and left/right curly single
    quotes become ``'``; every other character is left untouched.
    """
    ascii_for_curly = {
        "“": "\"",
        "”": "\"",
        "‘": "'",
        "’": "'",
    }
    for curly, plain in ascii_for_curly.items():
        text = text.replace(curly, plain)
    return text

def load_text_file(path: str) -> str:
    """Read the UTF-8 text file at ``path`` and return it with quotes normalized.

    The file contents are passed through ``normalize_quotes`` before being
    returned.
    """
    with open(path, mode="r", encoding="utf-8") as handle:
        contents = handle.read()
    return normalize_quotes(contents)

def load_gt_file(path: str):
    """
    Load a ground-truth grammar file and pull out its label and fields.

    The file is read with ``load_text_file`` and scanned with regular
    expressions; it is not run through a JSON parser.

    Returns a 3-tuple ``(label, fields, raw)``:
      - label: first quoted name that is directly followed by ``: {`` (e.g., TCP_SYN)
      - fields: dict of quoted key -> quoted string value, both whitespace-stripped
      - raw: the full file text (for prompt formatting)

    Raises ValueError when no label can be found in the file.
    """
    raw = load_text_file(path)

    label_hit = re.search(r'"([^"]+)"\s*:\s*\{', raw)
    if label_hit is None:
        raise ValueError(f"Could not find packet label in {path}")

    # Greedy match: spans from the first "{" to the last "}" in the text.
    braces_hit = re.search(r'\{(.*)\}', raw, flags=re.DOTALL)
    body = "" if braces_hit is None else braces_hit.group(1)

    # Only "key": "value" pairs whose value is a quoted string are collected.
    fields = {}
    for key, value in re.findall(r'"([^"]+)"\s*:\s*"([^"]*)"', body):
        fields[key.strip()] = value.strip()

    return label_hit.group(1), fields, raw

def parse_grammar(text: str):
    """
    Collect the "key": "value" string pairs found in a model response.

    Quotes are normalized first, then the span between the first "{" and the
    last "}" is scanned. Returns a dict of stripped keys to stripped values,
    or an empty dict when the text contains no brace-delimited span.
    """
    cleaned = normalize_quotes(text)

    braces_hit = re.search(r'\{(.*)\}', cleaned, flags=re.DOTALL)
    if braces_hit is None:
        return {}

    extracted = {}
    for key, value in re.findall(r'"([^"]+)"\s*:\s*"([^"]*)"', braces_hit.group(1)):
        extracted[key.strip()] = value.strip()
    return extracted


# ==========================
# ONE-SHOT EXAMPLE (SYN)
# ==========================

# Full text of the SYN grammar file as returned by load_text_file; it is
# interpolated as the example in the one-shot prompt.
SYN_ONE_SHOT_TEXT = load_text_file(ONE_SHOT_SYN_PATH)  # uses SYN grammar verbatim


# ==========================
# PROMPT BUILDERS
# ==========================

def build_prompt_baseline(pkt_type: str, gt_label: str, gt_text: str) -> str:
    """Build the baseline prompt for one packet grammar.

    Args:
        pkt_type: Packet type identifier; accepted for a uniform builder
            signature but not used in the prompt text.
        gt_label: Grammar label inserted into the prompt.
        gt_text: Grammar text inserted as the formatting reference.

    Returns:
        The prompt with leading and trailing whitespace stripped.
    """
    template = f"""
You are a protocol grammar extraction assistant.

Reconstruct the TCP grammar for:

    "{gt_label}"

Follow EXACTLY the same formatting style shown here:

{gt_text}

Rules:
- Use EXACT same field names.
- DO NOT add or remove fields.
- Use ONLY double quotes around names and values.
- Output only the {gt_label} grammar object.
- No explanations outside the grammar.
"""
    return template.strip()

def build_prompt_oneshot_cot(pkt_type: str, gt_label: str, gt_text: str) -> str:
    """Build the one-shot prompt that asks the model to reason internally.

    The module-level ``SYN_ONE_SHOT_TEXT`` is embedded as the example.

    Args:
        pkt_type: Packet type identifier; accepted for a uniform builder
            signature but not used in the prompt text.
        gt_label: Grammar label inserted into the prompt.
        gt_text: Grammar text inserted as the reference format.

    Returns:
        The prompt with leading and trailing whitespace stripped.
    """
    template = f"""
You are a protocol grammar extraction assistant.

Below is ONE example of a TCP grammar (for a SYN packet).
Use it ONLY to learn the formatting and structure.

ONE-SHOT EXAMPLE:
{SYN_ONE_SHOT_TEXT}

Now reconstruct the TCP grammar for:

"{gt_label}"

You must follow EXACTLY the same formatting style as the example.

Reference format (field names and ordering must match):
{gt_text}

Reasoning:
Think step-by-step internally to ensure correctness.
Do NOT reveal your reasoning.

Rules:
- Use EXACT same field names as the reference.
- DO NOT add or remove fields.
- Use ONLY double quotes around names and values.
- Output ONLY the "{gt_label}" grammar object.
- No explanations or extra text.
"""
    return template.strip()

def build_prompt(pkt_type: str, gt_label: str, gt_text: str, prompting_mode: str) -> str:
    """Return the prompt for ``prompting_mode`` ("baseline" or "oneshot_cot").

    Delegates to the matching builder with the first three arguments.
    Raises ValueError for any other mode.
    """
    if prompting_mode == "baseline":
        return build_prompt_baseline(pkt_type, gt_label, gt_text)

    if prompting_mode == "oneshot_cot":
        return build_prompt_oneshot_cot(pkt_type, gt_label, gt_text)

    raise ValueError(f"Unknown prompting_mode: {prompting_mode}")


# ==========================
# GPT CALL
# ==========================

def call_gpt(prompt: str, model_name: str, temperature: float = 0.7, max_retries: int = 5) -> str:
    """Send ``prompt`` to a chat model and return the text of the first choice.

    Uses the pre-1.0 ``openai.ChatCompletion`` interface (openai==0.28).

    Args:
        prompt: User message content.
        model_name: Model identifier passed to the API.
        temperature: Sampling temperature; the default of 0.7 keeps variation
            across repeated runs.
        max_retries: How many times a transient API failure is retried before
            the error is re-raised.

    Returns:
        The ``content`` string of the first returned choice.

    Raises:
        openai.error.OpenAIError: when a non-transient error occurs, or a
            transient one persists after ``max_retries`` retries.
    """
    # Failures that are worth retrying; anything else (e.g. an invalid request
    # or a bad API key) propagates immediately.
    transient_errors = (
        openai.error.RateLimitError,
        openai.error.ServiceUnavailableError,
        openai.error.APIConnectionError,
        openai.error.Timeout,
        openai.error.APIError,
    )

    attempts = max(0, max_retries) + 1
    for attempt in range(attempts):
        try:
            resp = openai.ChatCompletion.create(
                model=model_name,
                temperature=temperature,
                messages=[
                    {"role": "system", "content": "You are a helpful protocol analysis assistant."},
                    {"role": "user", "content": prompt},
                ],
            )
            return resp.choices[0].message["content"]
        except transient_errors:
            if attempt == attempts - 1:
                raise
            # Exponential backoff, capped so one call never stalls for long.
            time.sleep(min(2 ** attempt, 30))


# ==========================
# EVALUATION
# ==========================

def evaluate(gt_fields, model_fields, pkt_type):
    """Score one model output against the ground-truth fields.

    Each ground-truth field falls into exactly one bucket:
      - missing: absent from ``model_fields`` or present with an empty value
      - correct: present with a value equal to the ground-truth value
      - mismatched: present, non-empty, but with a different value

    Args:
        gt_fields: dict of ground-truth field name -> value.
        model_fields: dict of field name -> value parsed from the model output.
        pkt_type: Packet type label copied into the result.

    Returns:
        dict with the packet type, the ground-truth field count, the number of
        fields in ``model_fields``, the missing-field count (as a float), and
        the correct / not-correct counts divided by the ground-truth field
        count (0.0 when there are no ground-truth fields).
    """
    expected_total = len(gt_fields)

    n_missing = 0
    n_correct = 0
    for name, expected in gt_fields.items():
        produced = model_fields.get(name, "")
        if produced == "":
            n_missing += 1
        elif produced == expected:
            n_correct += 1

    # Missing and mismatched fields both count as missing field/value pairs.
    n_bad_pairs = expected_total - n_correct

    if expected_total:
        correct_ratio = n_correct / expected_total
        bad_pair_ratio = n_bad_pairs / expected_total
    else:
        correct_ratio = 0.0
        bad_pair_ratio = 0.0

    return {
        "Packet Type": pkt_type,
        "Number of Packet Fields (Ground Truth)": expected_total,
        "Number of Fields Extracted": len(model_fields),

        # Metric names kept as-is for compatibility with existing result files
        "Average Number of Missing Fields": float(n_missing),
        "Average Number of Correct Field Positions": correct_ratio,
        "Average Number of Missing Field Value Pairs": bad_pair_ratio,
    }


# ==========================
# MAIN PIPELINE
# ==========================

# One metrics dict per (packet type, model, prompting mode, repetition).
all_runs = []

for pkt_type, path in GROUND_TRUTH_FILES.items():
    print(f"\n### Processing PacketType={pkt_type} ###")

    gt_label, gt_fields, gt_text = load_gt_file(path)

    for llm in MODELS:
        for prompting_mode in PROMPTING_MODES:
            print(f"  -> LLM={llm} | Prompting={prompting_mode}")

            # The prompt depends only on the packet type and prompting mode,
            # so build it once here instead of once per repetition.
            prompt = build_prompt(pkt_type, gt_label, gt_text, prompting_mode)

            for i in range(N_REPETITIONS):
                print(f"     Run {i+1}/{N_REPETITIONS}")

                output = call_gpt(prompt, llm)

                model_fields = parse_grammar(output)
                metrics = evaluate(gt_fields, model_fields, pkt_type)

                # identifiers for combined CSV
                metrics["LLM"] = llm
                metrics["Prompting"] = prompting_mode
                metrics["Iteration"] = i + 1
                metrics["RawOutput"] = output  # optional but useful

                all_runs.append(metrics)

                # Short pause between consecutive API calls.
                time.sleep(0.2)

# ==========================
# SAVE RESULTS
# ==========================

# One row per recorded run.
df_all = pd.DataFrame(all_runs)

# Move the identifying columns to the front; the rest keep their order.
front_cols = ["LLM", "Prompting", "Packet Type", "Iteration"]
other_cols = [name for name in df_all.columns if name not in front_cols]
df_all = df_all[front_cols + other_cols]

df_all.to_csv("tcp_task1a_runs_llm_prompting.csv", index=False)
print("\n✔ Saved tcp_task1a_runs_llm_prompting.csv")

# Summary CSV: mean and std of each metric, grouped by LLM + Prompting + Packet Type
df_summary = df_all.groupby(["LLM", "Prompting", "Packet Type"], as_index=False).agg({
    metric: ["mean", "std"]
    for metric in (
        "Average Number of Missing Fields",
        "Average Number of Correct Field Positions",
        "Average Number of Missing Field Value Pairs",
    )
})

# Collapse the two-level column index into single names such as
# "<metric>_mean"; empty levels (the group keys) are dropped.
df_summary.columns = [
    "_".join(filter(None, levels)).replace("__", "_")
    for levels in df_summary.columns.values
]

df_summary.to_csv("tcp_task1a_summary_llm_prompting.csv", index=False)
print("✔ Saved tcp_task1a_summary_llm_prompting.csv")



### Processing PacketType=SYN ###
  -> LLM=gpt-4.1 | Prompting=baseline
     Run 1/20
     Run 2/20
     Run 3/20
     Run 4/20
     Run 5/20
     Run 6/20
     Run 7/20
     Run 8/20
     Run 9/20
     Run 10/20
     Run 11/20
     Run 12/20
     Run 13/20
     Run 14/20
     Run 15/20
     Run 16/20
     Run 17/20
     Run 18/20
     Run 19/20
     Run 20/20
  -> LLM=gpt-4.1 | Prompting=oneshot_cot
     Run 1/20
     Run 2/20
     Run 3/20
     Run 4/20
     Run 5/20
     Run 6/20
     Run 7/20
     Run 8/20
     Run 9/20
     Run 10/20
     Run 11/20
     Run 12/20
     Run 13/20
     Run 14/20
     Run 15/20
     Run 16/20
     Run 17/20
     Run 18/20
     Run 19/20
     Run 20/20
  -> LLM=gpt-3.5-turbo | Prompting=baseline
     Run 1/20
     Run 2/20
     Run 3/20
     Run 4/20
     Run 5/20
     Run 6/20
     Run 7/20
     Run 8/20
     Run 9/20
     Run 10/20
     Run 11/20
     Run 12/20
     Run 13/20
     Run 14/20
     Run 15/20
     Run 16/20
     Run 17/20
     Run 18/20

# **3. Final Output**

In [13]:

# Write the summary table (df_summary, built in the evaluation cell) to CSV.
df_summary.to_csv("tcp_task1a_summary_llm_prompting.csv", index=False)
print("✔ Saved tcp_task1a_summary_llm_prompting.csv")

✔ Saved tcp_task1a_summary_llm_prompting.csv
