In [26]:
import sys
import time
import jsondiff
import pandas as pd
import os
import re
from langchain_chroma import Chroma
from langchain_core.example_selectors import MaxMarginalRelevanceExampleSelector
#from langchain_community.embeddings import OllamaEmbeddings
from langchain_ollama import OllamaEmbeddings
from ollama import Client
import json, jsondiff
from sklearn.model_selection import train_test_split
import copy

from dataclasses import dataclass, asdict
from typing import Any, Dict, List, Tuple, Optional

In [27]:
TRANSLATION_PROMPT_RYU = """
You are a meticulous network engineer. Convert the user's **intent** into a single Ryu (OpenFlow) flow rule
expressed as a **JSON object only** (no comments, no code fences, no extra text).

Return exactly one JSON object with this schema (keys in lower-case snake_case):

{
  "dpid": <int>,                  // datapath ID (switch ID). Example: 1
  "table_id": <int>,              // default 0 unless the intent clearly states otherwise
  "priority": <int>,              // guidance below
  "match": {                      // match conditions
      // Common keys (use only those needed by the intent):
      "in_port": <int>,
      "eth_type": <int>,          // IPv4=2048 (0x0800), IPv6=34525 (0x86DD)
      "ip_proto": <int>,          // TCP=6, UDP=17, ICMPv4=1, ICMPv6=58
      "ipv4_src": "<ip[/mask]>",
      "ipv4_dst": "<ip[/mask]>",
      "ipv6_dst": "<ip6[/mask]>",
      "tcp_src": <int>,
      "tcp_dst": <int>,
      "udp_src": <int>,
      "udp_dst": <int>,
      "icmpv4_type": <int>        // e.g., 8 for echo request (ping)
  },
  "actions": [                    // ordered list of actions
      // For forwarding use: {"type":"OUTPUT","port":<int|IN_PORT|FLOOD|CONTROLLER|LOCAL>}
      // For queue/QoS:      {"type":"SET_QUEUE","queue_id":<int>}
      // For VLAN ops:       {"type":"PUSH_VLAN","ethertype":33024} (0x8100), 
      //                     {"type":"SET_FIELD","field":"vlan_vid","value":<int>}
      // To drop:            []   (empty list)
  ]
}

Rules of interpretation:
- "in switch N" or "on switch N" → dpid=N. If not specified, infer the most reasonable dpid from context; otherwise omit only if impossible.
- IPv4 traffic → include eth_type=2048. IPv6 traffic → include eth_type=34525.
- ICMP/ICMPv6 ping (echo request) → ip_proto=1 (or 58 for v6) AND icmpv4_type=8 when clearly IPv4.
- "HTTP" → TCP port 80; "HTTPS" → TCP port 443; "DNS" → UDP/TCP port 53 (use UDP unless otherwise stated).
- "send out/through port X", "via interface X" → OUTPUT to port X.
- "block", "drop", "deny" → actions must be an empty list [].
- QoS/priority queues → include SET_QUEUE with the specific queue_id when requested.
- table_id=0 unless the intent explicitly mentions another table.
- Priority guideline (use the highest that fits the specificity):
    300 → explicit block/deny rules
    200 → highly specific forwarding with QoS/VLAN modifications
    100 → specific L3/L4 matches (e.g., dst IP and TCP/UDP port)
     50 → catch-all/less specific fall-through (e.g., only ip_proto or in_port)

Constraints:
- Output must be a single JSON object. No surrounding text, no markdown, no explanations.
- Use **only** fields necessary to satisfy the intent; do not invent values.
- Numeric fields may be written as integers (not strings). Use lowercase key names as shown.
- If something is ambiguous, choose the most conservative, commonly-used OpenFlow interpretation for Ryu.
"""

In [28]:
# ===== RYU CANONICALIZATION + COMPARATORS (use in BOTH notebooks) =====

# Normalizations
PROTO_MAP = {"tcp":6, "udp":17, "icmp":1, "icmpv4":1, "icmpv6":58}
ETH_TYPE_ALIASES = {"0x800":2048,"0x0800":2048,2048:2048,"2048":2048,"0x86dd":34525,"34525":34525,34525:34525}
MATCH_KEY_SYNONYMS = {"icmp_type":"icmpv4_type","nw_src":"ipv4_src","nw_dst":"ipv4_dst","dl_type":"eth_type"}
ACTION_DROP_SYNONYMS = {"drop","DROP","Drop"}

def _to_int_maybe(x: Any) -> Any:
    if isinstance(x, bool): return x
    if isinstance(x, (int, float)): return int(x) if isinstance(x,float) and x.is_integer() else x
    if isinstance(x, str):
        s = x.strip()
        if s.lower().startswith("0x"):
            try: return int(s,16)
            except: return x
        if re.fullmatch(r"-?\d+", s):
            try: return int(s)
            except: return x
        if s.lower() in PROTO_MAP: return PROTO_MAP[s.lower()]
        if s.lower() in ETH_TYPE_ALIASES: return ETH_TYPE_ALIASES[s.lower()]
        return s
    return x

def canonicalize_match(match: Dict[str, Any]) -> Dict[str, Any]:
    if not isinstance(match, dict): return {}
    out = {}
    for k, v in match.items():
        key = MATCH_KEY_SYNONYMS.get(k, k)
        if key in {"eth_type","ip_proto","in_port","tcp_src","tcp_dst","udp_src","udp_dst","icmpv4_type"}:
            out[key] = _to_int_maybe(v)
        else:
            out[key] = v.strip() if isinstance(v, str) else v
    return dict(sorted(out.items(), key=lambda kv: kv[0]))

def canonicalize_actions(actions: Any) -> List[Dict[str, Any]]:
    if actions in (None, "", []): return []
    if not isinstance(actions, list): actions = [actions]
    norm = []
    for a in actions:
        if not isinstance(a, dict): continue
        t = (a.get("type") or a.get("action") or a.get("name") or "").upper()
        if t in ACTION_DROP_SYNONYMS: continue  # normalize to []
        if t == "OUTPUT":
            norm.append({"type":"OUTPUT","port":_to_int_maybe(a.get("port"))})
        elif t == "SET_QUEUE":
            norm.append({"type":"SET_QUEUE","queue_id":_to_int_maybe(a.get("queue_id"))})
        elif t == "PUSH_VLAN":
            et = _to_int_maybe(a.get("ethertype"))
            norm.append({"type":"PUSH_VLAN","ethertype": 33024 if et in (None,"") else et})
        elif t == "SET_FIELD":
            norm.append({"type":"SET_FIELD","field":a.get("field"),"value":_to_int_maybe(a.get("value"))})
        else:
            b = {k:_to_int_maybe(v) for k,v in a.items()}
            b["type"] = t or "UNKNOWN"
            norm.append(b)

    # de-dup + deterministic order
    def sig(d): return tuple(sorted(d.items()))
    seen, uniq = set(), []
    for a in norm:
        s = sig(a)
        if s not in seen:
            seen.add(s); uniq.append(a)
    uniq.sort(key=lambda d: (d.get("type",""), json.dumps(d, sort_keys=True)))
    return uniq

def canonicalize_rule(obj: Dict[str, Any]) -> Dict[str, Any]:
    if not isinstance(obj, dict): return {}
    return {
        "dpid": _to_int_maybe(obj.get("dpid")) if "dpid" in obj else None,
        "table_id": _to_int_maybe(obj.get("table_id", 0)),
        "priority": _to_int_maybe(obj.get("priority")) if "priority" in obj else None,
        "match": canonicalize_match(obj.get("match", {})),
        "actions": canonicalize_actions(obj.get("actions", []))
    }

@dataclass
class CompareConfig:
    require_same_dpid: bool = True
    require_same_table: bool = False
    require_same_priority: bool = False
    allow_pred_match_superset: bool = True
    require_actions_equal: bool = True

def _match_includes(m_pred: Dict[str,Any], m_gt: Dict[str,Any]) -> bool:
    for k,v in m_gt.items():
        if k not in m_pred or m_pred[k] != v: return False
    return True

def _actions_equal(a_pred: List[Dict[str,Any]], a_gt: List[Dict[str,Any]]) -> bool:
    return a_pred == a_gt  # already canonicalized/order-insensitive/deduped deterministically

# ---- RYU translation comparator (rename your old compare_onos_json to this) ----
def compare_ryu_json_translation(pred_obj: Dict[str, Any], gt_obj: Dict[str, Any], cfg: Optional[CompareConfig]=None):
    cfg = cfg or CompareConfig()
    c_pred, c_gt = canonicalize_rule(pred_obj), canonicalize_rule(gt_obj)
    details = {
        "dpid_equal": c_pred["dpid"] == c_gt["dpid"],
        "table_equal": c_pred["table_id"] == c_gt["table_id"],
        "priority_equal": c_pred["priority"] == c_gt["priority"],
        "match_includes_gt": _match_includes(c_pred["match"], c_gt["match"]),
        "actions_equal": _actions_equal(c_pred["actions"], c_gt["actions"]),
        "pred_canonical": c_pred, "gt_canonical": c_gt
    }
    if cfg.require_same_dpid and not details["dpid_equal"]: return False, details
    if cfg.require_same_table and not details["table_equal"]: return False, details
    if cfg.require_same_priority and not details["priority_equal"]: return False, details
    if cfg.allow_pred_match_superset:
        if not details["match_includes_gt"]: return False, details
    else:
        if c_pred["match"] != c_gt["match"]: return False, details
    if cfg.require_actions_equal and not details["actions_equal"]: return False, details
    return True, details


In [29]:
my_models_real=[
    "codegemma:7b",
    "codestral:22b",
    "codellama:34b",
    "codellama:7b",
    "command-r:35b",
    "deepseek-coder:1.3b",
    "Deepseek-coder-v2:16b",
    "dolphin-mistral:7b",
    "gemma2:27b",
    "huihui_ai/qwq-abliterated:latest",
    "huihui_ai/qwq-fusion:latest",
    "llama2:7b",
    "Llama3:8b",
    "llama3.1:8b",
    "llama3.2:3b",
    "llava-llama3:8b",
    "marco-o1:7b",
    "mistral:latest",
    "mistral-nemo:12b",
    "openchat:7b",
    "orca-mini:3b",
    "phi:2.7b",
    "phi3:3.8b",
    "qwen:4b",
    "qwen2:7b",
    "qwen2.5:7b",
    "qwq:latest",
    "starcoder:3b",
    "starcoder2:3b",
    "TinyLlama:1.1b",
    "wizardlm2:7b",
    "yi:6b",
    "zephyr:7b"
]

very_large_models = [
"llama3.3:latest",
"llama2:70b",
"codellama:70b"]

default_model = "llama2:7b"

num_context_examples = [0,1,3,6,9]

In [30]:
ollama_embedding_url = "http://localhost:11434"
ollama_server_url = "http://localhost:11435"  

In [31]:
ollama_emb = OllamaEmbeddings(
    model=default_model,
    base_url=ollama_embedding_url,
)

client = Client(host=ollama_server_url , timeout=120)

# Load custom dataset from CSV
custom_dataset = pd.read_csv('Intent2Flow-Ryu.csv') #dataset for intent translation task.

# Ensure proper column names and format
if not {'instruction', 'output'}.issubset(custom_dataset.columns):
    raise ValueError("The dataset must have 'instruction' and 'output' columns.")

# Split into train and test (50/50 split for example)
trainset, testset = train_test_split(custom_dataset, test_size=0.5, random_state=42, shuffle=True)

# SYSTEM PROMPT (Manually define)
SYSTEM_PROMPT = TRANSLATION_PROMPT_RYU

In [32]:
csv_file = f"Intent_translation_accuracies_Ryu_{int(time.time())}.csv"
output_file = f"Intent_translation_details_Ryu_{int(time.time())}.txt"
# Open the file in write mode
file = open(output_file, "w")  # Use "w" for write mode

In [33]:
results = []

for num_examples in num_context_examples:
    for model in my_models_real:

        correct_translations = 0
        total_samples = len(testset)
        processing_times = []

        # create example selector with one example, then clear the data and add all examples
        # this is a trick to reset data and remove data from continuous learning in previous run
        example_selector = MaxMarginalRelevanceExampleSelector.from_examples(
            [{"instruction": trainset.iloc[0]["instruction"], "output": trainset.iloc[0]["output"]}],
            ollama_emb,
            Chroma,
            input_keys=["instruction"],
            k=num_examples,
            vectorstore_kwargs={"fetch_k": min(num_examples, len(trainset))}
            )

        # Clear and add all remaining examples from the trainset
        example_selector.vectorstore.reset_collection()
        for _, row in trainset.iterrows():
            example_selector.add_example({
                "instruction": row["instruction"],
                "output": row["output"]
            })

        for _, testcase in testset.iterrows():
            intent = testcase["instruction"]
            expected_output = testcase["output"]
            system_prompt = SYSTEM_PROMPT
            count = 0
            while True:
                count+=1
                try:
                    time.sleep(0.1)
                    current_time = time.time()
                    if num_examples > 0:
                        examples = example_selector.select_examples({"instruction": intent})
                        example_str = "\n\n\n".join(map(lambda x: "Input: " + x["instruction"] + "\n\nOutput: " + x["output"], examples))
                        system_prompt += example_str + "\n\n\n"  
                    
                    response = client.generate(
                        model=model,
                        options={
                            'temperature': 0.6,
                            'num_ctx': 8192,
                            'top_p': 0.3,
                            'num_predict': 1024,
                            'num_gpu': 99,
                            },
                        stream=False,
                        system=system_prompt,
                        prompt=intent,
                        format='json'
                    )
                    actual_output = response['response']
                    proc_time_s = (time.time() - current_time)
                    processing_times.append(proc_time_s)
                    break

                except Exception as e:
                    print(f"Error in generating translation for {model} with {num_examples} examples: {e}") 
                    sys.stdout.flush()
                    
                    if(count<15):
                        continue
                    else:
                        print("\n",model, " failed to produce valid JSON for translation info after 15 tries. Going to next model\n")
                        break

            try:
                expected_output = json.loads(expected_output)
                actual_output = json.loads(actual_output)

                #device_id = extract_switch_id_ONOS(intent)
                
                #for flow in actual_output.get("flows", []):  # Iterate over all flows
                    #flow["deviceId"] = device_id  # Replace the device ID

                # Check correctness

                cfg = CompareConfig(
                    require_same_dpid=True,
                    require_same_table=False,      # set True if you want strict table compare
                    require_same_priority=False,   # set True if you want strict priority compare
                    allow_pred_match_superset=False,
                    require_actions_equal=True
                )
                is_ok, details = compare_ryu_json_translation(actual_output, expected_output, cfg)

                #if compare_onos_json(expected_output, actual_output):
                if is_ok:
                    correct_translations += 1            

                file.write(f"Input: {intent}")
                file.write("\nExpected Output\n")
                file.write(f"Expected: {expected_output}")
                file.write("\nActual Output\n")
                file.write(f"Actual: {actual_output}")

                # Write the first output
                file.write(f"\nResult: model: {model}, num context examples: {num_examples}, processing time: {round(proc_time_s, 2)}\n")
                # Write the second output
                file.write(f"Diff: {jsondiff.diff(expected_output, actual_output)}\n\n")

            except Exception as e:
                print("Exception found in post-translation processing: ", e)

            print("========End of a testcase===========\n")
        print("\n************Next MODEL***********")
        file.write(f"*************************Next MODEL*************************\n\n")

        # Compute accuracy
        accuracy = round((correct_translations / total_samples) * 100, 2)
        avg_time = round(sum(processing_times) / total_samples, 2)

        # Store results
        results.append({
            "model": model,
            "context_example": num_examples,
            "num_test_samples": total_samples,
            "correct_translations": correct_translations,
            "accuracy (%)": accuracy,
            "avg_time_per_translation (s)": avg_time
            })

# Save to CSV
pd.DataFrame(results).to_csv(csv_file, mode='a', header=not os.path.exists(csv_file), index=False) 


# Close the file explicitly
file.close()



























************Next MODEL***********


























************Next MODEL***********


























************Next MODEL***********


























************Next MODEL***********


























************Next MODEL***********

Exception found in post-translation processing:  'int' object has no attribute 'upper'


Exception found in post-translation processing:  'int' object has no attribute 'upper'















Exception found in post-translation processing:  Expecting ':' delimiter: line 18 column 1 (char 758)








************Next MODEL***********


























************Next MODEL***********


























************Next MODEL***********


























************Next MODEL***********


























************Next MODEL***********


























************Next MODEL***********


























************Next MODEL***********




