In [7]:
import pysqlite3
import sys
sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")
import time
import jsondiff
import pandas as pd
import os
import re
from langchain_chroma import Chroma
from langchain_core.example_selectors import MaxMarginalRelevanceExampleSelector
#from langchain_community.embeddings import OllamaEmbeddings
from langchain_ollama import OllamaEmbeddings
from ollama import Client
import json, jsondiff
from sklearn.model_selection import train_test_split
import copy

In [8]:
# System prompt for the intent-translation task (assigned to SYSTEM_PROMPT further down and
# sent with every generation request). It describes two ONOS flow-rule JSON shapes --
# forwarding/queue/VLAN rules, and drop rules that omit "treatment" -- followed by the
# meaning of each field and the translation rules the model has to follow.
# NOTE(review): the ```json fence opened in the first structure is never closed and the
# second structure has no fence at all. The text is deliberately left untouched here,
# because any edit changes what the models under evaluation are shown.
TRANSLATION_PROMPT_ONOS = """Your task is to transform natural language network intents into JSON-formatted network policies compatible with the ONOS SDN controller.

You only reply in JSON, no natural language. The network intents can represent different traffic control behaviors, such as:

1. **Traffic Forwarding, Queue Assignment, and VLAN Rules:** Define rules for forwarding traffic based on IPv4/IPv6 destination, TCP/UDP ports, and optionally assign traffic to specific queues or vlans.
2. **Blocking or Dropping Rule:** Define rules to drop traffic based on specific match criteria (e.g., source IP, destination IP). In ONOS, this is done by omitting the `"treatment"` field.

### **JSON STRUCTUREs FOR ONOS**

1. **Traffic Forwarding, Queue Assignment, and VLAN Rules:**  

```json
{
    "flows": [
        {
            "priority": <integer>,
            "timeout": <integer>, // Default: 0
            "isPermanent": "true",
            "deviceId": "<switch_id>",
            "treatment": {
                "instructions": [
                    {
                        "type": "QUEUE",
                        "queueId": <integer>
                    },
                    {
                        "type": "L2MODIFICATION",
                        "subtype": "VLAN_ID",
                        "vlanId": <integer> // Example: 100 for VLAN tagging
                    },
                    {
                        "type": "OUTPUT",
                        "port": "<integer>"
                    }
                ]
            },
            "selector": {
                "criteria": [
                    {
                        "type": "ETH_TYPE",
                        "ethType": "<string>" // Example: "0x800" for IPv4
                    },
                    {
                        "type": "IPV4_SRC",
                        "ip": "<ip_address/mask>"
                    },
                    {
                        "type": "IPV4_DST",
                        "ip": "<ip_address/mask>"
                    },
                    {
                        "type": "IP_PROTO",
                        "protocol": <integer> // Example: 6 for TCP, 17 for UDP
                    },
                    {
                        "type": "TCP_DST",
                        "tcpPort": <integer>
                    },
                    {
                        "type": "UDP_DST",
                        "udpPort": <integer>
                    },
                    {
                        "type": "IN_PORT",
                        "port": "<integer>"
                    }
                ]
            }
        }
    ]
}


2. **Blocking or Dropping Rule:**

{
    "flows": [
        {
            "priority": <integer>,
            "timeout": 0,
            "isPermanent": "true",
            "deviceId": "<switch_id>",
            "selector": {
                "criteria": [
                    {
                        "type": "ETH_TYPE",
                        "ethType": "<string>" // Example: "0x800" for IPv4
                    },
                    {
                        "type": "IPV4_SRC",
                        "ip": "<ip_address/mask>"
                    },
                    {
                        "type": "IPV4_DST",
                        "ip": "<ip_address/mask>"
                    }
                ]
            }
        }
    ]
}

Field Descriptions
priority (Mandatory): Priority level (higher numbers indicate higher priority). For blocking or firewall rules, assign a priority greater than 300.
timeout (Mandatory): Timeout in seconds after which the flow is removed (Default: 0).
isPermanent (Mandatory): "true" (always in quotes, per user preference).
deviceId (Mandatory): Switch ID where the rule is installed.
ethType (Mandatory): Ethernet protocol type. Use "0x800" for IPv4, "0x86DD" for IPv6, "0x806" for ARP.
IPV4_DST (Optional): IPv4 address in CIDR notation (e.g., "10.0.0.1/32"). Include only if explicitly mentioned.
IPV4_SRC (Optional): Source IP address (include only if explicitly mentioned).
IP_PROTO (Optional): Transport layer protocol (6 for TCP, 17 for UDP, 1 for ICMP).
TCP_DST (Optional): TCP destination port (e.g., 80 for HTTP).
UDP_DST (Optional): UDP destination port (e.g., 161 for SNMP).
IN_PORT (Optional): Incoming interface port number (use in port-based forwarding).
QUEUE (Optional): Use "QUEUE" with "queueId" to specify a QoS queue (queue ID is an integer, 0 is default).
OUTPUT (Optional): "OUTPUT" with "port" specifies the output port.
VLAN_ID (Optional): Use "L2MODIFICATION" with "subtype": "VLAN_ID" and "vlanId" to set a VLAN tag.

Rules for Translation
Each "priority" must be unique.
Set priority high (e.g., 1000) for queue-related rules.
Do not include VLAN-related fields unless explicitly mentioned in the intent.
Do not include optional fields unless explicitly mentioned in the intent.
Ensure valid ONOS-compliant JSON syntax.
Verify JSON structure before responding.
Always respond in valid JSON format only, without comments, explanations, or additional text.
If the intent cannot be mapped, return an empty JSON object {}."""

In [None]:
def extract_switch_id_ONOS(intent: str):
    """
    Extract the switch ID from a natural language intent for ONOS JSON format.

    The intent is scanned in the following order and the first hit wins:
      1. 'openflow:<n>' or 'openflow <n>'
      2. a device word (switch, router, node, openflow, device), optionally
         followed by 'number', then digits -- e.g. 'switch 1', 'node number 3'
      3. a device word directly followed by an ordinal word -- e.g. 'switch second'
      4. an ordinal word directly followed by a device word -- e.g. 'fourth switch'
      5. the first stand-alone ordinal word in the text

    Ordinal words are matched as whole words only, so 'seconds' in
    'a timeout of 30 seconds' is not mistaken for the ordinal 'second'.

    Parameters:
        intent (str): The natural language intent.

    Returns:
        str: Extracted switch ID in ONOS format (e.g., 'of:0000000000000001'),
             or None if no switch reference is found or intent is not a string.
    """
    if not isinstance(intent, str):
        return None

    # Mapping of ordinal words to numeric values
    ordinals = {
        "first": 1,
        "second": 2,
        "third": 3,
        "fourth": 4,
        "fifth": 5,
        "sixth": 6,
        "seventh": 7,
        "eighth": 8,
        "ninth": 9,
        "tenth": 10
    }
    device_words = r'(?:switch|router|node|openflow|device)'
    ordinal_words = '|'.join(ordinals)

    def to_onos_id(switch_number):
        # ONOS device IDs are 'of:' followed by the number as 16 hex digits.
        return f"of:{switch_number:016x}"

    # Numeric references: 'openflow:1' first, then 'switch 1' / 'router 2' / 'node number 3'.
    numeric_patterns = (
        r'openflow[:\s](\d+)',
        rf'\b{device_words}(?:\s*number)?\s*(\d+)',
    )
    for pattern in numeric_patterns:
        match = re.search(pattern, intent, re.IGNORECASE)
        if match:
            return to_onos_id(int(match.group(1)))

    # Ordinal references, most specific first. The trailing/leading \b keeps
    # words such as 'seconds' or 'firstly' from counting as ordinals.
    ordinal_patterns = (
        rf'\b{device_words}\s*({ordinal_words})\b',   # 'switch second'
        rf'\b({ordinal_words})\s+{device_words}',     # 'fourth switch'
        rf'\b({ordinal_words})\b',                    # bare 'fourth'
    )
    for pattern in ordinal_patterns:
        match = re.search(pattern, intent, re.IGNORECASE)
        if match:
            return to_onos_id(ordinals[match.group(1).lower()])

    return None

def normalize_value(value):
    """Normalize numeric values, including hex representations, for fair comparison.

    - Strings starting with '0x'/'0X' are re-rendered as lowercase hex without
      leading zeros ('0x0800' -> '0x800').
    - Strings made of digits are converted to int ('17' -> 17).
    - Anything else, including strings that look numeric but cannot be parsed
      (e.g. '0xZZ'), is returned unchanged instead of raising.
    """
    if isinstance(value, str):
        try:
            # Handle hex values (like '0x0800' vs '0x800')
            if value.lower().startswith("0x"):
                return f"0x{int(value, 16):x}"
            # Convert numeric strings to integers
            if value.isdigit():
                return int(value)
        except ValueError:
            # Not parseable after all; compare it as the plain string it is.
            return value
    return value


def _canonical_sort_key(value):
    """Return a string that is identical for equal canonical values, whatever their key order."""
    return json.dumps(value, sort_keys=True, default=str)


def _canonical_form(value, ignore_fields):
    """Return `value` with scalars normalized, ignored keys dropped and lists put in a stable order."""
    if isinstance(value, dict):
        return {
            key: _canonical_form(item, ignore_fields)
            for key, item in value.items()
            if key not in ignore_fields
        }
    if isinstance(value, list):
        # Items inside a list are compared in full: ignore_fields only applies to
        # dictionaries reached through dictionary nesting, as in the original code.
        items = [_canonical_form(item, frozenset()) for item in value]
        return sorted(items, key=_canonical_sort_key)
    return normalize_value(value)


def dict_equal_ignore_order(d1, d2, ignore_fields=None):
    """Recursively compare two JSON-like values ignoring order and normalizing numeric & hex fields.

    Dictionaries are compared key by key after removing `ignore_fields`; lists are
    compared as unordered collections; scalars are compared after `normalize_value`.
    The order of keys inside list items does not affect the result.

    Parameters:
        d1, d2: Values to compare (dict, list or scalar).
        ignore_fields: Optional collection of dictionary keys to leave out of the
            comparison. It is not applied to dictionaries nested inside lists.

    Returns:
        bool: True when both values are equivalent, False otherwise.
    """
    ignored = frozenset(ignore_fields) if ignore_fields else frozenset()
    return _canonical_form(d1, ignored) == _canonical_form(d2, ignored)


def compare_onos_json(expected_json, actual_json):
    """
    Compare expected ONOS JSON with actual ONOS JSON from LLM output.

    Checks, in order:
      * both sides have the shape {"flows": [<dict>, ...]}; if both have no flows
        they match, if only one has no flows they do not;
      * every actual flow carries a "priority" that is an int or a string
        (its value is deliberately not compared);
      * every actual flow has the same timeout as the first expected flow
        (a missing timeout counts as 0);
      * for the first flow of each side, "deviceId", "isPermanent", "treatment"
        and "selector" are either absent on both sides or present and equivalent.
        A field present on only one side is a mismatch, so a forwarding rule is
        never accepted in place of a drop rule (no "treatment") or vice versa.

    Neither argument is modified.

    Returns:
        bool: True if translation is correct, False otherwise.
    """
    # Define fields that must match exactly (excluding priority)
    exact_match_fields = ["deviceId", "isPermanent", "treatment", "selector"]

    # Define fields to ignore completely (EXCLUDING priority and timeout)
    ignore_fields = {"id", "appId", "life", "packets", "bytes", "lastSeen", "groupId", "liveType", "state"}

    def cleaned_flows(json_obj):
        """Return copies of the flows without ignored fields, or None if the shape is invalid."""
        if not isinstance(json_obj, dict):
            return None
        flows = json_obj.get("flows", [])
        if not isinstance(flows, list) or not all(isinstance(flow, dict) for flow in flows):
            return None
        return [
            {key: item for key, item in flow.items() if key not in ignore_fields}
            for flow in flows
        ]

    expected_flows = cleaned_flows(expected_json)
    actual_flows = cleaned_flows(actual_json)
    if expected_flows is None or actual_flows is None:
        return False
    if not expected_flows or not actual_flows:
        # Nothing to compare field by field: only "no flows" on both sides matches.
        return not expected_flows and not actual_flows

    # Priority must exist and be an int or a string, but need not equal the expected one.
    for flow in actual_flows:
        if "priority" not in flow or not isinstance(flow["priority"], (int, str)):
            return False

    # Timeout must match the first expected flow exactly ('0' and 0 are the same).
    expected_timeout = normalize_value(expected_flows[0].get("timeout", 0))
    for flow in actual_flows:
        if normalize_value(flow.get("timeout", 0)) != expected_timeout:
            return False

    # Strictly compare exact match fields of the first flow on each side.
    expected_flow, actual_flow = expected_flows[0], actual_flows[0]
    for field in exact_match_fields:
        in_expected = field in expected_flow
        if in_expected != (field in actual_flow):
            return False
        if in_expected and not dict_equal_ignore_order(expected_flow[field], actual_flow[field], ignore_fields):
            return False

    return True

In [10]:
# expected_json = {'flows': [{'priority': 80, 'timeout': 0, 'isPermanent': 'true', 'deviceId': 'of:0000000000000004', 'treatment': {'instructions': [{'type': 'OUTPUT', 'port': '1'}]}, 'selector': {'criteria': [{'type': 'ETH_TYPE', 'ethType': '0x800'}]}}]}
# actual_json = {'flows': [{'priority': '', 'timeout': '0', 'isPermanent': 'true', 'deviceId': 'of:0000000000000004', 'treatment': {'instructions': [{'type': 'OUTPUT', 'port': 1}]}, 'selector': {'criteria': [{'type': 'ETH_TYPE', 'ethType': '0x800'}]}}]}

# print(compare_onos_json(expected_json, actual_json))  # ✅ Should return True

# expected_json = {'flows': [{'priority': 102, 'timeout': 0, 'isPermanent': 'true', 'deviceId': 'of:0000000000000001', 'treatment': {'instructions': [{'type': 'OUTPUT', 'port': '3'}]}, 'selector': {'criteria': [{'type': 'ETH_TYPE', 'ethType': '0x0800'}, {'type': 'IP_PROTO', 'protocol': 1}, {'type': 'IPV4_DST', 'ip': '10.0.0.1/32'}]}}]}
# actual_json = {'flows': [{'priority': '501', 'timeout': '0', 'isPermanent': 'true', 'deviceId': 'of:0000000000000001', 'treatment': {'instructions': [{'type': 'OUTPUT', 'port': 3}]}, 'selector': {'criteria': [{'type': 'ETH_TYPE', 'ethType': '0x0800'}, {'type': 'IP_PROTO', 'protocol': '1'}, {'type': 'IPV4_DST', 'ip': '10.0.0.1/32'}]}}]}

# print(compare_onos_json(expected_json, actual_json))  # ✅ Should return True

# expected_json = {'flows': [{'priority': 110, 'timeout': 0, 'isPermanent': 'true', 'deviceId': 'of:0000000000000001', 'treatment': {'instructions': [{'type': 'OUTPUT', 'port': '2'}]}, 'selector': {'criteria': [{'type': 'ETH_TYPE', 'ethType': '0x800'}, {'type': 'IPV4_DST', 'ip': '10.0.0.3/32'}, {'type': 'IP_PROTO', 'protocol': 6}, {'type': 'TCP_DST', 'tcpPort': 80}]}}]}
# actual_json = {'flows': [{'priority': 150, 'timeout': 0, 'isPermanent': 'true', 'deviceId': 'of:0000000000000001', 'treatment': {'instructions': [{'type': 'OUTPUT', 'port': '2'}]}, 'selector': {'criteria': [{'type': 'ETH_TYPE', 'ethType': '0x800'}, {'type': 'IP_PROTO', 'protocol': 6}, {'type': 'TCP_DST', 'tcpPort': 80}, {'type': 'IPV4_DST', 'ip': '10.0.0.3/32'}]}}]}

# print(compare_onos_json(expected_json, actual_json))  # ✅ Should return True

In [None]:
# Ollama model tags to evaluate, in the order in which they are run.
my_models_real = [
    "marco-o1",
    "mistral",
    "mistral-nemo",
    "deepseek-coder",
    "starcoder",
    "codegemma",
    "starcoder2",
    "openchat",
    "phi3",
    "dolphin-mistral",
    "wizardlm2",
    "phi",
    "yi",
    "zephyr",
    "command-r",
    "llava-llama3",
    "codestral",
    "codellama:34b",
    "codellama",
    "llama2",
    "llama3",
    "llama3.1",
    "llama3.2",
    "qwen",
    "qwen2",
    "qwen2.5",
    "gemma2:27b",
    "huihui_ai/qwq-abliterated",
    "huihui_ai/qwq-fusion",
    "qwq",
    "llama3.3",
    "llama2:70b",
    "codellama:70b",
]

# Model tag handed to the embedding client.
default_model = "llama2"

# Numbers of in-context examples tried for every model (0 means no examples are added).
num_context_examples = [0, 1, 3, 6, 9]

In [12]:
# Base URL of the Ollama instance that receives the generation requests.
ollama_server_url = "http://localhost:11435"
# Base URL of the Ollama instance that computes the embeddings.
ollama_embedding_url = "http://localhost:11434"

In [None]:
# Embedding client, pointed at the embedding endpoint.
ollama_emb = OllamaEmbeddings(base_url=ollama_embedding_url, model=default_model)

# Generation client; requests time out after 120 seconds.
client = Client(timeout=120, host=ollama_server_url)

# Intent-translation dataset: one natural-language instruction and its expected output per row.
custom_dataset = pd.read_csv('ONOS_intent_translation_dataset_for_LLM_Evaluation.csv')

# Refuse to continue when either required column is absent.
_missing_columns = {'instruction', 'output'} - set(custom_dataset.columns)
if _missing_columns:
    raise ValueError("The dataset must have 'instruction' and 'output' columns.")

# Half of the rows feed the example store, the other half is evaluated; the fixed
# random_state keeps the shuffled split identical between runs.
trainset, testset = train_test_split(
    custom_dataset,
    shuffle=True,
    random_state=42,
    test_size=0.5,
)

# The system prompt used for every request is the ONOS translation prompt.
SYSTEM_PROMPT = TRANSLATION_PROMPT_ONOS

In [None]:
# One timestamp for both names, so the accuracy CSV and the details log written by the
# same run always carry the same suffix (two separate time.time() calls can straddle a
# second boundary).
run_timestamp = int(time.time())
csv_file = f"Intent_translation_accuracies_ONOS_{run_timestamp}.csv"
output_file = f"Intent_translation_details_ONOS_{run_timestamp}.txt"
# Details log, opened for writing and left open for the evaluation cell, which closes it.
# The encoding is fixed so that non-ASCII text in intents or model output cannot fail on
# platforms whose default encoding is not UTF-8.
file = open(output_file, "w", encoding="utf-8")

In [None]:
# Evaluation sweep: for every (number of context examples, model) pair, translate each
# test intent, score it against the expected ONOS JSON, write the details to `file`
# and append one summary row to `csv_file`.
MAX_GENERATION_TRIES = 15  # generation attempts per test case before it is given up

results = []

try:
    for num_examples in num_context_examples:
        # The example store depends only on the training set and on k, not on the model
        # under evaluation, so it is built once per num_examples instead of once per
        # model. With zero examples it is never queried and is not built at all.
        example_selector = None
        if num_examples > 0:
            # create example selector with one example, then clear the data and add all examples
            # this is a trick to reset data and remove data from continuous learning in previous run
            # NOTE(review): fetch_k equals k here, which presumably leaves MMR nothing to
            # diversify over -- confirm this is intended.
            example_selector = MaxMarginalRelevanceExampleSelector.from_examples(
                [{"instruction": trainset.iloc[0]["instruction"], "output": trainset.iloc[0]["output"]}],
                ollama_emb,
                Chroma,
                input_keys=["instruction"],
                k=num_examples,
                vectorstore_kwargs={"fetch_k": min(num_examples, len(trainset))}
            )

            # Clear and add all examples from the trainset
            example_selector.vectorstore.reset_collection()
            for _, row in trainset.iterrows():
                example_selector.add_example({
                    "instruction": row["instruction"],
                    "output": row["output"]
                })

        for model in my_models_real:

            correct_translations = 0
            total_samples = len(testset)
            processing_times = []

            for _, testcase in testset.iterrows():
                intent = testcase["instruction"]
                expected_output = testcase["output"]

                # Reset per test case so that a failed generation can never be scored
                # with the output or timing of the previous test case.
                actual_output = None
                proc_time_s = None

                for _attempt in range(MAX_GENERATION_TRIES):
                    try:
                        time.sleep(0.1)
                        current_time = time.time()

                        # Rebuild the prompt from SYSTEM_PROMPT on every attempt; appending
                        # to a shared variable would duplicate the examples on each retry.
                        # NOTE(review): the examples follow the prompt text without any
                        # separator -- confirm that is intended.
                        system_prompt = SYSTEM_PROMPT
                        if num_examples > 0:
                            examples = example_selector.select_examples({"instruction": intent})
                            example_str = "\n\n\n".join(
                                "Input: " + example["instruction"] + "\n\nOutput: " + example["output"]
                                for example in examples
                            )
                            system_prompt = SYSTEM_PROMPT + example_str + "\n\n\n"

                        response = client.generate(
                            model=model,
                            options={
                                'temperature': 0.6,
                                'num_ctx': 8192,
                                'top_p': 0.3,
                                'num_predict': 1024,
                                'num_gpu': 99,
                            },
                            stream=False,
                            system=system_prompt,
                            prompt=intent,
                            format='json'
                        )
                        actual_output = response['response']
                        proc_time_s = (time.time() - current_time)
                        processing_times.append(proc_time_s)
                        break

                    except Exception as e:
                        print(f"Error in generating translation for {model} with {num_examples} examples: {e}")
                        sys.stdout.flush()
                else:
                    # Every attempt raised: this test case counts as not translated.
                    print(f"\n{model} produced no response after {MAX_GENERATION_TRIES} tries. Skipping this test case\n")

                if actual_output is None:
                    file.write(f"Input: {intent}")
                    file.write(f"\nResult: model: {model}, num context examples: {num_examples}, "
                               f"no response after {MAX_GENERATION_TRIES} tries\n\n")
                    print("========End of a testcase===========\n")
                    continue

                try:
                    expected_output = json.loads(expected_output)
                    actual_output = json.loads(actual_output)

                    device_id = extract_switch_id_ONOS(intent)

                    for flow in actual_output.get("flows", []):  # Iterate over all flows
                        flow["deviceId"] = device_id  # Replace the device ID

                    # Check correctness
                    if compare_onos_json(expected_output, actual_output):
                        correct_translations += 1

                    file.write(f"Input: {intent}")
                    file.write("\nExpected Output\n")
                    file.write(f"Expected: {expected_output}")
                    file.write("\nActual Output\n")
                    file.write(f"Actual: {actual_output}")

                    # Write the first output
                    file.write(f"\nResult: model: {model}, num context examples: {num_examples}, processing time: {round(proc_time_s, 2)}\n")
                    # Write the second output
                    file.write(f"Diff: {jsondiff.diff(expected_output, actual_output)}\n\n")

                except Exception as e:
                    print("Exception found in post-translation processing: ", e)

                print("========End of a testcase===========\n")
            print("\n************Next MODEL***********")
            file.write(f"*************************Next MODEL*************************\n\n")
            file.flush()  # keep the details of finished models on disk if a later one fails

            # Compute accuracy over all test samples; average the time over the
            # generations that actually completed.
            accuracy = round((correct_translations / total_samples) * 100, 2) if total_samples else 0.0
            if processing_times:
                avg_time = round(sum(processing_times) / len(processing_times), 2)
            else:
                avg_time = float("nan")

            # Store results
            result_row = {
                "model": model,
                "context_example": num_examples,
                "num_test_samples": total_samples,
                "correct_translations": correct_translations,
                "accuracy (%)": accuracy,
                "avg_time_per_translation (s)": avg_time
            }
            results.append(result_row)

            # Append this row right away so an interrupted sweep keeps the finished rows.
            pd.DataFrame([result_row]).to_csv(csv_file, mode='a', header=not os.path.exists(csv_file), index=False)

finally:
    # Close the file explicitly, also when the sweep is interrupted
    file.close()