In [None]:
import sys

# Import pysqlite3 before swapping: sys.modules only holds modules that have
# already been imported, so popping "pysqlite3" without this raises KeyError
# in a fresh interpreter, and again whenever this cell is re-run (the pop
# below removes the entry each time).
__import__("pysqlite3")
# Register pysqlite3 under the name "sqlite3" so that every later
# `import sqlite3` resolves to it instead of the standard-library module
# (presumably needed by the Chroma import further down -- confirm).
sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")
import time
import jsondiff
import pandas as pd
from langchain_chroma import Chroma
from langchain_core.example_selectors import MaxMarginalRelevanceExampleSelector
#from langchain_community.embeddings import OllamaEmbeddings
from langchain_ollama import OllamaEmbeddings
from ollama import Client
import json, jsondiff
from sklearn.model_selection import train_test_split

In [20]:
# System prompt for the intent-to-policy translation task. It instructs the
# model to reply in JSON only, shows three flow templates (forwarding with an
# optional queue, drop/blocking, and port-based forwarding), then lists the
# field descriptions and the output rules.
# NOTE(review): the ```json fence opened before the first template is never
# closed, and that template carries `//**optional**` markers although the
# rules at the end forbid comments in the output -- confirm whether this is
# intentional. Any edit to this text changes what the model receives.
TRANSLATION_PROMPT = """Your task is to transform natural language network intents into JSON-formatted network policies compatible with the OpenDaylight (ODL) SDN controller's configuration datastore.

You only reply in JSON, no natural language. The network intents can represent different traffic control behaviors, such as:

a. **Traffic Forwarding and Queue-Based Traffic Control Rule:** Define rules for forwarding traffic based on IPv4 destination, TCP/UDP ports, and optionally assign traffic to specific queues for prioritization. 
b. **Firewall and Blocking:** Define rules to drop traffic based on specific match criteria (e.g., source IP, destination IP).  
c. **Port-Based Forwarding:** Redirect traffic entering a specific port to another designated port.

### JSON STRUCTURE:

1. **Traffic Forwarding and Queue-Based Traffic Control Rule:**  

```json
{
  "flow-node-inventory:flow": [
    {
      "id": "<unique_id>",
      "priority": <integer>,
      "table_id": <integer>,
      "flow-name": "<descriptive_name>",
      "hard-timeout": <integer>,
      "idle-timeout": <integer>,
      "match": {
        "ethernet-match": {
          "ethernet-type": {
            "type": 2048
          }
        },
        "ipv4-source": "<ip_address/mask>", //**optional**
        "ipv4-destination": "<ip_address/mask>", //**optional**
        "ip-match": {
          "ip-protocol": <integer>
        },
        "tcp-destination-port": <integer>,
        "udp-destination-port": <integer>
      },
      "instructions": {
        "instruction": [
          {
            "order": 0,
            "apply-actions": {
              "action": [
                {
                  "order": 0,
                  "set-queue-action": {
                    "queue-id": <queue_id>
                  }
                },
                {
                  "order": 1,
                  "output-action": {
                    "output-node-connector": "<port_number>"
                  }
                }
              ]
            }
          }
        ]
      }
    }
  ]
}

2. **Blocking or Dropping Rule:**  

{
  "flow-node-inventory:flow": [
    {
      "id": "<unique_id>",
      "priority": <integer>,
      "table_id": <integer>,
      "flow-name": "<descriptive_name>",
      "hard-timeout": <integer>,
      "idle-timeout": <integer>,
      "match": {
        "ethernet-match": {
          "ethernet-type": {
            "type": <integer>
          }
        },
        "ipv4-source": "<ip_address/mask>",
        "ipv4-destination": "<ip_address/mask>"
      },
      "instructions": {
        "instruction": [
          {
            "order": 0,
            "apply-actions": {
              "action": [
                {
                  "order": 0,
                  "drop-action": {}
                }
              ]
            }
          }
        ]
      }
    }
  ]
}

3. **Port-Based Forwarding Rule:**  

{
  "flow-node-inventory:flow": [
    {
      "id": "<unique_id>",
      "priority": <integer>,
      "table_id": <integer>,
      "flow-name": "<descriptive_name_summerizing_the_intent>",
      "hard-timeout": <integer>,
      "idle-timeout": <integer>,
      "match": {
        "in-port": <port_number>
      },
      "instructions": {
        "instruction": [
          {
            "order": 0,
            "apply-actions": {
              "action": [
                {
                  "order": 0,
                  "output-action": {
                    "output-node-connector": "<port_number>"
                  }
                }
              ]
            }
          }
        ]
      }
    }
  ]
}

Field Descriptions: 
id: A number representing a unique identifier for the flow (0 for default).
priority: Priority level (higher numbers indicate higher priority). For dropping or blocking or firewall rule, assign priority greater than 300.
table_id: An integer representing the flow table identifier (0 for default).
flow-name: A short, descriptive flow name that summerizes the intent.
hard-timeout: Timeout in seconds after which the flow is removed (0 for default).
idle-timeout: Timeout in seconds after which the flow is removed if there's no activity (0 for default).
ethernet-type: Ethernet protocol type (e.g., 2048 for IPv4).
ipv4-destination: IPv4 address in CIDR notation (e.g., 10.0.0.1/32). Note: This field is optional. Don't include it unless IP address (e.g., 10.0.0.1) is explicitly mentioned in the intent.
ipv4-source: Source IP address (optional). Note: This field is optional. Don't include it unless IP address (e.g., 10.0.0.1) is explicitly mentioned in the intent.
ip-protocol: Use "ip-match" and "ip-protocol" when specifying specific transport layer protocols (e.g., 6 for TCP, 17 for UDP, 1 for ICMP (optional)).
tcp-source-port: TCP port number (optional).
tcp-destination-port: TCP port number (optional).
udp-source-port: UDP port number (optional).
udp-destination-port: UDP port number (optional).
in-port: A value representing incoming interface port number (optional).
output-action: Use "output-action" and "output-node-connector" to specify the output port number (optional).
set-queue-action: Use "set-queue-action" and "queue-id" when the intent specifies assigning traffic to a queue (The "queue-id" is an integer (0 for default)).
drop-action: Use {} to indicate packet dropping.

RULES: 
Each id must be unique.
Set priority values appropriately (higher for critical rules, lower for defaults). Set priority very high (e.g., 500) for queue related rules.
Don't include any **optional field** unless it is explicitly mentioned in the intent.
Use valid match conditions (ipv4-destination, tcp-destination-port, ipv4-source, in-port) depending on the intent type.
Ensure valid ODL-compliant JSON syntax.
Avoid duplicate keys and empty fields.
Verify JSON structure for correctness before responding.
Always respond with valid JSON only, with no additional text, comments, or explanations.
If the intent cannot be mapped, return an empty JSON object {}."""

In [21]:
# Ollama model tags to evaluate, in the order they will be run.
my_models = [
    "marco-o1",
    "mistral",
    "mistral-nemo",
    "deepseek-coder",
    "starcoder",
    "codegemma",
    "starcoder2",
    "openchat",
    "phi3",
    "dolphin-mistral",
    "wizardlm2",
    "phi",
    "yi",
    "zephyr",
    "command-r",
    "llava-llama3",
    "codestral",
    "codellama:34b",
    "codellama",
    "llama2",
    "llama3",
    "llama3.1",
    "llama3.2",
    "qwen",
    "qwen2",
    "qwen2.5",
    "gemma2:27b",
    "huihui_ai/qwq-abliterated",
    "huihui_ai/qwq-fusion",
    "qwq",
]

# Currently left out of the sweep: "tinyllama", "orca-mini"

# Fallback model tag used where a single model is needed.
default_model = "llama2"

In [None]:
# Few-shot sizes to sweep; each entry is the number of in-context examples
# per request (0 means zero-shot).
num_examples = [0]

# Embeddings and generation are served from two different Ollama endpoints.
ollama_embedding_url, ollama_server_url = (
    "http://localhost:11434",
    "http://localhost:11435",
)

In [23]:
# Embedding backend handed to the example selector's vector store.
ollama_emb = OllamaEmbeddings(model=default_model, base_url=ollama_embedding_url)

# Client used for text generation against the generation endpoint.
client = Client(host=ollama_server_url, timeout=120)

# Read the intent -> policy pairs from the CSV file.
custom_dataset = pd.read_csv('custom_ODL_intent_dataset_formal_Final.csv')

# Fail early when either required column is missing.
if any(column not in custom_dataset.columns for column in ('instruction', 'output')):
    raise ValueError("The dataset must have 'instruction' and 'output' columns.")

# Reproducible, shuffled 50/50 split into train and test halves.
trainset, testset = train_test_split(
    custom_dataset,
    test_size=0.5,
    random_state=42,
    shuffle=True,
)

# The translation prompt doubles as the system prompt for every request.
SYSTEM_PROMPT = TRANSLATION_PROMPT

In [None]:
# Evaluation driver: for every few-shot size and every model, translate each
# test intent, diff the model's JSON against the expected JSON and append the
# outcome to the results file.
output_file = "output_results_ODL_intent_customDataset_translate.txt"

# Upper bound on generate() attempts per test case. Without a bound, a
# permanently failing request (e.g. a model that is not available on the
# server) would be retried forever.
MAX_ATTEMPTS = 5

# Sampling / runtime options forwarded unchanged to every generate() call.
GENERATION_OPTIONS = {
    'temperature': 0.6,
    'num_ctx': 8192,
    'top_p': 0.3,
    'num_predict': 1024,
    'num_gpu': 99,
}


def _build_example_selector(k):
    """Return an MMR example selector over the whole train set that picks *k* examples."""
    # Create the selector with a single example, then clear the collection and
    # add every training example: this resets any data left over in the vector
    # store from a previous run.
    selector = MaxMarginalRelevanceExampleSelector.from_examples(
        [{"instruction": trainset.iloc[0]["instruction"], "output": trainset.iloc[0]["output"]}],
        ollama_emb,
        Chroma,
        input_keys=["instruction"],
        k=k,
        vectorstore_kwargs={"fetch_k": min(k, len(trainset))},
    )
    selector.vectorstore.reset_collection()
    for _, row in trainset.iterrows():
        selector.add_example({
            "instruction": row["instruction"],
            "output": row["output"],
        })
    return selector


def _build_system_prompt(intent, selector):
    """Return the system prompt for *intent*, followed by few-shot examples if *selector* is set."""
    if selector is None:
        return SYSTEM_PROMPT
    examples = selector.select_examples({"instruction": intent})
    example_str = "\n\n\n".join(
        "Input: " + ex["instruction"] + "\n\nOutput: " + ex["output"] for ex in examples
    )
    # Always start from the pristine SYSTEM_PROMPT so that a retried request
    # does not accumulate a second copy of the examples.
    return SYSTEM_PROMPT + example_str + "\n\n\n"


# The context manager closes the file even if the run is interrupted.
with open(output_file, "w", encoding="utf-8") as file:
    # Use a distinct loop variable: reusing `num_examples` would rebind the
    # list to an int and break any re-run of this cell.
    for n_shots in num_examples:
        # The selector does not depend on the model, so build it once per
        # few-shot size, and not at all for zero-shot runs where it is unused.
        example_selector = _build_example_selector(n_shots) if n_shots > 0 else None

        for model in my_models:
            for _, testcase in testset.iterrows():
                intent = testcase["instruction"]
                expected_output = testcase["output"]

                for _attempt in range(MAX_ATTEMPTS):
                    try:
                        time.sleep(0.1)
                        current_time = time.time()
                        # Example selection stays inside the timed section so
                        # the reported processing time keeps its meaning.
                        system_prompt = _build_system_prompt(intent, example_selector)
                        response = client.generate(
                            model=model,
                            options=GENERATION_OPTIONS,
                            stream=False,
                            system=system_prompt,
                            prompt=intent,
                            format='json',
                        )
                        actual_output = response['response']
                        proc_time_s = time.time() - current_time
                        break
                    except Exception as e:
                        # Best-effort retry: the failure type depends on the
                        # client/transport, so log it and try again.
                        print("Exception on Input: ", e)
                        sys.stdout.flush()
                else:
                    # Every attempt failed: skip this test case instead of
                    # spinning forever.
                    print(f"Giving up on model {model} after {MAX_ATTEMPTS} failed attempts")
                    print("\n========End of a testcase===========\n")
                    continue

                try:
                    expected_json = json.loads(expected_output)
                    actual_json = json.loads(actual_output)
                    # Compute the diff before writing anything so that a
                    # failure never leaves a "Result" line without its "Diff".
                    diff = jsondiff.diff(expected_json, actual_json)
                except Exception as e:
                    print("Exception found: ", e)
                else:
                    file.write(f"\nResult: model: {model}, num context examples: {n_shots}, processing time: {round(proc_time_s, 2)}\n")
                    file.write(f"\nDiff: {diff}\n")

                print("\n========End of a testcase===========\n")
            print("\n************Next MODEL***********")