**Installations**

In [None]:
%%capture --no-stderr
# Install/upgrade the LangGraph / LangChain / LangSmith / OpenAI stack used by this notebook.
# %%capture hides the installer's stdout; --no-stderr leaves stderr visible.
# NOTE(review): -U with no version pins means each run may resolve different versions —
# pin them if the experiments must be reproducible.
%pip install -U langgraph langchain langchain-community langchain-openai openevals langsmith openai tiktoken pycountry

In [None]:
# System prerequisites for BGPStream on a Debian/Ubuntu host.
!sudo apt-get update
!sudo apt-get install -y curl apt-transport-https ssl-cert ca-certificates gnupg lsb-release
# Register the wandio (Cloudsmith) and CAIDA apt repositories, plus CAIDA's signing key.
!curl -1sLf 'https://dl.cloudsmith.io/public/wand/libwandio/cfg/setup/bash.deb.sh' | sudo -E bash
!echo "deb https://pkg.caida.org/os/$(lsb_release -si|awk '{print tolower($0)}') $(lsb_release -sc) main" | sudo tee /etc/apt/sources.list.d/caida.list
!sudo wget -O /etc/apt/trusted.gpg.d/caida.gpg https://pkg.caida.org/os/ubuntu/keyring.gpg
# Refresh the package index so the new repositories are visible, then install
# non-interactively: without -y the install stops at apt's confirmation prompt,
# which nobody answers in a notebook run.
!sudo apt-get update
!sudo apt-get install -y bgpstream

In [None]:
# Python bindings for BGPStream / BGPKIT and the Neo4j driver.
# %pip (instead of `!pip` / `!python3 -m pip`) installs into the environment of the
# running kernel, so the packages are importable from this notebook.
# One command per package keeps the installs independent of each other.
%pip install pybgpstream
%pip install pybgpkit-parser
%pip install pybgpkit
%pip install neo4j

**Mounting Google Drive**

In [None]:
from google.colab import drive

# Mount Google Drive so the project files (prompts/, datasets/, evaluations/) are reachable.
drive.mount('/content/drive')
# Use the absolute path: a relative `%cd drive/MyDrive/LLM4BGP` only works from /content,
# so re-running this cell after the first successful run would fail.
%cd /content/drive/MyDrive/LLM4BGP

Mounted at /content/drive
/content/drive/MyDrive/LLM4BGP


**Importing Dependencies**

In [None]:
import getpass
from pandas import DataFrame
import os
import json
import ast
import re
import yaml
import subprocess
from openai import OpenAI
from pathlib import Path
from langchain_openai import ChatOpenAI

**API Keys Management**

In [None]:
# Ask for the OpenAI key without echoing it. The prompt is skipped when the variable is
# already set (cell re-run, or key exported beforehand), so "Run All" does not block
# on input unnecessarily.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

Enter your OpenAI API key: ··········


In [None]:
from langsmith import wrappers
from langsmith import Client
from evaluations.lang_smith_aux import *

In [None]:
# Ask for the LangSmith key without echoing it. The prompt is skipped when the variable
# is already set (cell re-run, or key exported beforehand), so "Run All" does not block
# on input unnecessarily.
if not os.environ.get("LANGSMITH_API_KEY"):
    os.environ["LANGSMITH_API_KEY"] = getpass.getpass("Enter your LangSmith API key: ")

Enter your LangSmith API key: ··········


In [None]:
# LangSmith settings: turn tracing on, log under the 'test' project, and point at the
# public API endpoint. The client authenticates with the key stored in the environment.
os.environ.update({
    'LANGSMITH_TRACING': 'true',
    'LANGSMITH_PROJECT': 'test',
    'LANGSMITH_ENDPOINT': 'https://api.smith.langchain.com',
})
client = Client(api_key=os.environ['LANGSMITH_API_KEY'])

**Loading Models**

In [None]:
# Chat model passed to every evaluation call in this notebook.
# NOTE(review): no temperature is set here, although the experiment names in the saved
# outputs carry a "temp-0.0" suffix — presumably applied inside the evaluation helper; confirm.
model = ChatOpenAI(model_name="gpt-4.1")

**Datasets Update**

In [None]:
from datasets.datasets_preparation import *

In [None]:
# DISABLED CELL — optional refresh of the PeeringDB snapshot from CAIDA's public data.
# When uncommented, the shell pipeline below picks the newest year directory, then the
# newest month directory, then the newest peeringdb_2_dump_*.json file, and saves it as
# tools/peeringdb/peeringdb_latest.json. Remove the leading "# " from every line to run it.
# !LATEST_YEAR=$(curl -s https://publicdata.caida.org/datasets/peeringdb/ | grep -oP '(?<=href=")[0-9]{4}/' | sort -r | head -n1) && \
# LATEST_MONTH=$(curl -s https://publicdata.caida.org/datasets/peeringdb/${LATEST_YEAR} | grep -oP '(?<=href=")[0-9]{2}/' | sort -r | head -n1) && \
# LATEST_FILE=$(curl -s https://publicdata.caida.org/datasets/peeringdb/${LATEST_YEAR}${LATEST_MONTH} | grep -oP 'peeringdb_2_dump_[0-9_]+\.json' | sort -r | head -n1) && \
# wget -q https://publicdata.caida.org/datasets/peeringdb/${LATEST_YEAR}${LATEST_MONTH}${LATEST_FILE} \
#      -O tools/peeringdb/peeringdb_latest.json && \
# echo "Saved → tools/peeringdb/peeringdb_latest.json"

In [None]:
# Size argument handed to prepare_orgs_dataset.
# NOTE(review): presumably the number of Q&A examples to generate per dataset, and the
# helper presumably comes from the `datasets.datasets_preparation` star import — confirm.
Q_A_DATASET_SIZE = 50
prepare_orgs_dataset(Q_A_DATASET_SIZE)

ASN:1124, Organization:Universiteit van Amsterdam
ASN:31742, Organization:Sota Solutions Ltd.
ASN:53403, Organization:Mount Royal University
ASN:370, Organization:DoD Network Information Center
ASN:160, Organization:The University of Chicago
ASN:28545, Organization:Cablemas Telecomunicaciones SA de CV
ASN:16050, Organization:Refinitiv Limited
ASN:34606, Organization:B.B.Bell SPA
ASN:23650, Organization:China Telecom
ASN:13438, Organization:Vivio Technologies
ASN:37363, Organization:Faircape
ASN:13445, Organization:Cisco Webex LLC
ASN:38341, Organization:China Internet Network Infomation Center
ASN:11841, Organization:Ultimate Internet Access, Inc
ASN:2379, Organization:CenturyLink Communications, LLC
ASN:13323, Organization:FMR LLC
ASN:17623, Organization:China Unicom
ASN:14335, Organization:VC3, Inc.
ASN:4587, Organization:One World Internetworking, Inc
ASN:45916, Organization:Gujarat Telelink Pvt Ltd
ASN:18245, Organization:China Internet Network Infomation Center
ASN:17603, Organiza

**Task 1 - Largest AS in a given organization**

In [None]:
# YAML file holding every prompt variant for Task 1 (path is relative to the project directory).
# NOTE(review): "retreival" is presumably the actual on-disk directory spelling — do not
# correct it here without renaming the folder.
PROMPT_FILE = Path("prompts/multi_source_knowledge_retreival/largest_as2org_prompts.yaml")

In [None]:
# Parse the Task 1 prompt definitions (UTF-8 YAML) into the `prompts` mapping.
prompts = yaml.safe_load(PROMPT_FILE.read_text(encoding="utf-8"))

In [None]:
# Name under which the Task 1 Q&A dataset is looked up (and, if missing, created) in LangSmith.
largest_as_in_org_dataset_name = "Largest AS in Org Q&A dataset"

In [None]:
# Fetch the Task 1 dataset from LangSmith by name. The name filter is applied by the
# server; the equality check is kept so that only an exact match is ever accepted.
largest_as_in_org_dataset = next(
    (
        dataset
        for dataset in client.list_datasets(dataset_name=largest_as_in_org_dataset_name)
        if dataset.name == largest_as_in_org_dataset_name
    ),
    None,
)

if largest_as_in_org_dataset is None:
    print(f"Dataset '{largest_as_in_org_dataset_name}' not found.")
    # Load and convert the local Q&A file BEFORE creating anything remotely: if the file
    # is missing or malformed, no dataset is created, so a later run cannot find an
    # empty dataset and silently evaluate on zero examples.
    largest_as_in_org_dataset_path = 'datasets/as2org/largest_as2org_qas.json'
    with open(largest_as_in_org_dataset_path, 'r', encoding='utf-8') as f:
        largest_as_in_org_q_a = json.load(f)
    examples = convert_to_examples(largest_as_in_org_q_a)

    largest_as_in_org_dataset = client.create_dataset(
        dataset_name=largest_as_in_org_dataset_name,
        description="Largest AS in Org Q&A dataset in LangSmith.",
    )
    client.create_examples(dataset_id=largest_as_in_org_dataset.id, examples=examples)
    print(f"Dataset '{largest_as_in_org_dataset_name}' created.")

Dataset 'Largest AS in Org Q&A dataset' not found.
Dataset 'Largest AS in Org Q&A dataset' created.


In [None]:
# Shared Task 1 prompt pieces read from the YAML file.
# NOTE(review): `largest_as2org_context` is not referenced by any cell shown in this
# notebook section — confirm whether it is still needed.
largest_as2org_context = prompts["largest_as2org_context"]
# Passed as the fourth argument to every Task 1 evaluation call (presumably the
# hallucination-judge prompt — confirm in evaluations/lang_smith_aux).
largest_as2org_hall_prompt = prompts["largest_as2org_hall_prompt"]

Direct Task Prompting

In [None]:
# Direct-task (zero-shot) prompt for Task 1, taken from the loaded YAML prompts.
largest_as2org_direct_task_prompt = prompts["largest_as2org_direct_task_prompt"]

In [None]:
# Task 1, direct-task prompt. The helper reports the run to LangSmith under a name
# derived from `experiment`.
# NOTE(review): the meaning of the trailing empty list is defined by the helper — confirm.
experiment = "zero-shot-prompting-largest-as-in-org"
eval_prompt_engineering_on_openai(
    client,
    model,
    largest_as2org_direct_task_prompt,
    largest_as2org_hall_prompt,
    largest_as_in_org_dataset_name,
    experiment,
    [],
)

View the evaluation results for experiment: 'zero-shot-prompting-largest-as-in-org-temp-0.0-8b32ec5e' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/1adba3ef-a9ce-4c9c-a361-8c7eb91a7997/compare?selectedSessions=33263fab-de88-4ac7-82bc-708bb5c19baf




0it [00:00, ?it/s]

Role-Based Prompting

In [None]:
# Role-based prompt for Task 1, taken from the loaded YAML prompts.
largest_as2org_role_based_prompt = prompts["largest_as2org_role_based_prompt"]

In [None]:
# Task 1, role-based prompt. The helper reports the run to LangSmith under a name
# derived from `experiment`.
experiment = "role-based-prompting-largest-as-in-org"
eval_prompt_engineering_on_openai(
    client,
    model,
    largest_as2org_role_based_prompt,
    largest_as2org_hall_prompt,
    largest_as_in_org_dataset_name,
    experiment,
    [],
)

View the evaluation results for experiment: 'role-based-prompting-largest-as-in-org-temp-0.0-a766a1e5' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/1adba3ef-a9ce-4c9c-a361-8c7eb91a7997/compare?selectedSessions=b20d7a8c-faf3-455d-a3e2-4906b63cefcb




0it [00:00, ?it/s]

Multi-Step Reasoning

In [None]:
# Multi-step reasoning prompt for Task 1, taken from the loaded YAML prompts.
multi_step_reasoning_prompt_for_largest_as2org = prompts["multi_step_reasoning_prompt_for_largest_as2org"]

In [None]:
# Task 1, multi-step reasoning prompt. The helper reports the run to LangSmith under a
# name derived from `experiment`.
experiment = "multi-step-prompting-largest-as-in-org"
eval_prompt_engineering_on_openai(
    client,
    model,
    multi_step_reasoning_prompt_for_largest_as2org,
    largest_as2org_hall_prompt,
    largest_as_in_org_dataset_name,
    experiment,
    [],
)

View the evaluation results for experiment: 'multi-step-prompting-largest-as-in-org-temp-0.0-74215a46' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/1adba3ef-a9ce-4c9c-a361-8c7eb91a7997/compare?selectedSessions=df7e6713-1010-464d-acd7-6c9d1153166d




0it [00:00, ?it/s]

Few-Shot Learning

In [None]:
# Few-shot prompt for Task 1, taken from the loaded YAML prompts.
few_shot_prompt_for_largest_as2org = prompts["few_shot_prompt_for_largest_as2org"]

In [None]:
# Task 1, few-shot prompt. The helper reports the run to LangSmith under a name derived
# from `experiment`.
experiment = "few-shots-prompting-largest-as-in-org"
eval_prompt_engineering_on_openai(
    client,
    model,
    few_shot_prompt_for_largest_as2org,
    largest_as2org_hall_prompt,
    largest_as_in_org_dataset_name,
    experiment,
    [],
)

View the evaluation results for experiment: 'few-shots-prompting-largest-as-in-org-temp-0.0-514c8951' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/1adba3ef-a9ce-4c9c-a361-8c7eb91a7997/compare?selectedSessions=8ca9e429-64ca-47eb-9d42-578af27a2089




0it [00:00, ?it/s]

Chain-of-Thought (CoT) Prompting

In [None]:
# Chain-of-thought prompt for Task 1, taken from the loaded YAML prompts.
cot_prompt_for_largest_as2org = prompts["cot_prompt_for_largest_as2org"]

In [None]:
# Task 1, chain-of-thought prompt. The helper reports the run to LangSmith under a name
# derived from `experiment`.
experiment = "cot-prompting-largest-as-in-org"
eval_prompt_engineering_on_openai(
    client,
    model,
    cot_prompt_for_largest_as2org,
    largest_as2org_hall_prompt,
    largest_as_in_org_dataset_name,
    experiment,
    [],
)

View the evaluation results for experiment: 'cot-prompting-largest-as-in-org-temp-0.0-29453230' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/1adba3ef-a9ce-4c9c-a361-8c7eb91a7997/compare?selectedSessions=5c9b976f-a3f9-4fa1-81f4-c74efd6a4417




0it [00:00, ?it/s]

**Task 2 - Number of autonomous systems an organization owns**

In [None]:
# YAML file holding every prompt variant for Task 2 (path is relative to the project directory).
# NOTE(review): "retreival" is presumably the actual on-disk directory spelling — do not
# correct it here without renaming the folder.
PROMPT_FILE = Path("prompts/multi_source_knowledge_retreival/num_asns_org_prompts.yaml")

In [None]:
# Parse the Task 2 prompt definitions (UTF-8 YAML) into the `prompts` mapping.
prompts = yaml.safe_load(PROMPT_FILE.read_text(encoding="utf-8"))

In [None]:
# Name under which the Task 2 Q&A dataset is looked up (and, if missing, created) in LangSmith.
as_count_dataset_name = "Number of ASes in Org Q&A dataset"

In [None]:
# Fetch the Task 2 dataset from LangSmith by name. The name filter is applied by the
# server; the equality check is kept so that only an exact match is ever accepted.
as_count_dataset = next(
    (
        dataset
        for dataset in client.list_datasets(dataset_name=as_count_dataset_name)
        if dataset.name == as_count_dataset_name
    ),
    None,
)

if as_count_dataset is None:
    print(f"Dataset '{as_count_dataset_name}' not found.")
    # Load and convert the local Q&A file BEFORE creating anything remotely: if the file
    # is missing or malformed, no dataset is created, so a later run cannot find an
    # empty dataset and silently evaluate on zero examples.
    as_count_dataset_path = 'datasets/as2org/num_of_as2org_qas.json'
    with open(as_count_dataset_path, 'r', encoding='utf-8') as f:
        as_count_q_a = json.load(f)
    examples = convert_to_examples(as_count_q_a)

    as_count_dataset = client.create_dataset(
        dataset_name=as_count_dataset_name,
        description="AS count in Org Q&A dataset in LangSmith.",
    )
    client.create_examples(dataset_id=as_count_dataset.id, examples=examples)
    print(f"Dataset '{as_count_dataset_name}' created.")

In [None]:
# Passed as the fourth argument to every Task 2 evaluation call (presumably the
# hallucination-judge prompt — confirm in evaluations/lang_smith_aux).
as_count_hall_prompt = prompts["as_count_hall_prompt"]

Direct Task Prompting

In [None]:
# Direct-task (zero-shot) prompt for Task 2, taken from the loaded YAML prompts.
direct_task_prompt_num_of_asns_org = prompts["direct_task_prompt_num_of_asns_org"]

In [None]:
# Task 2, direct-task prompt. The helper reports the run to LangSmith under a name
# derived from `experiment`.
# NOTE(review): this passes the Dataset object, whereas the Task 1 cells pass the dataset
# name string in the same position — confirm the helper accepts both forms.
experiment = "zero-shot-prompting-as-count"
eval_prompt_engineering_on_openai(
    client,
    model,
    direct_task_prompt_num_of_asns_org,
    as_count_hall_prompt,
    as_count_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'zero-shot-prompting-as-count-temp-0.0-a763cdc6' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/e14443de-d24d-4d40-8d94-0b7f55921909/compare?selectedSessions=467c50c9-76f5-4600-9a78-8b2263a13489




0it [00:00, ?it/s]

Role-Based Prompting

In [None]:
# Role-based prompt for Task 2, taken from the loaded YAML prompts.
role_based_prompt_num_of_asns_org = prompts["role_based_prompt_num_of_asns_org"]

In [None]:
# Task 2, role-based prompt. The helper reports the run to LangSmith under a name
# derived from `experiment`.
experiment = "role-based-prompting-as-count"
eval_prompt_engineering_on_openai(
    client,
    model,
    role_based_prompt_num_of_asns_org,
    as_count_hall_prompt,
    as_count_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'role-based-prompting-as-count-temp-0.0-e4d6ba83' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/e14443de-d24d-4d40-8d94-0b7f55921909/compare?selectedSessions=cd6d99c5-2110-45a0-ad1e-1595c56d7314




0it [00:00, ?it/s]

Multi-Step Reasoning

In [None]:
# Multi-step reasoning prompt for Task 2, taken from the loaded YAML prompts.
multi_step_reasoning_prompt_num_of_asns_org = prompts["multi_step_reasoning_prompt_num_of_asns_org"]

In [None]:
# Task 2, multi-step reasoning prompt. The helper reports the run to LangSmith under a
# name derived from `experiment`.
experiment = "multi-step-prompting-as-count"
eval_prompt_engineering_on_openai(
    client,
    model,
    multi_step_reasoning_prompt_num_of_asns_org,
    as_count_hall_prompt,
    as_count_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'multi-step-prompting-as-count-temp-0.0-48cce567' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/e14443de-d24d-4d40-8d94-0b7f55921909/compare?selectedSessions=18e06791-4c24-4430-b494-8f6b3bb55e91




0it [00:00, ?it/s]

Few-Shot Learning

In [None]:
# Few-shot prompt for Task 2, taken from the loaded YAML prompts.
few_shots_prompt_num_of_asns_org = prompts["few_shots_prompt_num_of_asns_org"]

In [None]:
# Task 2, few-shot prompt. The helper reports the run to LangSmith under a name derived
# from `experiment`.
experiment = "few-shots-prompting-as-count"
eval_prompt_engineering_on_openai(
    client,
    model,
    few_shots_prompt_num_of_asns_org,
    as_count_hall_prompt,
    as_count_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'few-shots-prompting-as-count-temp-0.0-bdabbc03' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/e14443de-d24d-4d40-8d94-0b7f55921909/compare?selectedSessions=df7a384b-9f98-41a3-8558-81dd991972b2




0it [00:00, ?it/s]

Chain-of-Thought (CoT) Prompting

In [None]:
# Chain-of-thought prompt for Task 2, taken from the loaded YAML prompts.
cot_prompt_num_of_asns_org = prompts["cot_prompt_num_of_asns_org"]

In [None]:
# Task 2, chain-of-thought prompt. The helper reports the run to LangSmith under a name
# derived from `experiment`.
experiment = "cot-prompting-as-count"
eval_prompt_engineering_on_openai(
    client,
    model,
    cot_prompt_num_of_asns_org,
    as_count_hall_prompt,
    as_count_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'cot-prompting-as-count-temp-0.0-2f3646bb' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/e14443de-d24d-4d40-8d94-0b7f55921909/compare?selectedSessions=24261a23-e45a-4465-bfb0-f3762fbbd570




0it [00:00, ?it/s]

**Task 3 - AS2Org**

In [None]:
# YAML file holding every prompt variant for Task 3 (path is relative to the project directory).
# NOTE(review): "retreival" is presumably the actual on-disk directory spelling — do not
# correct it here without renaming the folder.
PROMPT_FILE = Path("prompts/multi_source_knowledge_retreival/as2org_prompts.yaml")

In [None]:
# Parse the Task 3 prompt definitions (UTF-8 YAML) into the `prompts` mapping.
prompts = yaml.safe_load(PROMPT_FILE.read_text(encoding="utf-8"))

In [None]:
# Name under which the Task 3 Q&A dataset is looked up (and, if missing, created) in LangSmith.
as2org_dataset_name = "AS2Org Q&A dataset"

In [None]:
# Fetch the Task 3 dataset from LangSmith by name. The name filter is applied by the
# server; the equality check is kept so that only an exact match is ever accepted.
as2org_dataset = next(
    (
        dataset
        for dataset in client.list_datasets(dataset_name=as2org_dataset_name)
        if dataset.name == as2org_dataset_name
    ),
    None,
)

if as2org_dataset is None:
    print(f"Dataset '{as2org_dataset_name}' not found.")
    # Load and convert the local Q&A file BEFORE creating anything remotely: if the file
    # is missing or malformed, no dataset is created, so a later run cannot find an
    # empty dataset and silently evaluate on zero examples.
    as2org_dataset_path = 'datasets/as2org/as2org_qas.json'
    with open(as2org_dataset_path, 'r', encoding='utf-8') as f:
        as2org_q_a = json.load(f)
    examples = convert_to_examples(as2org_q_a)

    as2org_dataset = client.create_dataset(
        dataset_name=as2org_dataset_name,
        description="AS2Org Q&A dataset in LangSmith.",
    )
    client.create_examples(dataset_id=as2org_dataset.id, examples=examples)
    print(f"Dataset '{as2org_dataset_name}' created.")

In [None]:
# Passed as the fourth argument to every Task 3 evaluation call (presumably the
# hallucination-judge prompt — confirm in evaluations/lang_smith_aux).
AS2ORG_HALLUCINATION_PROMPT = prompts["as2org_hall_prompt"]

Direct Task Prompting

In [None]:
# Direct-task (zero-shot) prompt for Task 3, taken from the loaded YAML prompts.
direct_prompt_as2org = prompts["direct_task_prompt_as2org"]

In [None]:
# Task 3, direct-task prompt. The helper reports the run to LangSmith under a name
# derived from `experiment`.
# NOTE(review): this passes the Dataset object, whereas the Task 1 cells pass the dataset
# name string in the same position — confirm the helper accepts both forms.
experiment = "zero-shot-prompting-as2org"
eval_prompt_engineering_on_openai(
    client,
    model,
    direct_prompt_as2org,
    AS2ORG_HALLUCINATION_PROMPT,
    as2org_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'zero-shot-prompting-as2org-temp-0.0-ff467a47' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/cf1de551-d0cb-4a03-960f-86ac05b84461/compare?selectedSessions=ecee5bc7-250d-4a77-bc6f-fb454600248e




0it [00:00, ?it/s]

Role-Based Prompting

In [None]:
# Role-based prompt for Task 3, taken from the loaded YAML prompts.
role_based_prompt_as2org = prompts["role_based_prompt_as2org"]

In [None]:
# Task 3, role-based prompt. The helper reports the run to LangSmith under a name
# derived from `experiment`.
experiment = "role-based-prompting-as2org"
eval_prompt_engineering_on_openai(
    client,
    model,
    role_based_prompt_as2org,
    AS2ORG_HALLUCINATION_PROMPT,
    as2org_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'role-based-prompting-as2org-temp-0.0-10cd3526' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/cf1de551-d0cb-4a03-960f-86ac05b84461/compare?selectedSessions=1a24aedf-dc63-40db-89a8-aacf3126ff6f




0it [00:00, ?it/s]

Multi-Step Reasoning

In [None]:
# Multi-step reasoning prompt for Task 3, taken from the loaded YAML prompts.
multi_step_reasoning_prompt_as2org = prompts["multi_step_reasoning_prompt_as2org"]

In [None]:
# Task 3, multi-step reasoning prompt. The helper reports the run to LangSmith under a
# name derived from `experiment`.
experiment = "multi-step-prompting-as2org"
eval_prompt_engineering_on_openai(
    client,
    model,
    multi_step_reasoning_prompt_as2org,
    AS2ORG_HALLUCINATION_PROMPT,
    as2org_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'multi-step-prompting-as2org-temp-0.0-1daeb800' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/cf1de551-d0cb-4a03-960f-86ac05b84461/compare?selectedSessions=1fd82a18-6d5f-42d2-9165-9b4ec2073d5d




0it [00:00, ?it/s]

Few-Shot Learning

In [None]:
# Few-shot prompt for Task 3, taken from the loaded YAML prompts.
few_shot_prompt_as2org = prompts["few_shot_prompt_as2org"]

In [None]:
# Task 3, few-shot prompt. The helper reports the run to LangSmith under a name derived
# from `experiment`.
experiment = "few-shots-prompting-as2org"
eval_prompt_engineering_on_openai(
    client,
    model,
    few_shot_prompt_as2org,
    AS2ORG_HALLUCINATION_PROMPT,
    as2org_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'few-shots-prompting-as2org-temp-0.0-6db58c9e' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/cf1de551-d0cb-4a03-960f-86ac05b84461/compare?selectedSessions=b17f980f-2cc6-4fe1-aa96-2c92fa893114




0it [00:00, ?it/s]

Chain-of-Thought (CoT) Prompting

In [None]:
# Chain-of-thought prompt for Task 3, taken from the loaded YAML prompts.
cot_prompt_as2org = prompts["cot_prompt_as2org"]

In [None]:
# Task 3, chain-of-thought prompt. The helper reports the run to LangSmith under a name
# derived from `experiment`.
experiment = "cot-prompting-as2org"
eval_prompt_engineering_on_openai(
    client,
    model,
    cot_prompt_as2org,
    AS2ORG_HALLUCINATION_PROMPT,
    as2org_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'cot-prompting-as2org-temp-0.0-793bc2b5' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/cf1de551-d0cb-4a03-960f-86ac05b84461/compare?selectedSessions=1f68c4e0-b124-40db-8b00-90081498206b




0it [00:00, ?it/s]