**Installations**

In [None]:
%%capture --no-stderr
# Install/upgrade the LangChain / LangGraph / LangSmith / OpenAI packages used below.
# stdout is captured to keep the notebook quiet; stderr is still shown (--no-stderr).
%pip install -U pycountry langgraph langchain langchain-community langchain-openai openevals langsmith openai tiktoken

In [None]:
!sudo apt-get update
!sudo apt-get install -y curl apt-transport-https ssl-cert ca-certificates gnupg lsb-release
!curl -1sLf 'https://dl.cloudsmith.io/public/wand/libwandio/cfg/setup/bash.deb.sh' | sudo -E bash
!echo "deb https://pkg.caida.org/os/$(lsb_release -si|awk '{print tolower($0)}') $(lsb_release -sc) main" | sudo tee /etc/apt/sources.list.d/caida.list
!sudo wget -O /etc/apt/trusted.gpg.d/caida.gpg https://pkg.caida.org/os/ubuntu/keyring.gpg
!sudo apt update; sudo apt-get install bgpstream

In [None]:
!pip install pybgpstream
!python3 -m pip install pybgpkit-parser
!python3 -m pip install pybgpkit
!pip install neo4j
!pip install pycountry

**Mounting Google Drive**

In [None]:
from google.colab import drive
drive.mount('/content/drive')
%cd drive/MyDrive/LLM4BGP

Mounted at /content/drive
/content/drive/MyDrive/LLM4BGP


**Importing Dependencies**

In [None]:
import getpass
import os
import json
import ast
import re
import yaml
import subprocess
from pathlib import Path
from openai import OpenAI
from langchain_openai import ChatOpenAI

In [None]:
from langsmith import wrappers
from langsmith import Client
from evaluations.lang_smith_aux import *

**API Keys Management**

In [None]:
# Ask for the OpenAI API key without echoing it and store it in this process's environment.
os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

Enter your OpenAI API key: ··········


In [None]:
# Ask for the LangSmith API key without echoing it; it is read back when the client is built.
os.environ["LANGSMITH_API_KEY"] = getpass.getpass("Enter your LangSmith API key: ")

Enter your LangSmith API key: ··········


In [None]:
# LangSmith settings (tracing on, project name, API endpoint) followed by the
# client object that every dataset and evaluation cell below uses.
os.environ.update(
    {
        "LANGSMITH_TRACING": "true",
        "LANGSMITH_PROJECT": "test",
        "LANGSMITH_ENDPOINT": "https://api.smith.langchain.com",
    }
)
client = Client(api_key=os.environ["LANGSMITH_API_KEY"])

**Loading Models**

In [None]:
# Chat model passed to every evaluation call in this notebook.
model = ChatOpenAI(model_name="gpt-4.1")

**Datasets Update**

In [None]:
from datasets.datasets_preparation import *

In [None]:
# Dataset regeneration is intentionally disabled: every line of this cell is commented out.
# Uncomment to rebuild the Q&A datasets with Q_A_DATASET_SIZE entries each
# (presumably this rewrites the local JSON files read by the tasks below - confirm
# in datasets/datasets_preparation before running).

# Q_A_DATASET_SIZE = 50

# # AS Class Type
# prepare_as_type_datasets(Q_A_DATASET_SIZE)

# # AS Rels
# prepare_as_rels_q_a_dataset(Q_A_DATASET_SIZE)

# # Valley-Free
# as_rel_data_path = 'datasets/as_rel/20250701.as-rel2.txt'
# create_vf_inference_q_a_dataset(Q_A_DATASET_SIZE, as_rel_data_path)

Generated 50 paths: 48 Valley-Free, 2 Non-Valley-Free
Generated 50 valley-free Q&A pairs → datasets/valley_free/vf_qas.json


**Task 1 - AS class type inference**

In [None]:
# YAML file holding the prompt templates for Task 1 (path is relative to the project folder).
# Note: PROMPT_FILE is rebound by every task section.
PROMPT_FILE = Path("prompts/reasoning/as_class_type_prompts.yaml")

In [None]:
# Parse the prompt file; the cells below look templates up in `prompts` by key.
# `prompts` is overwritten by each task, so run this cell before this task's evaluations.
with PROMPT_FILE.open(encoding="utf-8") as fp:
    prompts = yaml.safe_load(fp)

In [None]:
# Name used to look up (or create) this task's Q&A dataset in LangSmith.
as_class_type_dataset_name = "AS Class Type Q&A dataset"

In [None]:
# Get-or-create the LangSmith dataset for Task 1, so re-running this cell is safe.
# The lookup is filtered by name on the server and re-checked locally.
as_class_type_dataset = next(
    (
        ds
        for ds in client.list_datasets(dataset_name=as_class_type_dataset_name)
        if ds.name == as_class_type_dataset_name
    ),
    None,
)

if as_class_type_dataset is None:
    print(f"Dataset '{as_class_type_dataset_name}' not found.")

    # Read and convert the local Q&A pairs BEFORE creating the remote dataset: if the
    # file is missing or malformed, no empty dataset is left behind for later runs to
    # find by name and evaluate on zero examples.
    as_class_type_dataset_path = 'datasets/as_class_type/caida_as_class_type_qas.json'
    with open(as_class_type_dataset_path, 'r', encoding='utf-8') as f:
        as_class_type_q_a = json.load(f)
    examples = convert_to_examples(as_class_type_q_a)

    as_class_type_dataset = client.create_dataset(
        dataset_name=as_class_type_dataset_name, description="CAIDA AS Class Type Q&A dataset in LangSmith."
    )
    client.create_examples(dataset_id=as_class_type_dataset.id, examples=examples)
    print(f"Dataset '{as_class_type_dataset_name}' created.")

Dataset 'AS Class Type Q&A dataset' not found.
Dataset 'AS Class Type Q&A dataset' created.


Direct Task Prompting

In [None]:
# Direct-task (zero-shot) template for AS class type inference, from the loaded prompt YAML.
direct_task_prompt_as_type = prompts["direct_task_prompt_as_type"]

In [None]:
# Zero-shot run on the AS class type dataset; `experiment` is the experiment name prefix.
# The " " argument is a blank placeholder in the slot where Task 5 passes a hallucination prompt.
experiment = "zero-shot-prompting-as-class-type"
eval_prompt_engineering_on_openai(
    client,
    model,
    direct_task_prompt_as_type,
    " ",
    as_class_type_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'zero-shot-prompting-as-class-type-temp-0.0-cb02c665' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/5f49b9b0-6ebd-4bd8-93ee-dab2db740130/compare?selectedSessions=1ab49503-59fa-4d62-9cb6-4f155ed56ca2




0it [00:00, ?it/s]

Role-Based Prompting

In [None]:
# Role-based template for AS class type inference, from the loaded prompt YAML.
role_based_prompt_as_type = prompts["role_based_prompt_as_type"]

In [None]:
# Role-based run on the AS class type dataset; `experiment` is the experiment name prefix.
experiment = "role-based-prompting-as-class-type"
eval_prompt_engineering_on_openai(
    client,
    model,
    role_based_prompt_as_type,
    " ",
    as_class_type_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'role-based-prompting-as-class-type-temp-0.0-4abd4ec4' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/5f49b9b0-6ebd-4bd8-93ee-dab2db740130/compare?selectedSessions=547a2f42-0dd8-4897-81f5-7b1161b48094




0it [00:00, ?it/s]

Multi-Step Reasoning

In [None]:
# Multi-step reasoning template for AS class type inference, from the loaded prompt YAML.
multi_step_reasoning_prompt_as_type = prompts["multi_step_reasoning_prompt_as_type"]

In [None]:
# Multi-step reasoning run on the AS class type dataset; `experiment` is the experiment name prefix.
experiment = "multi-step-prompting-as-class-type"
eval_prompt_engineering_on_openai(
    client,
    model,
    multi_step_reasoning_prompt_as_type,
    " ",
    as_class_type_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'multi-step-prompting-as-class-type-temp-0.0-1fbc8e58' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/5f49b9b0-6ebd-4bd8-93ee-dab2db740130/compare?selectedSessions=4fc6ea5c-52ec-47d8-8c6f-d51d282d958e




0it [00:00, ?it/s]

Few-Shot Learning

In [None]:
# Few-shot template for AS class type inference, from the loaded prompt YAML.
few_shots_prompt_as_type = prompts["few_shots_prompt_as_type"]

In [None]:
# Few-shot run on the AS class type dataset; `experiment` is the experiment name prefix.
experiment = "few-shots-prompting-as-class-type"
eval_prompt_engineering_on_openai(
    client,
    model,
    few_shots_prompt_as_type,
    " ",
    as_class_type_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'few-shots-prompting-as-class-type-temp-0.0-e362d64b' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/5f49b9b0-6ebd-4bd8-93ee-dab2db740130/compare?selectedSessions=83519837-1455-46c5-9109-f39f4b0d0954




0it [00:00, ?it/s]

Chain-of-Thought (CoT) Prompting

In [None]:
# Chain-of-thought template for AS class type inference, from the loaded prompt YAML.
cot_prompt_as_type = prompts["cot_prompt_as_type"]

In [None]:
# Chain-of-thought run on the AS class type dataset; `experiment` is the experiment name prefix.
experiment = "cot-prompting-as-class-type"
eval_prompt_engineering_on_openai(
    client,
    model,
    cot_prompt_as_type,
    " ",
    as_class_type_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'cot-prompting-as-class-type-temp-0.0-db3cbd5f' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/5f49b9b0-6ebd-4bd8-93ee-dab2db740130/compare?selectedSessions=18f6320d-74c6-49e5-b067-7cf55f17aa8c




0it [00:00, ?it/s]

**Task 2 - AS Tier Inference**

In [None]:
# YAML file holding the prompt templates for Task 2 (rebinds PROMPT_FILE from the previous task).
PROMPT_FILE = Path("prompts/reasoning/as_tier_prompts.yaml")

In [None]:
# Replace `prompts` with the AS tier templates; run before this task's evaluation cells.
with PROMPT_FILE.open(encoding="utf-8") as fp:
    prompts = yaml.safe_load(fp)

In [None]:
# Name used to look up (or create) this task's Q&A dataset in LangSmith.
as_tiers_dataset_name = "AS Tiers Q&A dataset"

In [None]:
# Get-or-create the LangSmith dataset for Task 2, so re-running this cell is safe.
# The result variable (not the dataset NAME) starts out as None: resetting the name
# would make the lookup impossible and leave `as_tiers_dataset` undefined on a fresh kernel.
as_tiers_dataset = next(
    (
        ds
        for ds in client.list_datasets(dataset_name=as_tiers_dataset_name)
        if ds.name == as_tiers_dataset_name
    ),
    None,
)

if as_tiers_dataset is None:
    print(f"Dataset '{as_tiers_dataset_name}' not found.")

    # Read and convert the local Q&A pairs BEFORE creating the remote dataset: if the
    # file is missing or malformed, no empty dataset is left behind for later runs to
    # find by name and evaluate on zero examples.
    as_tiers_dataset_path = 'datasets/as_tier/as_tier_qas.json'
    with open(as_tiers_dataset_path, 'r', encoding='utf-8') as f:
        as_tiers = json.load(f)
    examples = convert_to_examples(as_tiers)

    as_tiers_dataset = client.create_dataset(
        dataset_name=as_tiers_dataset_name, description="AS Tiers Q&A dataset in LangSmith."
    )
    client.create_examples(dataset_id=as_tiers_dataset.id, examples=examples)
    print(f"Dataset '{as_tiers_dataset_name}' created.")

In [None]:
# Context text for the AS tier task, from the loaded prompt YAML.
# NOTE(review): not passed to any evaluation call visible in this notebook - confirm it is still needed.
as_tier_context = prompts["as_tier_context"]

Direct Task Prompting

In [None]:
# Direct-task (zero-shot) template for AS tier inference, from the loaded prompt YAML.
zs_prompt_as_tier = prompts["direct_task_prompt_as_tier"]

In [None]:
# Zero-shot run on the AS tiers dataset; `experiment` is the experiment name prefix.
experiment = "zero-shot-prompting-as-tiers"
eval_prompt_engineering_on_openai(
    client,
    model,
    zs_prompt_as_tier,
    " ",
    as_tiers_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'zero-shot-prompting-as-tiers-temp-0.0-c9d11621' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/a40df487-1d0e-449f-84e8-d5f78422da99/compare?selectedSessions=cac2280d-a438-4095-844c-9559aa1501d7




0it [00:00, ?it/s]

Role-Based Prompting

In [None]:
# Role-based template for AS tier inference, from the loaded prompt YAML.
role_based_prompt_as_tier = prompts["role_based_prompt_as_tier"]

In [None]:
# Role-based run on the AS tiers dataset; `experiment` is the experiment name prefix.
experiment = "role-based-prompting-as-tiers"
eval_prompt_engineering_on_openai(
    client,
    model,
    role_based_prompt_as_tier,
    " ",
    as_tiers_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'role-based-prompting-as-tiers-temp-0.0-6097fed1' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/a40df487-1d0e-449f-84e8-d5f78422da99/compare?selectedSessions=8cf7146b-b7aa-49e0-ba35-853037f7889c




0it [00:00, ?it/s]

Multi-Step Reasoning

In [None]:
# Multi-step reasoning template for AS tier inference, from the loaded prompt YAML.
multi_step_reasoning_prompt_as_tier = prompts["multi_step_reasoning_prompt_as_tier"]

In [None]:
# Multi-step reasoning run on the AS tiers dataset.
# The experiment name must identify the technique: the copy-pasted
# "role-based-prompting-as-tiers" label made this run indistinguishable from the
# role-based one in LangSmith. The name follows the other tasks' convention.
experiment = "multi-step-prompting-as-tiers"
eval_prompt_engineering_on_openai(
    client,
    model,
    multi_step_reasoning_prompt_as_tier,
    " ",
    as_tiers_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'role-based-prompting-as-tiers-temp-0.0-3e2223e6' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/a40df487-1d0e-449f-84e8-d5f78422da99/compare?selectedSessions=95f98e48-d672-4cf0-9e97-152e98c6608b




0it [00:00, ?it/s]

Few-Shot Learning

In [None]:
# Few-shot template for AS tier inference, from the loaded prompt YAML.
few_shots_prompt_as_tier = prompts["few_shots_prompt_as_tier"]

In [None]:
# Few-shot run on the AS tiers dataset.
# The experiment name must identify the technique: the copy-pasted
# "role-based-prompting-as-tiers" label made this run indistinguishable from the
# role-based one in LangSmith. The name follows the other tasks' convention.
experiment = "few-shots-prompting-as-tiers"
eval_prompt_engineering_on_openai(
    client,
    model,
    few_shots_prompt_as_tier,
    " ",
    as_tiers_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'role-based-prompting-as-tiers-temp-0.0-833ecbac' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/a40df487-1d0e-449f-84e8-d5f78422da99/compare?selectedSessions=2cda5f86-60b0-48b8-9b7f-ab9b5bddd61a




0it [00:00, ?it/s]

Chain-of-Thought (CoT) Prompting

In [None]:
# Chain-of-thought template for AS tier inference, from the loaded prompt YAML.
cot_prompt_as_tier = prompts["cot_prompt_as_tier"]

In [None]:
# Chain-of-thought run on the AS tiers dataset.
# The experiment name must identify the technique: the copy-pasted
# "role-based-prompting-as-tiers" label made this run indistinguishable from the
# role-based one in LangSmith. The name follows the other tasks' convention.
experiment = "cot-prompting-as-tiers"
eval_prompt_engineering_on_openai(
    client,
    model,
    cot_prompt_as_tier,
    " ",
    as_tiers_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'role-based-prompting-as-tiers-temp-0.0-c30a6808' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/a40df487-1d0e-449f-84e8-d5f78422da99/compare?selectedSessions=c47ec0ed-fcd1-43a9-897e-c7d1724e0eb1




0it [00:00, ?it/s]

**Task 3 - AS relationship inference**

In [None]:
# YAML file holding the prompt templates for Task 3 (rebinds PROMPT_FILE from the previous task).
PROMPT_FILE = Path("prompts/reasoning/as_rels_prompts.yaml")

In [None]:
# Replace `prompts` with the AS relationship templates; run before this task's evaluation cells.
with PROMPT_FILE.open(encoding="utf-8") as fp:
    prompts = yaml.safe_load(fp)

In [None]:
# Name used to look up (or create) this task's Q&A dataset in LangSmith.
as_rels_dataset_name = "AS Relationships Q&A dataset"

In [None]:
# Get-or-create the LangSmith dataset for Task 3, so re-running this cell is safe.
# The lookup is filtered by name on the server and re-checked locally.
as_rels_dataset = next(
    (
        ds
        for ds in client.list_datasets(dataset_name=as_rels_dataset_name)
        if ds.name == as_rels_dataset_name
    ),
    None,
)

if as_rels_dataset is None:
    print(f"Dataset '{as_rels_dataset_name}' not found.")

    # Read and convert the local Q&A pairs BEFORE creating the remote dataset: if the
    # file is missing or malformed, no empty dataset is left behind for later runs to
    # find by name and evaluate on zero examples.
    as_rels_dataset_path = 'datasets/as_rel/as_rels_qas.json'
    with open(as_rels_dataset_path, 'r', encoding='utf-8') as f:
        as_rels = json.load(f)
    examples = convert_to_examples(as_rels)

    as_rels_dataset = client.create_dataset(
        dataset_name=as_rels_dataset_name, description="AS Relationships Q&A dataset in LangSmith."
    )
    client.create_examples(dataset_id=as_rels_dataset.id, examples=examples)
    print(f"Dataset '{as_rels_dataset_name}' created.")

Dataset 'AS Relationships Q&A dataset' not found.
Dataset 'AS Relationships Q&A dataset' created.


Direct Task Prompting

In [None]:
# Direct-task (zero-shot) template for AS relationship inference, from the loaded prompt YAML.
zs_prompt_as_rel = prompts["direct_task_prompt_as_rel"]

In [None]:
# Zero-shot run on the AS relationships dataset; `experiment` is the experiment name prefix.
experiment = "zero-shot-prompting-as-rels"
eval_prompt_engineering_on_openai(
    client,
    model,
    zs_prompt_as_rel,
    " ",
    as_rels_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'zero-shot-prompting-as-rels-temp-0.0-f7760346' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/0249612e-13c2-47c9-899d-9552c66f5c9f/compare?selectedSessions=a615298d-b91f-4b9a-8773-4ad35a107e20




0it [00:00, ?it/s]

Role-Based Prompting

In [None]:
# Role-based template for AS relationship inference, from the loaded prompt YAML.
role_based_prompt_as_rels = prompts["role_based_prompt_as_rels"]

In [None]:
# Role-based run on the AS relationships dataset; `experiment` is the experiment name prefix.
experiment = "role-based-prompting-as-rels"
eval_prompt_engineering_on_openai(
    client,
    model,
    role_based_prompt_as_rels,
    " ",
    as_rels_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'role-based-prompting-as-rels-temp-0.0-5bf730ba' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/0249612e-13c2-47c9-899d-9552c66f5c9f/compare?selectedSessions=24236687-d141-4341-a0e5-6cf19a536bfe




0it [00:00, ?it/s]

Multi-Step Reasoning

In [None]:
# Multi-step reasoning template for AS relationship inference, from the loaded prompt YAML.
multi_step_reasoning_prompt_as_rels = prompts["multi_step_reasoning_prompt_as_rels"]

In [None]:
# Multi-step reasoning run on the AS relationships dataset; `experiment` is the experiment name prefix.
experiment = "multi-step-prompting-as-rels"
eval_prompt_engineering_on_openai(
    client,
    model,
    multi_step_reasoning_prompt_as_rels,
    " ",
    as_rels_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'multi-step-prompting-as-rels-temp-0.0-940990eb' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/0249612e-13c2-47c9-899d-9552c66f5c9f/compare?selectedSessions=c0f049de-e535-48d0-81c2-01e936102964




0it [00:00, ?it/s]

Few-Shot Learning

In [None]:
# Few-shot template for AS relationship inference, from the loaded prompt YAML.
few_shots_prompt_as_rels = prompts["few_shots_prompt_as_rels"]

In [None]:
# Few-shot run on the AS relationships dataset; `experiment` is the experiment name prefix.
experiment = "few-shot-prompting-as-rels"
eval_prompt_engineering_on_openai(
    client,
    model,
    few_shots_prompt_as_rels,
    " ",
    as_rels_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'few-shot-prompting-as-rels-temp-0.0-34940b12' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/0249612e-13c2-47c9-899d-9552c66f5c9f/compare?selectedSessions=35af6748-aa9b-48cd-a23f-ae8f05a99c00




0it [00:00, ?it/s]

Chain-of-Thought (CoT) Prompting

In [None]:
# Chain-of-thought template for AS relationship inference, from the loaded prompt YAML.
cot_prompt_as_rels = prompts["cot_prompt_as_rels"]

In [None]:
# Chain-of-thought run on the AS relationships dataset; `experiment` is the experiment name prefix.
experiment = "cot-prompting-as-rels"
eval_prompt_engineering_on_openai(
    client,
    model,
    cot_prompt_as_rels,
    " ",
    as_rels_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'cot-prompting-as-rels-temp-0.0-d727a2fb' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/0249612e-13c2-47c9-899d-9552c66f5c9f/compare?selectedSessions=66a82ae2-113c-45cd-948f-ed688f078005




0it [00:00, ?it/s]

**Task 4 - Valley-Free Validation**

In [None]:
# YAML file holding the prompt templates for Task 4 (rebinds PROMPT_FILE from the previous task).
PROMPT_FILE = Path("prompts/reasoning/vf_prompts.yaml")

In [None]:
# Replace `prompts` with the valley-free templates; run before this task's evaluation cells.
with PROMPT_FILE.open(encoding="utf-8") as fp:
    prompts = yaml.safe_load(fp)

In [None]:
# Name used to look up (or create) this task's Q&A dataset in LangSmith.
vf_dataset_name = "Valley free path validation Q&A dataset"

In [None]:
# Get-or-create the LangSmith dataset for Task 4, so re-running this cell is safe.
# The lookup is filtered by name on the server and re-checked locally.
vf_dataset = next(
    (
        ds
        for ds in client.list_datasets(dataset_name=vf_dataset_name)
        if ds.name == vf_dataset_name
    ),
    None,
)

if vf_dataset is None:
    print(f"Dataset '{vf_dataset_name}' not found.")

    # Read and convert the local Q&A pairs BEFORE creating the remote dataset: if the
    # file is missing or malformed, no empty dataset is left behind for later runs to
    # find by name and evaluate on zero examples.
    vf_dataset_path = 'datasets/valley_free/vf_qas.json'
    with open(vf_dataset_path, 'r', encoding='utf-8') as f:
        vf = json.load(f)
    examples = convert_to_examples(vf)

    vf_dataset = client.create_dataset(
        dataset_name=vf_dataset_name, description="Valley free path validation Q&A dataset in LangSmith."
    )
    client.create_examples(dataset_id=vf_dataset.id, examples=examples)
    print(f"Dataset '{vf_dataset_name}' created.")

Dataset 'Valley free path validation Q&A dataset' not found.
Dataset 'Valley free path validation Q&A dataset' created.


Direct Task Prompting

In [None]:
# Direct-task (zero-shot) template for valley-free validation, from the loaded prompt YAML.
zs_prompt_vf = prompts["direct_task_prompt_vf"]

In [None]:
# Zero-shot run on the valley-free dataset; `experiment` is the experiment name prefix.
experiment = "zs-prompting-vf"
eval_prompt_engineering_on_openai(
    client,
    model,
    zs_prompt_vf,
    " ",
    vf_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'zs-prompting-vf-temp-0.0-25d0ee8e' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/441003b1-2608-40e1-a3a7-78b9cd3e2a34/compare?selectedSessions=1f74daf3-5ce1-40ce-8ea8-a391c3ec4296




0it [00:00, ?it/s]

Role-Based Prompting

In [None]:
# Role-based template for valley-free validation, from the loaded prompt YAML.
role_based_prompt_vf = prompts["role_based_prompt_vf"]

In [None]:
# Role-based run on the valley-free dataset; `experiment` is the experiment name prefix.
experiment = "role-based-prompting-vf"
eval_prompt_engineering_on_openai(
    client,
    model,
    role_based_prompt_vf,
    " ",
    vf_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'role-based-prompting-vf-temp-0.0-30f0ce59' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/441003b1-2608-40e1-a3a7-78b9cd3e2a34/compare?selectedSessions=0222aa60-1717-459d-85e6-073ae568b9ff




0it [00:00, ?it/s]

Multi-Step Reasoning

In [None]:
# Multi-step reasoning template for valley-free validation, from the loaded prompt YAML.
multi_step_prompt_vf = prompts["multi_step_prompt_vf"]

In [None]:
# Multi-step reasoning run on the valley-free dataset; `experiment` is the experiment name prefix.
experiment = "multi-step-prompting-vf"
eval_prompt_engineering_on_openai(
    client,
    model,
    multi_step_prompt_vf,
    " ",
    vf_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'multi-step-prompting-vf-temp-0.0-6d6222f1' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/441003b1-2608-40e1-a3a7-78b9cd3e2a34/compare?selectedSessions=df01d33d-8dc9-4ab2-8c6d-932113b23402




0it [00:00, ?it/s]

Few-Shot Learning

In [None]:
# Few-shot template for valley-free validation, from the loaded prompt YAML.
few_shots_prompt_vf = prompts["few_shots_prompt_vf"]

In [None]:
# Few-shot run on the valley-free dataset; `experiment` is the experiment name prefix.
experiment = "few-shots-prompting-vf"
eval_prompt_engineering_on_openai(
    client,
    model,
    few_shots_prompt_vf,
    " ",
    vf_dataset,
    experiment,
    [],
)

View the evaluation results for experiment: 'few-shots-prompting-vf-temp-0.0-8b57bc44' at:
https://smith.langchain.com/o/d3cb8df4-aea8-404a-b7f7-3c7ee52670a1/datasets/441003b1-2608-40e1-a3a7-78b9cd3e2a34/compare?selectedSessions=a8a90fb9-6ca9-4587-9e38-bc559d9e3448




0it [00:00, ?it/s]

Chain-of-Thought (CoT) Prompting

In [None]:
# Chain-of-thought template for valley-free validation, from the loaded prompt YAML.
cot_prompt_vf = prompts["cot_prompt_vf"]

In [None]:
# Chain-of-thought run on the valley-free dataset; `experiment` is passed as the experiment label.
experiment = "cot-prompting-vf"
eval_prompt_engineering_on_openai(
    client,
    model,
    cot_prompt_vf,
    " ",
    vf_dataset,
    experiment,
    [],
)

**Task 5 - Top Providers / Customers / Peers for a given ASN**

In [None]:
# YAML file holding the prompt templates for Task 5 (rebinds PROMPT_FILE from the previous task).
# NOTE(review): Tasks 1-4 load from "prompts/reasoning/"; this path goes through
# "prompts/prompt_engineering/reasoning/" - confirm the file really lives there.
PROMPT_FILE = Path("prompts/prompt_engineering/reasoning/combined_prompts.yaml")

In [None]:
# Replace `prompts` with the Task 5 templates; run before this task's evaluation cells.
with PROMPT_FILE.open(encoding="utf-8") as fp:
    prompts = yaml.safe_load(fp)

In [None]:
# Name under which this task's Q&A dataset is stored in LangSmith.
top_x_rel_dataset_name = "Top x relationship Q&A dataset"

In [None]:
# Loading Q&A Dataset (If not loaded)
top_x_rel_dataset = client.create_dataset(
    dataset_name=top_x_rel_dataset_name, description="Top x relationship Q&A dataset in LangSmith."
)

In [None]:
top_x_rel_dataset_path = 'evaluations/top_x_rels/top_x_rels_qas.json'
with open(top_x_rel_dataset_path, 'r') as f:
    vf = json.load(f)

examples = convert_to_examples(vf[:5])
client.create_examples(dataset_id=top_x_rel_dataset.id, examples=examples)

In [None]:
# Context text for Task 5, from the loaded prompt YAML.
# NOTE(review): x_rels_context is not passed to any evaluation call visible in this notebook.
x_rels_context = prompts["x_rels_context"]
# Prompt passed as the fourth argument of every Task 5 evaluation call below.
X_RELS_HALLUCINATION_PROMPT = prompts["x_rels_hall_prompt"]

Direct Task Prompting

In [None]:
# Direct-task (zero-shot) template for the top-x relationships task, from the loaded prompt YAML.
direct_task_prompt_x_rels = prompts["direct_task_prompt_x_rels"]

In [None]:
# Zero-shot run on the top-x relationships dataset; `experiment` is passed as the experiment label.
# NOTE(review): unlike Tasks 1-4, no seventh argument is passed here - presumably it has a default.
experiment = "zs-prompting-top-x-rels"
eval_prompt_engineering_on_openai(
    client,
    model,
    direct_task_prompt_x_rels,
    X_RELS_HALLUCINATION_PROMPT,
    top_x_rel_dataset,
    experiment,
)

Role-Based Prompting

In [None]:
# Role-based template for the top-x relationships task, from the loaded prompt YAML.
role_based_x_rels = prompts["role_based_x_rels"]

In [None]:
# Role-based run on the top-x relationships dataset; `experiment` is passed as the experiment label.
experiment = "role-based-prompting-top-x-rels"
eval_prompt_engineering_on_openai(
    client,
    model,
    role_based_x_rels,
    X_RELS_HALLUCINATION_PROMPT,
    top_x_rel_dataset,
    experiment,
)

Multi-Step Reasoning

In [None]:
# Multi-step reasoning template for the top-x relationships task, from the loaded prompt YAML.
multi_step_prompt_x_rels = prompts["multi_step_prompt_x_rels"]

In [None]:
# Multi-step reasoning run on the top-x relationships dataset; `experiment` is passed as the experiment label.
experiment = "multi-step-prompting-top-x-rels"
eval_prompt_engineering_on_openai(
    client,
    model,
    multi_step_prompt_x_rels,
    X_RELS_HALLUCINATION_PROMPT,
    top_x_rel_dataset,
    experiment,
)

Few-Shot Learning

In [None]:
# Few-shot template for the top-x relationships task, from the loaded prompt YAML.
few_shots_prompt_x_rels = prompts["few_shots_prompt_x_rels"]

In [None]:
# Few-shot run on the top-x relationships dataset; `experiment` is passed as the experiment label.
experiment = "few-shots-prompting-top-x-rels"
eval_prompt_engineering_on_openai(
    client,
    model,
    few_shots_prompt_x_rels,
    X_RELS_HALLUCINATION_PROMPT,
    top_x_rel_dataset,
    experiment,
)

Chain-of-Thought (CoT) Prompting

In [None]:
# Chain-of-thought template for the top-x relationships task, from the loaded prompt YAML.
cot_prompt_x_rels = prompts["cot_prompt_x_rels"]

In [None]:
# Chain-of-thought run on the top-x relationships dataset; `experiment` is passed as the experiment label.
experiment = "cot-prompting-top-x-rels"
eval_prompt_engineering_on_openai(
    client,
    model,
    cot_prompt_x_rels,
    X_RELS_HALLUCINATION_PROMPT,
    top_x_rel_dataset,
    experiment,
)