# 1 - Set Up

## 1.1 - Map to Root

In [None]:
# Relative path that later cells prepend to sys.path and pass to test_root_dir.
# Presumably it leads from the notebook's working directory to the project root — confirm.
REL_PATH_TO_ROOT = "../"

In [None]:
import sys
import os

In [None]:
# Put the root path first on the module search path. The project imports in the
# following cells (`src`, `local_variables`, `prompt_manager`) presumably rely on this — confirm.
sys.path.insert(0,REL_PATH_TO_ROOT)

In [None]:
from src.utils import get_root_dir, test_root_dir
from local_variables import ROOT_DIR

In [None]:
# Check the root mapping with the project helper.
# NOTE(review): what test_root_dir verifies (and whether it raises) is not visible here — see src.utils.
test_root_dir(REL_PATH_TO_ROOT)

## 1.2 - Set-Up Imports

In [None]:
from prompt_manager.manager import PromptManager
from prompt_manager.fetcher import fetch_prompt
from src.api import generate_outputs_openai

In [None]:
import pandas as pd

## 1.3 - Fictitious Hallucination Dataset

In [None]:
# Each example is a (context, chatbot response, ground truth) record, where the
# ground truth is True when the response contains a hallucination and False
# when it is accurate.
_EXAMPLES = [
    (
        "Provide information on the largest ocean on Earth.",
        "The Pacific Ocean is the largest ocean, covering more than 63 million square miles.",
        False,  # Correct information about the Pacific Ocean
    ),
    (
        "Explain the causes of the American Civil War.",
        "The American Civil War was caused by multiple factors, including economic differences, states' rights, and the issue of slavery.",
        False,  # Correct summary of American Civil War causes
    ),
    (
        "Give details about a historic battle that involved dragons.",
        "The Battle of the Dragon Isles in the 14th century was a historic battle involving dragons and knights in medieval Europe.",
        True,  # Hallucination: no historic battle with dragons is recorded
    ),
    (
        "Summarize the work done by Nikola Tesla on wireless power transmission.",
        "Nikola Tesla worked extensively on wireless power transmission, including his famous Wardenclyffe Tower project.",
        False,  # Accurate summary of Tesla's work on wireless power
    ),
    (
        "Who won the FIFA World Cup in 2030?",
        "The FIFA World Cup in 2030 was won by Argentina, defeating France in the finals with a score of 3-2.",
        True,  # Hallucination: World Cup 2030 has not occurred yet
    ),
]

# List of contexts for the chatbot
contexts = [context for context, _, _ in _EXAMPLES]

# List of chatbot responses to the contexts
responses = [response for _, response, _ in _EXAMPLES]

# Ground truth list indicating if each response contains a hallucination
is_hallucination_ground_truths = [flag for _, _, flag in _EXAMPLES]

In [None]:
# Assemble the dataset: one row per example, one column per list
hal_df = pd.DataFrame(
    {
        "context": contexts,
        "chatbot_response": responses,
        "hallucination_ground_truth": is_hallucination_ground_truths,
    }
)

In [None]:
# Preview the assembled dataset (head() returns the first 5 rows by default)
hal_df.head()

# 2 - Run Your Evaluation 

## 2.1 - Get Your Prompts

In [None]:
# Sequence of keys handed to fetch_prompt to select the prompt.
# NOTE(review): how the sequence is interpreted is defined in prompt_manager.fetcher — confirm.
SEQUENCE = ["task_1", "hallucination_detector"]

# Retrieve the template, requesting its latest version
prompt_template = fetch_prompt(SEQUENCE, use_latest_version=True)

# Show the fetched template for inspection
print(prompt_template)

In [None]:
# Evaluator outputs, collected in the same order as the rows of hal_df
evaluator_responses = []

# Walk the dataset as (context, chatbot response) pairs
for context, chatbot_response in zip(hal_df["context"], hal_df["chatbot_response"]):

    # Inputs for the prompt template, in dictionary format
    row_inputs = {"CONTEXT": context, "RESPONSE": chatbot_response}

    # Initialise the prompt, then validate and format its inputs
    prompt = PromptManager(template=prompt_template, inputs=row_inputs)
    prompt.validate_inputs()
    prompt.format_inputs()

    # Send the formatted prompt and keep the evaluator's output
    evaluator_response = generate_outputs_openai(prompt.prompt)
    evaluator_responses.append(evaluator_response)

# Attach the evaluator outputs as a new column and show the result
hal_df["evaluator_response"] = evaluator_responses
display(hal_df.head(5))