### **Import libraries**

In [None]:
import sys
import os
# Make the project root (the parent of the current working directory) importable
# so the `src.*` imports in the next cell resolve.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
# Guard the append: re-running this cell would otherwise keep adding the same
# entry to sys.path.
if project_root not in sys.path:
  sys.path.append(project_root)

In [None]:
import time
import json
from uuid import uuid4
from datetime import datetime
from src.utils import dump_json
from src.workflow import get_graph_builder
from src.rag import (
  embed_pdf, 
  get_llm, 
  get_embedding_function, 
  get_rerank_function
)

from langchain.schema import HumanMessage

In [None]:
# Configuration passed to the Graph
def create_config() -> dict:
  """Assemble the config dict that is handed to the graph.

  Returns:
    A dict with a single ``"configurable"`` entry holding a freshly generated
    ``thread_id`` (a uuid4 rendered as a string) together with the objects
    returned by ``get_llm()``, ``get_embedding_function()`` and
    ``get_rerank_function()``.
  """
  # Resolve the three components first, in this order, before the id is drawn.
  model, embedder, reranker = (
    get_llm(),
    get_embedding_function(),
    get_rerank_function(),
  )
  return {
    "configurable": {
      "thread_id": str(uuid4()),
      "llm": model,
      "embedding_function": embedder,
      "rerank_function": reranker,
    }
  }

# Build the graph config once; the cells below reuse this same object
config = create_config()

# Timestamp taken when this cell runs; it becomes part of the output file name
timestamp = format(datetime.now(), "%Y%m%d_%H%M%S")

# Input and output locations, relative to the notebook's working directory
pdf_file = "../data/halueval.pdf"
qa_folder_path = "../data/qa"
eval_folder_path = "../data/eval"

# Chunking parameters: the overlap is 10% of the chunk size
chunk_size = 1000
chunk_overlap = int(chunk_size * 0.1)

# Make sure the evaluation output folder exists (no error if it already does)
os.makedirs(eval_folder_path, exist_ok=True)

### **Create evaluation dataset**

**Load the QA dataset**

In [None]:
# Read the QA file as UTF-8 text and parse the JSON into `qa_pairs`
with open(f"{qa_folder_path}/qa_dataset.json", "r", encoding="utf-8") as qa_file:
  qa_pairs = json.loads(qa_file.read())

**Construct the evaluation dataset**

In [None]:
# Pull the LLM instance out of the shared config
llm = config["configurable"]["llm"]

# Obtain the graph object used to answer questions
graph_builder = get_graph_builder()

# Embed the PDF using the chunking parameters defined in the configuration cell
embed_pdf(pdf_file=pdf_file, config=config, chunk_size=chunk_size, chunk_overlap=chunk_overlap)

In [None]:
def get_response_and_retrieved_contexts(question: str) -> tuple:
  """Run one question through the graph and collect the answer and its contexts.

  The question is wrapped in a ``HumanMessage`` and streamed through
  ``graph_builder`` with ``stream_mode="messages"``. Only messages whose
  metadata reports the ``"chatbot"`` node contribute to the answer.

  In this stream mode an LLM reply can arrive as several chunks of the same
  message. Consecutive chunks that share a non-None message id are therefore
  concatenated; a message with a different (or missing) id replaces what was
  collected so far, so the last chatbot message wins.

  Args:
    question: The user question to send to the graph.

  Returns:
    A ``(response, retrieved_docs)`` tuple: the text collected from the chatbot
    node (``""`` if it emitted nothing) and the ``"retrieved_docs"`` value read
    from the graph state after the run.

  Raises:
    KeyError: If stream metadata lacks ``"langgraph_node"`` or the final state
      has no ``"retrieved_docs"`` entry.
  """
  response = ""
  current_id = None
  input_message = HumanMessage(content=question)
  for msg, metadata in graph_builder.stream({"messages": [input_message]}, config, stream_mode="messages"):
    if metadata["langgraph_node"] != "chatbot":
      continue

    msg_id = getattr(msg, "id", None)
    is_continuation = msg_id is not None and msg_id == current_id
    if is_continuation and isinstance(response, str) and isinstance(msg.content, str):
      # Another chunk of the message we are already collecting: extend it.
      response += msg.content
    else:
      # New message (or non-text content): start over with this content.
      response = msg.content
    current_id = msg_id

  # The retrieved documents are read from the graph state once streaming is done.
  retrieved_docs = graph_builder.get_state(config).values["retrieved_docs"]
  return response, retrieved_docs

In [None]:
# Build one evaluation record per QA pair: question, retrieved contexts,
# generated response and the reference answer
eval_dataset = []
eval_output_path = f"{eval_folder_path}/eval_dataset_{timestamp}.json"

for i, pair in enumerate(qa_pairs):
  # Rate-limit guard: wait 60 seconds after every 7 questions (never before the first)
  pause_needed = i > 0 and i % 7 == 0
  if pause_needed:
    time.sleep(60)

  response, retrieved_contexts = get_response_and_retrieved_contexts(pair["question"])
  data = {
    "user_input": pair["question"],
    "retrieved_contexts": retrieved_contexts,
    "response": response,
    "reference": pair["answer"]
  }
  eval_dataset += [data]
  # NOTE(review): only the current record is handed to dump_json, always with the
  # same output path — presumably dump_json appends rather than overwrites; confirm.
  dump_json(data=data, output_path=eval_output_path)