In [None]:
import warnings
warnings.filterwarnings('ignore')

import dotenv
dotenv.load_dotenv(dotenv.find_dotenv(), override=True)

import pandas as pd
import numpy as np

from langchain.docstore.document import Document
from langchain.schema.language_model import BaseLanguageModel
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings

import os
import sys
from pathlib import Path

# Make the project packages importable from this notebook.
# Entries must be plain strings: the import system ignores any other type on
# sys.path, so the current working directory is converted with str().
# The membership check keeps the list free of duplicates when the cell is re-run.
for extra_path in ("../backend/", "../", str(Path.cwd())):
    if extra_path not in sys.path:
        sys.path.append(extra_path)

# What do the generated QA pairs look like?

In [None]:
import json

# Read the stored evaluation set from disk and parse the JSON text into a list.
with open("../tmp/eval_data.json", mode="r", encoding="utf-8") as file:
    gt_dataset = json.loads(file.read())

# Display the first entry as a sample. Each QA pair contains the generated
# question, the answer, the context in the document with its corresponding
# source, and an attached id.
gt_dataset[0]

# Load and split data into chunks

In [None]:
from backend.utils import load_and_chunk_doc, get_qa_llm, get_retriever
from backend.commons.configurations import Hyperparameters, CVRetrieverSearchType
import glob
import pandas as pd

# Raw hyperparameter values; converted into a Hyperparameters object right after.
hp_dict = {
    "id": 0,
    # chunking
    "chunk_size": 512,
    "chunk_overlap": 10,
    # retrieval
    "num_retrieved_docs": 3,
    "use_llm_grader": False,
    "search_type": "mmr",
    "length_function_name": "text-embedding-ada-002",
    # grading prompts
    "grade_answer_prompt": "few_shot",
    "grade_docs_prompt": "default",
    # model names
    "embedding_model": "text-embedding-ada-002",
    "qa_llm": "gpt-3.5-turbo",
    "grader_llm": "gpt-3.5-turbo",
}

# Turn the raw dict into the project's Hyperparameters object.
hp = Hyperparameters.from_dict(hp_dict)

# Chat model and embedding model used for this walkthrough.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")
emb = OpenAIEmbeddings()

# glob.glob returns matches in arbitrary (filesystem-dependent) order, so the
# paths are sorted first; otherwise index 1 could select a different document
# on another machine or run. Index 1 still requires at least two PDFs in the
# document store and raises IndexError when fewer are present.
pdf_paths = sorted(glob.glob("../tmp/document_store/*.pdf"))
chunks = load_and_chunk_doc(hp, pdf_paths[1])

# Retriever over the chunks (3 documents per query, MMR search), then the QA
# chain that answers questions using that retriever.
retriever = get_retriever(chunks, emb, 3, search_type=CVRetrieverSearchType.MMR)
qa_llm = get_qa_llm(retriever, llm)

# Count the tokens of every chunk once and reuse the counts for both the total
# and the average, instead of tokenising the whole document twice.
tokens_per_chunk = [llm.get_num_tokens(chunk.page_content) for chunk in chunks]

print(
    f"number of tokens in document: {sum(tokens_per_chunk)}\n"
    f"number of chunks: {len(chunks)}\n"
    f"average number of tokens per chunk: {np.average(tokens_per_chunk)}"
)

# Generate Q/A pairs

In [None]:
from backend.commons.configurations import BaseConfigurations, QAConfigurations, Hyperparameters
from langchain.chains import QAGenerationChain
from backend.commons.prompts import QA_GENERATION_PROMPT_SELECTOR
from backend.testsetgen.test_set_generator import get_qa_from_chunk
import itertools

# Raw settings for QA-pair generation; converted into a QAConfigurations object
# right after.
qa_params = {
    # chunking used for generation
    "chunk_size": 2048,
    "chunk_overlap": 0,
    "qa_generator_llm": "gpt-3.5-turbo",
    "length_function_name": "text-embedding-ada-002",
    # output switches
    "generate_eval_set": True,
    "persist_to_vs": True,
    # NOTE(review): the same model name is listed twice — confirm this is intended.
    "embedding_model_list": [
        "text-embedding-ada-002",
        "text-embedding-ada-002",
    ],
}

# Build the QAConfigurations object from the raw parameter dict above.
# NOTE(review): hp_qa.qa_generator_llm is later passed to QAGenerationChain.from_llm,
# so from_dict presumably resolves the model name into an LLM instance — confirm.
hp_qa = QAConfigurations.from_dict(qa_params)

In [None]:
eval_set = []
qa_chain = QAGenerationChain.from_llm(hp_qa.qa_generator_llm, prompt=QA_GENERATION_PROMPT_SELECTOR.get_prompt(hp_qa.qa_generator_llm))

qa_pairs = [await get_qa_from_chunk(chunks[i], qa_chain) for i in range(len(chunks))]
qa_pairs = list(itertools.chain.from_iterable(qa_pairs))

# Set up vectorstore and retriever

In [None]:
# Import from `backend`, the package every other cell in this notebook uses.
from backend.utils import get_retriever

llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")

# Reuse the chunks produced earlier by `load_and_chunk_doc`; hp_dict sets
# chunk_size to 512, the size this cell asks for. No `split_data` helper or
# `data` variable is defined anywhere in this notebook, so the cell must not
# depend on them to survive Restart & Run All.
chunks_vs = chunks

# Same call shape as the earlier retriever setup: 3 documents per query.
# NOTE(review): search_type mirrors the earlier MMR call — confirm it is wanted here.
retriever = get_retriever(
    chunks_vs,
    OpenAIEmbeddings(model="text-embedding-ada-002"),
    3,
    search_type=CVRetrieverSearchType.MMR,
)

# LLM chain for query answering based on document chunks

In [None]:
from backend.utils import get_qa_llm
qa_llm = get_qa_llm(retriever, hp.qa_llm)

# also returns source document chunks by default
await qa_llm.acall("What is TRAIL.X?")

QA grading functions, such as embedding similarity, can be found in `backend/evaluation/evaluation_metrics.py`.