**To Do**
- Add a PDF-loader comparison to the tests<br>
 <small>(PDF Loaders + llm model version 을 복합적으로 비교할 수 있도록 테스트 만들기. 현재는 llm model 성능만 비교하도록 구성하였음)</small><br>

- Write human-authored questions for testing<br>
 <small>(모델 성능 비교할 수 있도록 임베딩한 document의 내용을 기반으로 한 questions - ground truth 쌍 제작하기)</small>

- Create a visualization plot comparing model performance<br>
 <small>(모델 성능 비교 결과 시각화 만들기.. PPT 발표용...)</small>

In [22]:
import os
from dotenv import load_dotenv
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

In [23]:
## make embeddings using pdf loaders below :

# PDF loader variants to compare:
# (1) PyPDFLoader
# (2) PyMuPDF4LLM
# (3) LlamaParse

In [None]:
class ChatBot_test():
    """Retrieval-augmented chatbot used to compare OpenAI chat models.

    The bot answers a question by retrieving documents from a persisted
    Chroma store, formatting them together with the most recent
    question/answer turns into a prompt, and sending that prompt to the
    selected chat model.

    Args:
        chat_llm: Name of the OpenAI chat model passed to ``ChatOpenAI``.
        chroma_path: Directory of the persisted Chroma store. Defaults to
            ``<current working directory>/data``.
        history_window: Number of most recent question/answer turns that are
            included in the prompt. Defaults to 3.

    Raises:
        ValueError: If ``OPENAI_API_KEY`` is not set after loading ``.env``.
    """

    def __init__(self, chat_llm, chroma_path=None, history_window=3):
        load_dotenv()

        self.llm_version = chat_llm
        # Candidate OpenAI models for the comparison:
        # (1) gpt-3.5-turbo
        # (2) gpt-3.5-turbo-0613
        # (3) gpt-3.5-turbo-16k-0613
        # (4) gpt-3.5-turbo-instruct-0914
        # (5) gpt-4
        # (6) gpt-4o-mini
        # NOTE(review): the "instruct" variant is a completions-style model;
        # confirm that it actually works through ChatOpenAI.

        OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
        if not OPENAI_API_KEY:
            raise ValueError("Error loading API key. Check that OPENAI_API_KEY is set inside .env")
        print("API key loaded successfully.")

        # The store location is configurable so that stores built with
        # different PDF loaders can be evaluated with the same class.
        if chroma_path is None:
            chroma_path = os.path.join(os.getcwd(), "data")
        self.chroma_path = chroma_path
        self.history_window = history_window

        self.chat_history = []
        # temperature=0 for now; other values between 0 and 1 may be
        # explored later if time allows.
        self.llm = ChatOpenAI(model=self.llm_version, temperature=0, openai_api_key=OPENAI_API_KEY)

        self.vectorstore = Chroma(persist_directory=self.chroma_path, embedding_function=OpenAIEmbeddings())
        self.retriever = self.vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

        self.prompt = PromptTemplate(
            input_variables=["history", "context", "question"],
            template="""
            You are a knowledgeable assistant. Use the following pieces of retrieved context to answer the question.
            If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.

            Conversation history:
            {history}

            Context:
            {context}

            Question:
            {question}

            Answer:
        """)

        print("ChatBot initialized successfully!")

    def format_docs(self, docs):
        """Join the ``page_content`` of the retrieved documents into one context string.

        Documents are separated by a blank line; an empty iterable yields "".
        """
        return "\n\n".join(doc.page_content for doc in docs)

    def format_history(self):
        """Format the most recent turns of the chat history for the prompt.

        Returns one "Q: ...\\nA: ..." pair per turn for the last
        ``self.history_window`` turns, joined by newlines.
        """
        window = self.history_window
        # A slice of [-0:] would return the whole list, so a non-positive
        # window is handled explicitly as "no history".
        recent_turns = self.chat_history[-window:] if window > 0 else []
        return "\n".join(
            f"Q: {item['question']}\nA: {item['answer']}" for item in recent_turns
        )

    def answer(self, question):
        """Answer ``question`` with the RAG chain and record the turn.

        Args:
            question: The user question; it is used both as the retrieval
                query and as the ``{question}`` prompt variable.

        Returns:
            The ``content`` of the chat model's response message.
        """
        rag_chain = (
            {
                # The chain input is ignored here; history comes from the instance.
                "history": RunnableLambda(lambda _: self.format_history()),
                "context": self.retriever | self.format_docs,
                "question": RunnablePassthrough(),
            }
            | self.prompt
            | self.llm
            | RunnableLambda(lambda message: message.content)
        )

        response = rag_chain.invoke(question)
        self.chat_history.append({"question": question, "answer": response})

        return response

#### Performance Testing Example

In [28]:
# Build the chatbot under test with the gpt-3.5-turbo chat model.
bot = ChatBot_test("gpt-3.5-turbo")

API key loaded successfully.
ChatBot initialized successfully!


In [33]:
from datasets import Dataset
from ragas import EvaluationDataset

# Evaluation questions and their reference answers, paired by position.
# NOTE(review): the references read like placeholders from a different
# document than the questions ask about; confirm before trusting the scores.
questions = ["What is the topic of this paper?", 
             "What is the main keywords of this paper?",
             "What is the main sentence of this paper?",
            ]
ground_truths = ["The president said that Justice Breyer has dedicated his life to serve the country and thanked him for his service.",
                "The president said that Pat Gelsinger is ready to increase Intel's investment to $100 billion.",
                "The president asked Congress to pass proven measures to reduce gun violence."]
if len(questions) != len(ground_truths):
    raise ValueError("questions and ground_truths must have the same length")

answers = []
contexts = []

# Reuse the bot's own store and retriever, so the evaluated contexts come
# from the same retriever that produced the answers. This also avoids relying
# on CHROMA_PATH, which is only a local variable inside ChatBot_test.__init__.
vectorstore = bot.vectorstore
retriever = bot.retriever

# Inference: collect the answer and the retrieved contexts for each question.
for query in questions:
    answers.append(bot.answer(query))
    # `invoke` replaces the deprecated `get_relevant_documents`.
    contexts.append([doc.page_content for doc in retriever.invoke(query)])

# Column names follow the ragas evaluation schema.
data = {
    "user_input": questions,
    "reference": ground_truths,
    "response": answers,
    "retrieved_contexts": contexts
}

# Convert dict -> Hugging Face dataset -> ragas evaluation dataset.
dataset = Dataset.from_dict(data)
eval_dataset = EvaluationDataset.from_hf_dataset(dataset)

In [34]:
# Display the evaluation dataset summary (features and length).
eval_dataset 

EvaluationDataset(features=['user_input', 'retrieved_contexts', 'response', 'reference'], len=3)

In [35]:
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI

# Judge model used by ragas to score the answers. The key is read from the
# environment because OPENAI_API_KEY is only a local variable inside
# ChatBot_test.__init__ and is not defined at notebook level.
llm_model = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    openai_api_key=os.getenv("OPENAI_API_KEY"),
)

# raise_exceptions=False is passed so that evaluation does not abort on a
# failing sample.
result = evaluate(
    eval_dataset,
    metrics=[faithfulness, answer_relevancy],
    llm=llm_model,
    embeddings=OpenAIEmbeddings(),
    raise_exceptions=False,
)

# Show the per-question scores as a DataFrame.
result.to_pandas()

Evaluating: 100%|██████████| 6/6 [00:05<00:00,  1.19it/s]


Unnamed: 0,user_input,retrieved_contexts,response,reference,faithfulness,answer_relevancy
0,What is the topic of this paper?,[],"I'm sorry, but without any specific context or...",The president said that Justice Breyer has ded...,1.0,0.0
1,What is the main keywords of this paper?,[],"I'm sorry, but without any specific context or...",The president said that Pat Gelsinger is ready...,0.0,0.0
2,What is the main sentence of this paper?,[],"I'm sorry, but without any specific context or...",The president asked Congress to pass proven me...,0.0,0.0


In [None]:
# Keep the per-question scores as a DataFrame for the summary statistics below.
result2 = result.to_pandas()

In [None]:
# Mean and variance of the faithfulness scores across the evaluated questions
# (pandas' default `var()` is the sample variance, ddof=1).
result2["faithfulness"].mean(), result2["faithfulness"].var()

(0.5594733088364436, 0.13833670200011863)

In [None]:
# Mean and variance of the answer-relevancy scores across the evaluated questions
# (pandas' default `var()` is the sample variance, ddof=1).
result2["answer_relevancy"].mean(), result2["answer_relevancy"].var()

(0.964662193901968, 0.0013356076790130502)