In [1]:
from datasets import load_dataset
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.document_loaders import UnstructuredFileLoader, DirectoryLoader
from langchain_community.vectorstores import FAISS
import pandas as pd
import re
from langchain.tools.retriever import create_retriever_tool
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain import hub
import os
from langchain.prompts import PromptTemplate
from deep_translator import GoogleTranslator

2024-04-03 09:33:47.247929: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
references = load_dataset('csv', data_files=r'/Users/adrianfolge/Documents/lokal:skole/Master/data/synthetic_data/vol2_questions_and_answers.csv', split="train[:400]")

In [4]:
os.environ["OPENAI_API_KEY"] = "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"

In [5]:
loader = DirectoryLoader('data/', glob="**/*.pdf", show_progress=True, loader_cls=UnstructuredFileLoader)
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
databases = {}
for doc in documents:
    source = doc.metadata['source']
    match = re.search(r'\/([A-Za-z_]+)\.pdf', source)
    if match:
        municipality_name = match.group(1)
    docs = text_splitter.split_documents([doc])
    for document in docs:
        page_content = document.page_content
        translated_content = GoogleTranslator(source='no', target='en').translate(text=page_content)
        document.page_content = translated_content
    for index, doc in enumerate(docs):
        if isinstance(doc.page_content, type(None)):
            docs[index].page_content = ""
    embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
    db = FAISS.from_documents(docs, embeddings)
    databases[municipality_name] = db

100%|██████████| 10/10 [00:06<00:00,  1.44it/s]
Created a chunk of size 1147, which is longer than the specified 500
Created a chunk of size 1570, which is longer than the specified 500
Created a chunk of size 639, which is longer than the specified 500
Created a chunk of size 610, which is longer than the specified 500
Created a chunk of size 1008, which is longer than the specified 500
Created a chunk of size 545, which is longer than the specified 500
Created a chunk of size 536, which is longer than the specified 500
Created a chunk of size 583, which is longer than the specified 500
Created a chunk of size 1162, which is longer than the specified 500
Created a chunk of size 607, which is longer than the specified 500
Created a chunk of size 1206, which is longer than the specified 500
Created a chunk of size 697, which is longer than the specified 500
Created a chunk of size 734, which is longer than the specified 500
Created a chunk of size 916, which is longer than the specified

In [6]:
prompt = hub.pull("hwchase17/openai-tools-agent")
prompt.messages

llm = ChatOpenAI(temperature=0)

In [7]:
list_of_answers = []
for i in range(400):
    question = references["spørsmål"][i]
    translated_query = GoogleTranslator(source='no', target='en').translate(text=question)
    kommunenavn = references["kommunenavn"][i]
    db = databases[kommunenavn]
    retriever = db.as_retriever()

    tool = create_retriever_tool(
        retriever,
        "search_state_of_union",
        "Searches and returns excerpts from the 2022 State of the Union.",
    )
    tools = [tool]

    agent = create_openai_tools_agent(llm, tools, prompt)
    agent_executor = AgentExecutor(agent=agent, tools=tools)
    
    result = agent_executor.invoke(
        {
            "input": f"{translated_query}"
        }
    )
    translated_answer = GoogleTranslator(source='en', target='no').translate(text=result["output"])
    list_of_answers.append(translated_answer)

In [8]:
preds = list_of_answers
refs = references["svar"]
content_list = []

# Choose the LLM that will drive the agent
llm = ChatOpenAI(model="gpt-3.5-turbo-1106")

# Define the prompt template
prompt_template = PromptTemplate(
    template="""Task: Answer Evaluation
You are given a reference answer and a predicted answer. Your task is to determine whether the predicted answer matches the reference answer correctly. It does not have to be an exact match, but it should be somewhat the same.
- The reference answer is the correct answer.
- The predicted answer is the answer generated by a model or provided by a user.
Your response should indicate whether the predicted answer is correct or not.
Reference answer: {reference}
Predicted answer: {prediction}
Is the predicted answer correct? [Yes/No]
agent_scratchpad: This is the scratchpad where you can store intermediate information.""",
    input_variables=["prediction", "reference"]
)
chain = prompt_template | llm

for num in range(400):
    score = chain.invoke(
        {
            "reference": refs[num],
            "prediction": list_of_answers[num],
        }
    )
    content_list.append(score.content)

In [9]:
count_yes = content_list.count('Yes')
count_no = content_list.count('No')

# Displaying the counts
print("Number of 'Yes':", count_yes)
print("Number of 'No':", count_no)

Number of 'Yes': 230
Number of 'No': 146
