In [None]:
import os
import re
from getpass import getpass

import pandas as pd
from datasets import load_dataset
from langchain import hub
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain.prompts import PromptTemplate
from langchain.tools.retriever import create_retriever_tool
from langchain_community.document_loaders import UnstructuredFileLoader, DirectoryLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain_text_splitters import CharacterTextSplitter

In [None]:
# Location of the question/answer CSV. The default is the original author's
# local path; set the QA_CSV_PATH environment variable to run the notebook on
# another machine without editing this cell.
QA_CSV_PATH = os.environ.get(
    "QA_CSV_PATH",
    r'/Users/adrianfolge/Documents/lokal:skole/Master/data/synthetic_data/vol2_questions_and_answers.csv',
)

# Only the first 400 rows are loaded. Later cells read the columns
# "spørsmål", "kommunenavn" and "svar" from this dataset.
references = load_dataset('csv', data_files=QA_CSV_PATH, split="train[:400]")

In [None]:
# Build one FAISS vector store per PDF found under data/, keyed by the name
# taken from the PDF's file name.
loader = DirectoryLoader('data/', glob="**/*.pdf", show_progress=True, loader_cls=UnstructuredFileLoader)
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)

# The embedding model does not depend on the document, so it is created once
# here instead of being re-instantiated on every loop iteration.
embeddings = SentenceTransformerEmbeddings(model_name='sentence-transformers/paraphrase-multilingual-mpnet-base-v2')

databases = {}
for doc in documents:
    source = doc.metadata['source']
    # The key is the file stem; only ASCII letters and underscores are accepted.
    match = re.search(r'\/([A-Za-z_]+)\.pdf', source)
    if match is None:
        # Without this guard the name from the previous iteration would be
        # reused and that municipality's index silently overwritten (or a
        # NameError raised if the very first file does not match).
        print(f"Skipping {source!r}: cannot derive a municipality name from the file name")
        continue
    municipality_name = match.group(1)
    docs = text_splitter.split_documents([doc])
    db = FAISS.from_documents(docs, embeddings)
    databases[municipality_name] = db

In [None]:
# Credentials must not be stored in the notebook. Use the key already present
# in the environment; if there is none, ask for it interactively without
# echoing it to the cell output.
# NOTE(review): the key that used to be hard-coded in this cell has been
# exposed in the notebook and should be revoked.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
# Agent prompt pulled from the LangChain hub; it is passed to
# create_openai_tools_agent in the answer-generation cell.
# (A bare `prompt.messages` expression used to follow this line; it was not the
# last statement of the cell, so it displayed nothing and has been dropped.)
prompt = hub.pull("hwchase17/openai-tools-agent")

# Chat model that drives the agent. temperature=0 keeps sampling randomness to
# a minimum; the model name is left at the library default.
llm = ChatOpenAI(temperature=0)

In [None]:
# Answer every question with an agent whose only tool is a retriever over the
# vector store of the municipality named in the same row.
list_of_answers = []

# Iterate over the rows that were actually loaded instead of a hard-coded
# range(400): this cannot run past the end of a shorter dataset and avoids
# re-reading whole columns on every iteration.
for row in references:
    question = row["spørsmål"]
    kommunenavn = row["kommunenavn"]
    # Raises KeyError if no vector store was built for this municipality.
    db = databases[kommunenavn]
    retriever = db.as_retriever()

    # The tool name and description are shown to the model and steer when it
    # calls the tool, so they must describe the indexed documents (they were
    # previously the "State of the Union" text from the LangChain tutorial).
    tool = create_retriever_tool(
        retriever,
        "search_municipality_documents",
        f"Searches and returns excerpts from the documents of the municipality {kommunenavn}.",
    )
    tools = [tool]

    agent = create_openai_tools_agent(llm, tools, prompt)
    agent_executor = AgentExecutor(agent=agent, tools=tools)

    result = agent_executor.invoke({"input": question})
    list_of_answers.append(result["output"])

In [None]:
# Grade each generated answer against its reference answer with an LLM judge.
# The raw judge replies are collected in content_list, in dataset order.
preds = list_of_answers
refs = references["svar"]
if len(preds) != len(refs):
    # Pairing answers with the wrong references would make every verdict
    # meaningless, so fail loudly instead of indexing a fixed 400 items.
    raise ValueError(
        f"Got {len(preds)} predicted answers but {len(refs)} reference answers"
    )
content_list = []

# Judge model. It is bound to its own name so that the agent's `llm` is not
# overwritten (re-running the agent cell after this one would otherwise use a
# different model), and temperature=0 keeps the grading as repeatable as possible.
judge_llm = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0)

# Grading prompt. The stray "agent_scratchpad" line that used to end the
# template belonged to an agent prompt and has been removed: this is a plain
# prompt | model chain with no scratchpad.
prompt_template = PromptTemplate(
    template="""Task: Answer Evaluation
You are given a reference answer and a predicted answer. Your task is to determine whether the predicted answer matches the reference answer correctly. It does not have to be an exact match, but it should be somewhat the same.
- The reference answer is the correct answer.
- The predicted answer is the answer generated by a model or provided by a user.
Your response should indicate whether the predicted answer is correct or not.
Reference answer: {reference}
Predicted answer: {prediction}
Is the predicted answer correct? [Yes/No]""",
    input_variables=["prediction", "reference"]
)
chain = prompt_template | judge_llm

for reference, prediction in zip(refs, preds):
    score = chain.invoke(
        {
            "reference": reference,
            "prediction": prediction,
        }
    )
    content_list.append(score.content)

In [None]:
# Display the raw reply strings collected from the grading chain, one entry
# per evaluated answer.
content_list

In [None]:
# Tally the judge's verdicts. Replies are free text, so an exact comparison
# with 'Yes' / 'No' would drop answers such as "Yes.", "yes" or
# "No, the predicted answer ...". A reply counts when its first word is
# yes/no in any letter case; "\b" keeps words like "Not" from counting as "No".
verdict_pattern = re.compile(r'\W*(yes|no)\b', re.IGNORECASE)

verdicts = []
for reply in content_list:
    found = verdict_pattern.match(reply)
    verdicts.append(found.group(1).lower() if found else None)

count_yes = verdicts.count('yes')
count_no = verdicts.count('no')
count_unclear = verdicts.count(None)

# Displaying the counts
print("Number of 'Yes':", count_yes)
print("Number of 'No':", count_no)
if count_unclear:
    # Surface replies that could not be read as either verdict rather than
    # silently leaving them out of both totals.
    print("Number of unparseable verdicts:", count_unclear)