In [None]:
## Load packages
from langchain.agents import AgentExecutor, create_react_agent
from datasets import load_dataset
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.document_loaders import UnstructuredFileLoader, DirectoryLoader
from langchain_community.vectorstores import FAISS
import re
from langchain.tools.retriever import create_retriever_tool
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
import os
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.document_loaders import UnstructuredFileLoader, DirectoryLoader
from langchain_community.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain import hub

In [None]:
# Number of question/answer rows to process: used as the slice size when the reference
# dataset is loaded and as the loop bound of the answer-generation and evaluation loops.
instances = 400

In [None]:
## Initialize the OpenAI API key
# SECURITY: never store the key in the notebook. A key used to be hard-coded in this
# cell; treat that key as leaked and revoke it in the OpenAI dashboard.
# The key is expected in the environment of the process that starts the kernel
# (e.g. `export OPENAI_API_KEY=...`). Fail fast with a clear message when it is
# missing instead of failing later inside the first OpenAI call.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the environment before starting the kernel."
    )

In [None]:
## Load training dataset
# The CSV location can be overridden with the REFERENCES_CSV environment variable so the
# notebook can run on another machine; the original local path stays the default.
references_csv = os.environ.get(
    "REFERENCES_CSV",
    r'/Users/adrianfolge/Documents/lokal:skole/Master/data/synthetic_data/vol2_questions_and_answers_ytterligere_revidert.csv',
)
# Only the first `instances` rows of the train split are loaded.
references = load_dataset('csv', data_files=references_csv, split=f"train[:{instances}]")

In [None]:
## Split and chunk the data
# Build one FAISS index per PDF under ../data/, keyed by the PDF's base file name.
# Later cells look the index up with the value of the dataset's "kommunenavn" column.
loader = DirectoryLoader('../data/', glob="**/*.pdf", show_progress=True, loader_cls=UnstructuredFileLoader)
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
# The embeddings client does not depend on the document, so create it once.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
databases = {}
for doc in documents:
    source = doc.metadata['source']
    match = re.search(r'\/([A-Za-z_]+)\.pdf', source)
    if match is None:
        # Without this guard the name from the previous iteration would be reused and
        # that municipality's index silently overwritten (or a NameError raised on the
        # first document).
        print(f"Skipping {source!r}: could not derive a municipality name from the file name")
        continue
    municipality_name = match.group(1)
    docs = text_splitter.split_documents([doc])
    db = FAISS.from_documents(docs, embeddings)
    databases[municipality_name] = db

In [None]:
## ReAct agent prompt with few-shot examples (used together with the retrieval tool)
# Chat model that drives the ReAct agent.
llm = ChatOpenAI(model="gpt-3.5-turbo-1106")

# Prompt text - feel free to modify it. The placeholders {tools}, {tool_names}, {input}
# and {agent_scratchpad} are the template's declared input variables.
react_few_shot_template = """Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question. Answer in Norwegian

Few-shot examples:
Question: Hva er datoen for vedtaket av Kommunedelplan for sentrum av bystyret?
Final Answer: Datoen for vedtaket av Kommunedelplan for sentrum av bystyret er 26.8.2021.

Question: Hvor kan man finne felles bestemmelser i dokumentet om Kristiansund?
Final Answer: Felles bestemmelser kan finnes på side 3 i dokumentet om Kristiansund.

Question: Hva er hovedtemaet for avsnitt 3.6 i dokumentet?
Final Answer: Hovedtemaet for avsnitt 3.6 i dokumentet er offentlige områder.

Question: Hva er emnet for kapittel 4 i dokumentet fra Kristiansund?
Final Answer: Emnet for kapittel 4 i dokumentet fra Kristiansund er bebyggelse og anlegg.
                        
Begin!

Question: {input}
Thought: {agent_scratchpad} 
"""

prompt = PromptTemplate(
    input_variables=["tool_names", "tools", "input", "agent_scratchpad"],
    template=react_few_shot_template,
)

In [None]:
## Answer every question with the ReAct agent and keep the retrieved context
# Read each column once; indexing `references[...]` inside the loop re-reads the whole
# column on every iteration.
questions = references["spørsmål"]
municipalities = references["kommunenavn"]

list_of_answers_react = []
for num in range(instances):
    query = questions[num]
    kommunenavn = municipalities[num]
    # Each municipality has its own vector store, so the retriever tool is rebuilt per question.
    db = databases[kommunenavn]

    # Context saved next to the answer: the chunks returned by a direct similarity search
    # on the question, joined into one text. (The agent performs its own searches via the tool.)
    found_docs = db.similarity_search(query)
    big_chunk_of_text = '\n'.join(doc.page_content for doc in found_docs)

    retriever = db.as_retriever()
    tool = create_retriever_tool(
        retriever,
        "search_planning_regulations",
        "Searches and returns excerpts planning regulation documents from different municipalities",
    )
    tools = [tool]
    agent = create_react_agent(llm, tools, prompt)
    agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, handle_parsing_errors=True)
    # Pass the question as a plain string. The previous `{query}` was a set literal, so the
    # prompt's {input} slot received the repr of a one-element set instead of the question.
    answer = agent_executor.invoke({"input": query})
    print("########### HER ER ANSWER")
    print(answer)
    list_of_answers_react.append({"svar": answer["output"], "kontekst": big_chunk_of_text})

In [None]:
## Use the RAG with more context
# Plain retrieval-augmented generation: the retrieved chunks are placed directly in the
# prompt and the model answers in a single call (no agent, no tools).
list_of_answers_RAG_more_context = []

# Choose the LLM that will answer the questions
llm = ChatOpenAI(model="gpt-3.5-turbo-1106")

# Define the prompt template
prompt_template = PromptTemplate(
    template="""Task: Provide an answer
You are going to provide an answer to this question: {question}, based off this context: {context}.
Keep the answer to no longer than a sentence. Give the answer in Norwegian.
agent_scratchpad: This is the scratchpad where you can store intermediate information.""",
    input_variables=["question", "context"]
)
chain = prompt_template | llm

# Read each column once instead of once per iteration.
questions = references["spørsmål"]
municipalities = references["kommunenavn"]

for num in range(instances):
    query = questions[num]
    kommunenavn = municipalities[num]
    db = databases[kommunenavn]
    found_docs = db.similarity_search(query)
    # Separate the chunks with newlines. Previously they were appended to the literal
    # "Context" with no separator, fusing the prefix with the first chunk and the last
    # word of each chunk with the first word of the next.
    context = "\n".join(doc.page_content for doc in found_docs)
    print(context)
    answer = chain.invoke(
        {
            "question": query,
            "context": context,
        }
    )
    list_of_answers_RAG_more_context.append({"svar": answer.content, "kontekst": context})

In [None]:
## RAG with simple agent
# OpenAI tools agent using the prompt published on the LangChain hub.
prompt = hub.pull("hwchase17/openai-tools-agent")

llm = ChatOpenAI(temperature=0)

# Read each column once instead of once per iteration.
questions = references["spørsmål"]
municipalities = references["kommunenavn"]

list_of_answers_with_simple_agent = []
for i in range(instances):
    question = questions[i]
    kommunenavn = municipalities[i]
    # Each municipality has its own vector store, so the tool is rebuilt per question.
    db = databases[kommunenavn]
    retriever = db.as_retriever()

    # The tool name and description are shown to the model and decide when it calls the
    # tool, so they must describe the indexed documents. The previous values were the
    # "State of the Union" example from the LangChain documentation.
    tool = create_retriever_tool(
        retriever,
        "search_planning_regulations",
        "Searches and returns excerpts from municipal planning regulation documents.",
    )
    tools = [tool]

    agent = create_openai_tools_agent(llm, tools, prompt)
    agent_executor = AgentExecutor(agent=agent, tools=tools)

    result = agent_executor.invoke({"input": question})
    list_of_answers_with_simple_agent.append(result["output"])

In [None]:
## Evaluate the answers and make it resonate.
## Use the RAG again for the questions that are wrong
## Evaluate again

In [None]:
# Ask a judge model which of the three predicted answers best answers each question,
# given the contexts stored with the ReAct and plain-RAG answers. The raw verdict text
# ("Prediction 1" / "Prediction 2" / "Prediction 3" / "None") is collected in content_list.
content_list = []
preds_react =list_of_answers_react
preds_more_context = list_of_answers_RAG_more_context
preds_simple_agent = list_of_answers_with_simple_agent
# Reference answers; not shown to the judge in this cell.
refs = references["svar"]
# The questions themselves - these fill the {question} slot of the prompt.
questions = references["spørsmål"]
# Choose the LLM that acts as judge
llm = ChatOpenAI(model="gpt-3.5-turbo-1106")

# Define the prompt template
prompt_template = PromptTemplate(
    template="""Task: Answer Evaluation
You are given a question, three prediction answers, and two chunks of context. Your task is to determine if any of the answers correctly answer the question and which predicted answer better answers the question based on the provided context. If none of the answers answer the question correctly, return None. Only answer with Prediction 1 or Prediction 2 or Prediction 3 or None.
- Question: {question}
- Prediction 1: {prediction_1}
- Prediction 2: {prediction_2}
- Prediction 3: {prediction_3}
- Context 1: {context_1}
- Context 2: {context_2}
Which predicted answer better answers the question based on the provided context? [Prediction 1/Prediction 2/Prediction 3/None]
agent_scratchpad: This is the scratchpad where you can store intermediate information.""",
    input_variables=["question", "prediction_1", "prediction_2", "prediction_3", "context_1", "context_2"]
)
chain = prompt_template | llm

for num in range(instances):
    score = chain.invoke(
        {
            # Must be the question. Previously `refs[num]` (the reference ANSWER) was passed
            # here, so the judge never saw the question and saw the ground truth instead.
            "question": questions[num],
            "prediction_1": preds_react[num]["svar"],
            "prediction_2": preds_more_context[num]["svar"],
            "prediction_3": preds_simple_agent[num],
            "context_1": preds_react[num]["kontekst"],  # context retrieved alongside the ReAct answer
            "context_2": preds_more_context[num]["kontekst"],  # context used for the plain-RAG answer
        }
    )
    content_list.append(score.content)

In [None]:
## Collect the indexes for which the judge selected none of the three predictions, so that
## a new answer can be generated for them.
# The final-selection cell falls back to `revised_answers[num]` whenever the verdict
# contains no "Prediction N" label, so that exact criterion is used here. Testing for the
# substring "None" missed verdicts such as "none" or free text, which then raised a
# KeyError in the final selection.
prediction_labels = ("Prediction 1", "Prediction 2", "Prediction 3")
indexer_for_none = [
    num
    for num in range(instances)
    if not any(label in content_list[num] for label in prediction_labels)
]

In [None]:
## Get answers for the indexes the evaluation model deemed as not answering the question
# Maps question index -> newly generated answer text.
revised_answers = {}
preds_react =list_of_answers_react
preds_more_context = list_of_answers_RAG_more_context
preds_simple_agent = list_of_answers_with_simple_agent
# Reference answers; they must NOT be shown to the model in this cell.
refs = references["svar"]
# The questions themselves - these fill the {question} slot of the prompt.
questions = references["spørsmål"]
# Choose the LLM that will generate the revised answers
llm = ChatOpenAI(model="gpt-3.5-turbo-1106")

# Define the prompt template
prompt_template = PromptTemplate(
    template="""Task: Answer Evaluation
You are given a question and two chunks of context. Your task is to best answer the questions based on the information in the chunks of context. Only use the context to answer the question and give the answer in Norwegian.
- Question: {question}
- Context 1: {context_1}
- Context 2: {context_2}
agent_scratchpad: This is the scratchpad where you can store intermediate information.""",
    input_variables=["question", "context_1", "context_2"]
)
chain = prompt_template | llm

for num in indexer_for_none:
    score = chain.invoke(
        {
            # Must be the question. Previously `refs[num]` (the reference ANSWER) was passed
            # here, which leaked the ground truth into the answer-generation prompt.
            "question": questions[num],
            "context_1": preds_react[num]["kontekst"],  # context retrieved alongside the ReAct answer
            "context_2": preds_more_context[num]["kontekst"],  # context used for the plain-RAG answer
        }
    )
    revised_answers[num] = score.content

In [None]:
# Build the final answer list: for each question take the prediction named in the judge's
# verdict (first matching label wins, checked in the order 1, 2, 3); when no label is
# present, use the regenerated answer stored under that index in revised_answers.
endelig_liste_med_svar = []
for num in range(instances):
    verdict = content_list[num]
    if "Prediction 1" in verdict:
        chosen_answer = list_of_answers_react[num]["svar"]
    elif "Prediction 2" in verdict:
        chosen_answer = preds_more_context[num]["svar"]
    elif "Prediction 3" in verdict:
        chosen_answer = preds_simple_agent[num]
    else:
        chosen_answer = revised_answers[num]
    endelig_liste_med_svar.append(chosen_answer)
    


In [None]:
# Grade every final answer against its reference answer with an LLM judge; the raw
# reply text for each question is appended to ja_nei_liste.
ja_nei_liste = []

# Model used as grader
llm = ChatOpenAI(model="gpt-3.5-turbo-1106")

# Grading prompt: the reference and predicted answers go in, a Yes/No verdict is requested.
grading_template = """Task: Answer Evaluation
You are given a reference answer and a predicted answer. Your task is to determine whether the predicted answer matches the reference answer correctly. It does not have to be an exact match, but it should be somewhat the same.
- The reference answer is the correct answer.
- The predicted answer is the answer generated by a model or provided by a user.
Your response should indicate whether the predicted answer is correct or not.
Reference answer: {reference}
Predicted answer: {prediction}
Is the predicted answer correct? [Yes/No]
agent_scratchpad: This is the scratchpad where you can store intermediate information."""
prompt_template = PromptTemplate(
    input_variables=["prediction", "reference"],
    template=grading_template,
)
chain = prompt_template | llm

for num in range(instances):
    grading_input = {
        "reference": refs[num],
        "prediction": endelig_liste_med_svar[num],
    }
    score = chain.invoke(grading_input)
    ja_nei_liste.append(score.content)

In [None]:
# Tally the grader's verdicts.
# Each reply is classified once, by the first standalone "yes" or "no" word it contains
# (case-insensitive). Plain substring tests counted "No" inside words such as "Not" or
# "None" and could count a single reply as both Yes and No.
verdict_pattern = re.compile(r"\b(yes|no)\b", re.IGNORECASE)

count_of_yes = 0
count_of_no = 0
count_unparsed = 0  # replies containing neither verdict word

for content in ja_nei_liste:
    verdict = verdict_pattern.search(content)
    if verdict is None:
        count_unparsed += 1
    elif verdict.group(1).lower() == "yes":
        count_of_yes += 1
    else:
        count_of_no += 1

print("Count of 'Yes':", count_of_yes)
print("Count of 'no':", count_of_no)
if count_unparsed:
    # Surface replies that could not be classified instead of silently dropping them.
    print("Count of unparsed replies:", count_unparsed)