In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredFileLoader, DirectoryLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
import os
from langchain_community.vectorstores import FAISS
from datasets import load_dataset
import pandas as pd

In [None]:
# Never hardcode the OpenAI API key in the notebook: anyone who can read the
# file (or its version history) can use the key. A key that was ever committed
# here must be treated as compromised and revoked.
# Reuse OPENAI_API_KEY when the environment already provides it; otherwise ask
# for it interactively without echoing the input.
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
# Load every PDF matched by the recursive glob under data/, one file at a time
# through UnstructuredFileLoader, showing a progress bar while loading.
loader = DirectoryLoader(
    'data/',
    glob="**/*.pdf",
    show_progress=True,
    loader_cls=UnstructuredFileLoader,
)
documents = loader.load()

# Cut the loaded documents into chunks (chunk_size=500, no overlap between
# neighbouring chunks) so that each chunk can be embedded on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
)
docs = text_splitter.split_documents(documents)

# Sentence-transformers model used to embed the chunks.
embeddings = SentenceTransformerEmbeddings(
    model_name='sentence-transformers/paraphrase-multilingual-mpnet-base-v2',
)

In [None]:
# Embed the chunks in `docs` and build the FAISS vector store from them.
db = FAISS.from_documents(docs, embeddings)
# Retriever view of the store; no arguments are passed, so the library's
# default search settings apply.
retriever = db.as_retriever()

In [None]:
from langchain.tools.retriever import create_retriever_tool

# Expose the retriever to the agent as a tool. The model chooses tools based on
# their name and description, so both must describe the corpus that was really
# indexed: the PDF files loaded from data/. The previous name/description
# ("State of the Union") were tutorial leftovers that did not match the indexed
# PDFs and could steer the agent away from using retrieval for questions about
# these documents.
tool = create_retriever_tool(
    retriever,
    "search_documents",
    "Searches and returns excerpts from the PDF documents indexed from the "
    "data/ directory. Use this tool for any question about the content of "
    "those documents.",
)
# The agent and the executor both take the tools as a list.
tools = [tool]

In [None]:
from langchain import hub

# Pull the "hwchase17/openai-tools-agent" prompt from the LangChain hub.
prompt = hub.pull("hwchase17/openai-tools-agent")
# Last expression of the cell: display the prompt's messages for inspection.
prompt.messages

In [None]:
from langchain_openai import ChatOpenAI

# Chat model that drives the agent. No model name is passed, so the library
# default is used; temperature=0 requests the least random sampling.
llm = ChatOpenAI(temperature=0)

In [None]:
from langchain.agents import AgentExecutor, create_openai_tools_agent

# Build the agent from the chat model, the tool list and the pulled prompt.
agent = create_openai_tools_agent(llm, tools, prompt)
# Wrap the agent in an executor; the cells below call it through .invoke().
agent_executor = AgentExecutor(agent=agent, tools=tools)

In [None]:
# Send a greeting to the agent (Norwegian for "Hi, my name is Adrian").
result = agent_executor.invoke({"input": "Hei, jeg heter Adrian"})

In [None]:
# Display the "output" entry of the most recent agent invocation stored in `result`.
result["output"]

In [None]:
# Ask the agent a question about the indexed documents (Norwegian for "What are
# the main intentions of the plan described in the document?").
result = agent_executor.invoke(
    {"input": "Hva er hovedintensjonene i planen som er beskrevet i dokumentet?"}
)

In [None]:
# Display the "output" entry of the most recent agent invocation stored in `result`.
result["output"]

In [None]:
# Read the question/answer CSV and keep only the slice "train[:10]" of it
# (the first rows of the default "train" split).
dataset = load_dataset(
    'csv',
    data_files='/Users/adrianfolge/Documents/lokal:skole/Master/data/synthetic_data/question_with_answers.csv',
    split="train[:10]",
)

In [None]:
# Ask the agent every question in the loaded dataset and collect its answers in
# order. Iterating over the "Question" column directly (instead of range(10)
# with dataset["Question"][i]) keeps the loop in step with the number of rows
# that were actually loaded, so a dataset with fewer than 10 rows no longer
# raises IndexError, and the column is fetched once instead of once per row.
list_of_answers = []
for question in dataset["Question"]:
    result = agent_executor.invoke({"input": f"{question}"})
    # Only the agent's final answer text is kept.
    list_of_answers.append(result["output"])



In [None]:
# Display the collected agent answers for a quick visual check.
list_of_answers

In [None]:
# Destination of the results file.
file_path = "/Users/adrianfolge/Documents/lokal:skole/Master/data/Results/FAISS_with_agents.csv"

# One row per collected answer, in a single column named 'Text'.
df = pd.DataFrame(list_of_answers, columns=['Text'])

# Persist the answers as CSV; the DataFrame index is not written.
df.to_csv(path_or_buf=file_path, index=False)