In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredFileLoader, DirectoryLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
import os
from langchain_community.vectorstores import Qdrant
from datasets import load_dataset
from langchain.tools.retriever import create_retriever_tool

In [None]:
# Credentials must never be stored in the notebook itself: anything committed or
# shared with the file is effectively public.
# SECURITY: the key that used to be hardcoded in this cell must be treated as
# leaked and revoked in the OpenAI dashboard.
#
# Use the key already present in the environment; otherwise ask for it
# interactively without echoing it to the cell output.
import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

In [None]:
# Ingest: read every PDF below data/ (recursive glob) with the Unstructured loader,
# showing a progress bar while the files are processed.
loader = DirectoryLoader(
    'data/',
    glob="**/*.pdf",
    show_progress=True,
    loader_cls=UnstructuredFileLoader,
)
documents = loader.load()

# Chunk: split the loaded documents with chunk_size=100 and chunk_overlap=20
# (measured by the splitter's length function; presumably characters - confirm).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
)
docs = text_splitter.split_documents(documents)

# Embed: sentence-transformers model used to vectorise the chunks for retrieval.
embeddings = SentenceTransformerEmbeddings(
    model_name='sentence-transformers/paraphrase-multilingual-mpnet-base-v2',
)

In [None]:
# Build a Qdrant vector store from the document chunks (`docs`) using the
# sentence-transformers `embeddings`. With location=":memory:" nothing is
# written to disk, so the collection has to be rebuilt in every kernel session.
db = Qdrant.from_documents(
    docs,
    embeddings,
    location=":memory:",  # Local mode with in-memory storage only
    collection_name="my_documents",
)
# Retriever view over the store with default search settings; this is what the
# agent tool queries.
retriever = db.as_retriever()

In [None]:
# Expose the retriever to the agent as a tool. The tool name and description
# are what the LLM reads when deciding whether (and how) to call the tool, so
# they have to describe the corpus that is actually indexed: the PDFs loaded
# from data/. The previous "State of the Union" wording did not match that
# corpus and could steer the agent away from using the retriever.
# TODO: tailor the description to the subject matter of the PDFs.
tool = create_retriever_tool(
    retriever,
    "search_documents",
    "Searches and returns excerpts from the indexed PDF documents. "
    "Use this tool to look up the information needed to answer the question.",
)
tools = [tool]

In [None]:
from langchain import hub

# Pull the "hwchase17/openai-tools-agent" prompt via the `hub` module imported
# from langchain at the top of this cell.
# NOTE(review): a later cell rebinds the name `hub` with
# `import tensorflow_hub as hub`, so re-running this line on its own after that
# cell would use the wrong module.
prompt = hub.pull("hwchase17/openai-tools-agent")
# Bare last expression so the notebook displays the prompt's messages.
prompt.messages

In [None]:
from langchain_openai import ChatOpenAI

# Chat model that drives the retrieval agent. temperature=0 keeps sampling as
# deterministic as the API allows. No model name is given, so the library
# default is used.
# NOTE(review): a later cell rebinds `llm` to a different ChatOpenAI instance.
llm = ChatOpenAI(temperature=0)

In [None]:
from langchain.agents import AgentExecutor, create_openai_tools_agent

# Combine the chat model, the tool list and the pulled prompt into an
# OpenAI-tools agent.
agent = create_openai_tools_agent(llm, tools, prompt)
# The executor is the object the rest of the notebook calls via `.invoke(...)`;
# it is given the same tool list as the agent.
agent_executor = AgentExecutor(agent=agent, tools=tools)

In [None]:
# Location of the question/answer CSV. An absolute, user-specific path only
# works on one machine, so it can be overridden with the QA_CSV_PATH
# environment variable; the original location remains the default.
QA_CSV_PATH = os.environ.get(
    "QA_CSV_PATH",
    r'/Users/adrianfolge/Documents/lokal:skole/Master/data/synthetic_data/question_with_answers.csv',
)

# split="train[:50]" restricts the evaluation set to at most the first 50 rows.
dataset = load_dataset('csv', data_files=QA_CSV_PATH, split="train[:50]")

In [None]:
# Ask the agent every question in the loaded split and collect its answers.
#
# The loop walks the "Question" column itself instead of indexing it with a
# hardcoded range(50): it stays in step with however many rows the split
# actually contains (no IndexError on a shorter CSV) and reads the column once
# rather than once per iteration.
list_of_answers = []
for question in dataset["Question"]:
    # f-string keeps the original coercion of the cell value to text.
    result = agent_executor.invoke({"input": f"{question}"})
    # The executor returns a dict; the final answer text is under "output".
    list_of_answers.append(result["output"])



In [None]:
# Convert the list to a pandas DataFrame
#df = pd.DataFrame(list_of_answers, columns=['Text'])

# Specify the file path
#file_path = "/Users/adrianfolge/Documents/lokal:skole/Master/data/Results/Chroma_with_agents.csv"

# Write the DataFrame to a CSV file
#df.to_csv(file_path, index=False)

In [None]:
from scipy.spatial import distance
from sentence_transformers import SentenceTransformer, util
import tensorflow_hub as hub
def embed(input, model):
    """Apply `model` to `input` and return the model's result unchanged.

    `model` only needs to be callable with a single positional argument.
    """
    outputs = model(input)
    return outputs

def SAS(preds, refs, model):
    """Return the mean cosine similarity between paired predictions and references.

    Args:
        preds: Sequence of predicted answers.
        refs: Sequence of reference answers, aligned index-by-index with `preds`.
        model: Object exposing `encode(sequence)` that returns one embedding
            per item (e.g. a SentenceTransformer).

    Returns:
        The average of the per-pair cosine similarities as a float.

    Raises:
        ValueError: If `preds` is empty or the two sequences differ in length.
    """
    # Fail early with a clear message instead of a ZeroDivisionError on empty
    # input, or an IndexError / silently ignored items on a length mismatch.
    if len(preds) == 0:
        raise ValueError("SAS requires at least one prediction/reference pair")
    if len(preds) != len(refs):
        raise ValueError(
            f"SAS needs equally long inputs, got {len(preds)} predictions "
            f"and {len(refs)} references"
        )

    embeddings_preds = model.encode(preds)
    embeddings_refs = model.encode(refs)

    # pytorch_cos_sim returns a 1x1 matrix for a single pair of vectors;
    # [0][0].item() extracts the plain Python float.
    similarities = [
        util.pytorch_cos_sim(pred_embedding, ref_embedding)[0][0].item()
        for pred_embedding, ref_embedding in zip(embeddings_preds, embeddings_refs)
    ]
    return sum(similarities) / len(similarities)

def evaluate_predictions(references, predictions, encoder_model=None, transformer_model=None):
    """Score predictions against references with two semantic-similarity metrics.

    Two averages are computed over the aligned (prediction, reference) pairs:
    a cosine similarity on Universal Sentence Encoder embeddings, and the
    sentence-transformers based `SAS` score.

    Args:
        references: Sequence of reference answers.
        predictions: Sequence of predicted answers, aligned with `references`.
        encoder_model: Optional pre-loaded callable encoder. When omitted, the
            Universal Sentence Encoder is loaded from its module URL.
        transformer_model: Optional pre-loaded model with an `encode` method.
            When omitted, the multilingual mpnet SentenceTransformer is loaded.
            Passing both models lets repeated calls skip the expensive loads.

    Returns:
        A dict with the original "Metric"/"Score" pair (transformer SAS) plus
        "Encoder Metric"/"Encoder Score" for the encoder-based average, which
        was previously computed and then discarded.

    Raises:
        ValueError: If `predictions` is empty or the inputs differ in length.
    """
    # Validate before loading any model so bad input fails fast and clearly.
    if len(predictions) == 0:
        raise ValueError("evaluate_predictions requires at least one prediction")
    if len(predictions) != len(references):
        raise ValueError(
            f"evaluate_predictions needs equally long inputs, got "
            f"{len(references)} references and {len(predictions)} predictions"
        )

    ## SAS encoder score
    if encoder_model is None:
        # Imported locally under an unambiguous name: the notebook binds the
        # global name `hub` to both langchain's hub and tensorflow_hub, so
        # which one a global lookup finds depends on cell execution order.
        import tensorflow_hub as tf_hub

        module_url = "https://www.kaggle.com/models/google/universal-sentence-encoder/frameworks/TensorFlow2/variations/universal-sentence-encoder/versions/2"
        encoder_model = tf_hub.load(module_url)

    # distance.cosine is a distance, so 1 - distance gives the similarity.
    # Each text is embedded as a one-item batch and row 0 is taken.
    list_of_similarity_scores = [
        1 - distance.cosine(
            embed([prediction], encoder_model)[0, :],
            embed([reference], encoder_model)[0, :],
        )
        for prediction, reference in zip(predictions, references)
    ]
    average_score = sum(list_of_similarity_scores) / len(list_of_similarity_scores)

    ## SAS transformer score
    if transformer_model is None:
        transformer_model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')

    data = {
        "Metric": "Average SAS transformer Score",
        "Score":  SAS(predictions, references, transformer_model),
        "Encoder Metric": "Average SAS encoder Score",
        "Encoder Score": float(average_score),
    }
    return data

In [None]:
# Ground-truth answers come from the dataset's "Answer" column; the predictions
# are the agent outputs collected in `list_of_answers`.
references = dataset["Answer"]
preds = list_of_answers
# evaluate_predictions takes (references, predictions), in that order. As the
# last expression in the cell, its result dict is displayed.
evaluate_predictions(references,preds)

In [None]:
# Baseline check: score the fixed reply "Jeg vet ikke" (presumably Norwegian for
# "I don't know") against the first reference answer.
# NOTE(review): the variable names are easy to misread. `jeg_vet_ikke` holds the
# REFERENCE (first ground-truth answer), while `i_dont_know` holds the
# PREDICTION text "Jeg vet ikke". The call below still passes them in the
# (references, predictions) order that evaluate_predictions expects.
jeg_vet_ikke = [references[0]]
i_dont_know = ["Jeg vet ikke"]
evaluate_predictions(jeg_vet_ikke, i_dont_know)

In [None]:
from langchain.agents import create_openai_functions_agent
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser

# Raw judge replies, one per (reference, prediction) pair. Reset on every run
# of this cell so re-running does not append duplicates.
content_list = []

# Choose the LLM that will act as the judge. temperature=0 keeps the verdicts
# as reproducible as the API allows, matching the agent's own LLM settings.
# NOTE: this rebinds the notebook-global `llm`; an agent that was already built
# keeps the model object it was created with.
llm = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0)

# Define the prompt template. The stray "agent_scratchpad" line from an agent
# prompt was removed: this is a plain prompt | llm chain with no scratchpad, so
# that line was only noise in the judge's instructions.
prompt_template = PromptTemplate(
    template="""Task: Answer Evaluation
You are given a reference answer and a predicted answer. Your task is to determine whether the predicted answer matches the reference answer correctly. It does not have to be an exact match, but it should be somewhat the same.
- The reference answer is the correct answer.
- The predicted answer is the answer generated by a model or provided by a user.
Your response should indicate whether the predicted answer is correct or not.
Reference answer: {reference}
Predicted answer: {prediction}
Is the predicted answer correct? [Yes/No]""",
    input_variables=["prediction", "reference"]
)
chain = prompt_template | llm

# Judge every available pair instead of a hardcoded range(50), which raised an
# IndexError whenever fewer than 50 answers existed.
if len(references) != len(list_of_answers):
    raise ValueError(
        f"Cannot judge answers: {len(references)} references vs "
        f"{len(list_of_answers)} predictions"
    )

# An explicit loop (rather than a comprehension) keeps the replies gathered so
# far in `content_list` if one API call fails midway.
for reference, prediction in zip(references, list_of_answers):
    score = chain.invoke(
        {
            "reference": reference,
            "prediction": prediction,
        }
    )
    content_list.append(score.content)


In [None]:
# Display the raw judge replies so their exact wording can be inspected.
content_list

In [None]:
import re


def _verdict(answer):
    """Classify one judge reply as "Yes" or "No" by its first word, else None.

    Chat models rarely answer with the bare token: replies such as "Yes.",
    "yes" or "No, the prediction ..." are common. Matching the first word
    case-insensitively counts those, whereas an exact list.count('Yes')
    silently dropped them.
    """
    match = re.match(r"\W*(yes|no)\b", answer, flags=re.IGNORECASE)
    return match.group(1).capitalize() if match else None


verdicts = [_verdict(answer) for answer in content_list]
count_yes = verdicts.count('Yes')
count_no = verdicts.count('No')
# Replies that start with neither word are reported rather than hidden, so the
# two counts can be checked against the number of judged answers.
count_unparsed = verdicts.count(None)

# Displaying the counts
print("Number of 'Yes':", count_yes)
print("Number of 'No':", count_no)
if count_unparsed:
    print("Number of unrecognized answers:", count_unparsed)