In [None]:
## Packages and OpenAI API key
from corrective_rag import corrective_rag_translated
from simple_agent_rag import simple_agent_rag, simple_agent_rag_translated
from naive_rag import naive_rag_translated, naive_rag
from react_rag import react_rag, react_rag_translated
from init_vectorstore import init_vectorstore, init_semantic_vectorstore
from evaluate_model import evaluate_model
from ensemble_model import ensemble_models
from semantic_model import semantic_model
from ragas_func import ragas_with_params
from helper_functions import create_predictions_dict, average_RAGAS_score
from langchain_openai import ChatOpenAI
from langchain_community.embeddings import SentenceTransformerEmbeddings
import os
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from datasets import load_dataset
import matplotlib.pyplot as plt
# Keep an OPENAI_API_KEY that is already exported in the environment; only fall
# back to the placeholder when none is set, instead of always overwriting it.
os.environ.setdefault("OPENAI_API_KEY", "OPENAI_API_KEY")

In [None]:
## Evaluation size and dataset location
# Number of dataset rows loaded and handed to every pipeline and evaluator.
instances = 100
# CSV file holding the reference questions and answers.
file_path = (
    "/Users/adrianfolge/Documents/lokal:skole/Master/data/synthetic_data/"
    "version_3_dataset.csv"
)

In [None]:
## Shared components: text splitter, embedding models and chat LLM
# Splitter configured for 500-character chunks with no overlap.
text_splitter = CharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
)
# Multilingual sentence-transformer embeddings (used for the non-translated store).
embeddings_nor = SentenceTransformerEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
)
# OpenAI embeddings (used for the translated store).
embeddings_trans = OpenAIEmbeddings(model="text-embedding-3-small")
# Chat model handed to the agent-style pipelines and the ensemble.
llm = ChatOpenAI(model="gpt-3.5-turbo-1106")

In [None]:
## Build the vector stores consumed by the pipelines
# Store built from the multilingual embeddings, without the translate flag.
databases = init_vectorstore(embeddings_nor, text_splitter)
# Store built from the OpenAI embeddings with translate=True.
databases_translated = init_vectorstore(
    embeddings_trans,
    text_splitter,
    translate=True,
)
# Store that is later passed to semantic_model.
semantic_databases = init_semantic_vectorstore()

In [None]:
## Load the reference questions and answers
# Only the first `instances` rows of the CSV's "train" split are requested.
references = load_dataset(
    "csv",
    data_files=file_path,
    split=f"train[:{instances}]",
)
# Column names are Norwegian: "spørsmål" = question, "svar" = answer.
questions = references["spørsmål"]
refs = references["svar"]

In [None]:
## Run every pipeline and collect its answers (plus contexts where returned)
# Calls unpacked into two names yield (answers, contexts); the others yield
# a single answer list.
corrective_rag_translated_list, corrective_rag_translated_context = (
    corrective_rag_translated(instances, file_path, databases_translated, llm)
)
simple_agent_rag_list = simple_agent_rag(
    instances, file_path, databases, llm
)
simple_agent_rag_translated_list = simple_agent_rag_translated(
    instances, file_path, databases_translated, llm
)
naive_rag_list = naive_rag(
    instances, file_path, databases
)
naive_rag_translated_list = naive_rag_translated(
    instances, file_path, databases_translated
)
react_rag_translated_list, react_rag_translated_context = (
    react_rag_translated(instances, file_path, databases_translated, llm)
)
react_rag_list, react_rag_context = (
    react_rag(instances, file_path, databases, llm)
)
semantic_rag_list, semantic_rag_context = (
    semantic_model(instances, file_path, semantic_databases)
)

In [None]:
## Combine the individual model predictions into an ensemble
# Gather every pipeline's answer list into one predictions structure.
predictions_dict = create_predictions_dict(
    corrective_rag_translated_list,
    simple_agent_rag_list,
    simple_agent_rag_translated_list,
    naive_rag_list,
    naive_rag_translated_list,
    react_rag_translated_list,
    react_rag_list,
    semantic_rag_list,
)
# The ensemble is given the translated ReAct contexts alongside the predictions.
ensembling_models_list = ensemble_models(
    predictions_dict,
    references,
    instances,
    react_rag_translated_context,
    llm,
)

In [None]:
## Compute the RAGAS scores for every model
# Argument order used throughout: answers, questions, third argument, references.
# The third argument is the returned context for pipelines that provide one.
# NOTE(review): the simple-agent and naive pipelines return no separate context,
# so their answer list is passed a second time in that position — confirm this
# is intended, since it differs from the models scored against real contexts.
simple_agent_rag_score_RAGAS = ragas_with_params(
    simple_agent_rag_list, questions, simple_agent_rag_list, refs
)
corrective_rag_translated_score_RAGAS = ragas_with_params(
    corrective_rag_translated_list, questions, corrective_rag_translated_context, refs
)
simple_agent_translated_rag_score_RAGAS = ragas_with_params(
    simple_agent_rag_translated_list, questions, simple_agent_rag_translated_list, refs
)
naive_rag_score_RAGAS = ragas_with_params(
    naive_rag_list, questions, naive_rag_list, refs
)
naive_rag_translated_score_RAGAS = ragas_with_params(
    naive_rag_translated_list, questions, naive_rag_translated_list, refs
)
react_rag_translated_score_RAGAS = ragas_with_params(
    react_rag_translated_list, questions, react_rag_translated_context, refs
)
react_rag_score_RAGAS = ragas_with_params(
    react_rag_list, questions, react_rag_context, refs
)
semantic_rag_score_RAGAS = ragas_with_params(
    semantic_rag_list, questions, semantic_rag_context, refs
)
# The ensemble is scored against the translated ReAct contexts.
ensemble_models_score_RAGAS = ragas_with_params(
    ensembling_models_list, questions, react_rag_translated_context, refs
)

In [None]:
## Show each model's RAGAS result and reduce it to one average value
# Each pair below prints the raw result, then stores its average for later cells.

print(simple_agent_rag_score_RAGAS)
avg_simple_agent_rag_score_RAGAS = average_RAGAS_score(simple_agent_rag_score_RAGAS)

print(corrective_rag_translated_score_RAGAS)
avg_corrective_rag_translated_score_RAGAS = average_RAGAS_score(corrective_rag_translated_score_RAGAS)

print(simple_agent_translated_rag_score_RAGAS)
avg_simple_agent_translated_rag_score_RAGAS = average_RAGAS_score(simple_agent_translated_rag_score_RAGAS)

print(naive_rag_score_RAGAS)
avg_naive_rag_score_RAGAS = average_RAGAS_score(naive_rag_score_RAGAS)

print(naive_rag_translated_score_RAGAS)
avg_naive_rag_translated_score_RAGAS = average_RAGAS_score(naive_rag_translated_score_RAGAS)

print(react_rag_translated_score_RAGAS)
avg_react_rag_translated_score_RAGAS = average_RAGAS_score(react_rag_translated_score_RAGAS)

print(react_rag_score_RAGAS)
avg_react_rag_score_RAGAS = average_RAGAS_score(react_rag_score_RAGAS)

print(semantic_rag_score_RAGAS)
avg_semantic_rag_score_RAGAS = average_RAGAS_score(semantic_rag_score_RAGAS)

print(ensemble_models_score_RAGAS)
avg_ensemble_models_score_RAGAS = average_RAGAS_score(ensemble_models_score_RAGAS)

In [None]:
## Yes/no evaluation of every model's answers against the references
# Each section prints a heading, evaluates one model and ends with a separator.
section_separator = "#############"

print("Scores for corrective RAG")
corrective_rag_score = evaluate_model(corrective_rag_translated_list, refs, instances)
print(section_separator)

print("Scores for simple agent")
simple_agent_score = evaluate_model(simple_agent_rag_list, refs, instances)
print(section_separator)

print("Scores for simple agent translated")
simple_agent_translated_score = evaluate_model(simple_agent_rag_translated_list, refs, instances)
print(section_separator)

print("Scores for naive rag")
naive_rag_score = evaluate_model(naive_rag_list, refs, instances)
print(section_separator)

print("Scores for naive rag translated")
naive_rag_translated_score = evaluate_model(naive_rag_translated_list, refs, instances)
print(section_separator)

print("Scores for react RAG")
react_rag_score = evaluate_model(react_rag_list, refs, instances)
print(section_separator)

print("Scores for react RAG translated")
react_rag_translated_score = evaluate_model(react_rag_translated_list, refs, instances)
print(section_separator)

print("Scores for semantic RAG")
semantic_rag_score = evaluate_model(semantic_rag_list, refs, instances)
print(section_separator)

print("Scores for ensembling models")
ensemble_models_score = evaluate_model(ensembling_models_list, refs, instances)
print(section_separator)

In [None]:
## Answer relevancy per model, read from the RAGAS results
answer_relevancy_ensemble = ensemble_models_score_RAGAS["answer_relevancy"]
answer_relevancy_corrective = corrective_rag_translated_score_RAGAS["answer_relevancy"]
answer_relevancy_simple_agent_translated = simple_agent_translated_rag_score_RAGAS["answer_relevancy"]
answer_relevancy_simple_agent = simple_agent_rag_score_RAGAS["answer_relevancy"]
answer_relevancy_naive_rag = naive_rag_score_RAGAS["answer_relevancy"]
answer_relevance_naive_rag_translated = naive_rag_translated_score_RAGAS["answer_relevancy"]
answer_relevancy_react = react_rag_score_RAGAS["answer_relevancy"]
answer_relevancy_react_translated = react_rag_translated_score_RAGAS["answer_relevancy"]
answer_relevancy_semantic = semantic_rag_score_RAGAS["answer_relevancy"]

# Pair every bar label with its score; insertion order is the plotting order.
relevancy_by_model = {
    "Corrective RAG": answer_relevancy_corrective,
    "Simple agent": answer_relevancy_simple_agent,
    "Simple agent translated": answer_relevancy_simple_agent_translated,
    "Naive rag": answer_relevancy_naive_rag,
    "Naive rag translated": answer_relevance_naive_rag_translated,
    "React rag": answer_relevancy_react,
    "React rag translated": answer_relevancy_react_translated,
    "Semantic rag": answer_relevancy_semantic,
    "Model ensembling": answer_relevancy_ensemble,
}
model_names = list(relevancy_by_model.keys())
scores = list(relevancy_by_model.values())

# Bar chart of the answer relevancy scores
plt.figure(figsize=(10, 6))
plt.bar(model_names, scores, color="skyblue")
plt.xlabel("Models")
plt.ylabel("Scores")
plt.title("Answer relevancy scores for the models")
# The y-axis is cropped to 0.5-1, so bar heights are not proportional to the scores.
plt.ylim(0.5, 1)
plt.grid(axis="y", linestyle="--", alpha=0.7)
plt.xticks(rotation=45, ha="right")
plt.show()

In [None]:
# Average RAGAS score per model
# Pair every bar label with its score; insertion order is the plotting order.
avg_ragas_by_model = {
    "Corrective RAG": avg_corrective_rag_translated_score_RAGAS,
    "Simple agent": avg_simple_agent_rag_score_RAGAS,
    "Simple agent translated": avg_simple_agent_translated_rag_score_RAGAS,
    "Naive rag": avg_naive_rag_score_RAGAS,
    "Naive rag translated": avg_naive_rag_translated_score_RAGAS,
    "React rag": avg_react_rag_score_RAGAS,
    "React rag translated": avg_react_rag_translated_score_RAGAS,
    "Semantic rag": avg_semantic_rag_score_RAGAS,
    "Model ensembling": avg_ensemble_models_score_RAGAS,
}
model_names = list(avg_ragas_by_model.keys())
scores = list(avg_ragas_by_model.values())

# Bar chart of the average RAGAS scores
plt.figure(figsize=(10, 6))
plt.bar(model_names, scores, color="skyblue")
plt.xlabel("Models")
plt.ylabel("Scores")
plt.title("Average RAGAS scores for the models")
# The y-axis is cropped to 0.8-1, so bar heights are not proportional to the scores.
plt.ylim(0.8, 1)
plt.grid(axis="y", linestyle="--", alpha=0.7)
plt.xticks(rotation=45, ha="right")
plt.show()

In [None]:
# Correct / not-correct score per model (from the yes/no evaluation)
# Pair every bar label with its score; insertion order is the plotting order.
correct_by_model = {
    "Corrective RAG": corrective_rag_score,
    "Simple agent": simple_agent_score,
    "Simple agent translated": simple_agent_translated_score,
    "Naive rag": naive_rag_score,
    "Naive rag translated": naive_rag_translated_score,
    "React rag": react_rag_score,
    "React rag translated": react_rag_translated_score,
    "Semantic rag": semantic_rag_score,
    "Model ensembling": ensemble_models_score,
}
model_names = list(correct_by_model.keys())
scores = list(correct_by_model.values())

# Bar chart of the correct / not-correct scores
plt.figure(figsize=(10, 6))
plt.bar(model_names, scores, color="skyblue")
plt.xlabel("Models")
plt.ylabel("Scores")
plt.title("Correct/Not correct Scores of Different Models")
# Full 0-1 range on the y-axis.
plt.ylim(0, 1)
plt.grid(axis="y", linestyle="--", alpha=0.7)
plt.xticks(rotation=45, ha="right")
plt.show()

In [None]:
## Write all results to a text file
import math


# Every list below uses the same model order as the plots:
# corrective, simple agent, simple agent translated, naive, naive translated,
# react, react translated, semantic, ensemble.
answer_relevancy_scores = [answer_relevancy_corrective, answer_relevancy_simple_agent, answer_relevancy_simple_agent_translated, answer_relevancy_naive_rag, answer_relevance_naive_rag_translated, answer_relevancy_react, answer_relevancy_react_translated, answer_relevancy_semantic,answer_relevancy_ensemble]
average_ragas_scores = [avg_corrective_rag_translated_score_RAGAS, avg_simple_agent_rag_score_RAGAS, avg_simple_agent_translated_rag_score_RAGAS, avg_naive_rag_score_RAGAS, avg_naive_rag_translated_score_RAGAS, avg_react_rag_score_RAGAS, avg_react_rag_translated_score_RAGAS, avg_semantic_rag_score_RAGAS,avg_ensemble_models_score_RAGAS]
correct_not_scores = [corrective_rag_score, simple_agent_score, simple_agent_translated_score, naive_rag_score, naive_rag_translated_score, react_rag_score, react_rag_translated_score, semantic_rag_score,ensemble_models_score]

# Destination of the report; kept in a variable so the final message names the
# file that was actually written.
output_path = '/Users/adrianfolge/Documents/lokal:skole/Master/master_folder/eval_data/final_eval_data.txt'

# Open the report in write mode (an existing file is overwritten)
with open(output_path, 'w') as file:
    # Section 1: answer relevancy, one value per line
    file.write("Answer relevancy scores:\n")
    for item in answer_relevancy_scores:
        file.write(str(item) + '\n')

    # Blank line between sections
    file.write("\n")

    # Section 2: average RAGAS scores; NaN averages are written as 0
    file.write("Average ragas scores:\n")
    for item in average_ragas_scores:
        if math.isnan(item):
            file.write("0\n")
        else:
            file.write(str(item) + '\n')

    # Blank line between sections
    file.write("\n")

    # Section 3: correct / not-correct scores, one value per line
    file.write("Correct not correct scores:\n")
    for item in correct_not_scores:
        file.write(str(item) + '\n')

# Report the real destination (the old message named a non-existent 'lists.txt').
print(f"Lists have been saved to '{output_path}'.")
