In [None]:
import os

#os.environ["CUDA_VISIBLE_DEVICES"] = "1,6,7,8,9" # important to set before imports using torch

from ragas import evaluate, llms
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)
from datasets import Dataset
# from chatmodel import RAG
from tqdm.auto import tqdm
import pandas as pd
# from ragas.llms.base import BaseRagasLLM
# from ragas.embeddings.base import BaseRagasEmbeddings
from llm import HuggingfaceLlama2, ChatGPT
# from langchain.embeddings.huggingface import HuggingFaceEmbeddings
# from transformers import pipeline
# from transformers import LlamaTokenizer
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv, find_dotenv
from io import StringIO
import requests

# Load environment variables from a .env file, if one is found; the return value is discarded.
# NOTE(review): presumably this supplies the OpenAI API key needed by the evaluation cell — confirm.
_ = load_dotenv()

In [None]:
# Score every sample with RAGAS, one sample per `evaluate` call, and checkpoint the
# accumulated results to disk after each sample so an interrupted run keeps its progress.
EVAL_INPUT_PATH = 'only_recursive_splitter_evalresults.pkl'
EVAL_OUTPUT_PATH = "only_recursive_splitter_evalresults_ragas.pkl"
MAX_SAMPLES = 40  # upper bound on the number of rows to evaluate

# Loop-invariant work: read the input once and rename its columns to the names used
# by the metrics below.
dataset_pd = pd.read_pickle(EVAL_INPUT_PATH).rename(columns={
    'questions': 'question',
    'context': 'contexts',
    'sample_answers': 'answer',
    'model_answers': 'ground_truth',
})
judge_llm = llms.llm_factory(model='gpt-3.5-turbo-0125')

# Never index past the end of the data: an empty slice would produce an empty Dataset.
n_samples = min(MAX_SAMPLES, len(dataset_pd))

per_sample_results = []  # one single-row DataFrame per evaluated sample
overall_result_values = None
for i in tqdm(range(n_samples), desc="ragas evaluate"):
    dataset = Dataset.from_pandas(dataset_pd.iloc[i:i+1])
    result = evaluate(dataset, metrics=[
            context_precision,
            faithfulness,
            answer_relevancy,
            context_recall,
        ],
        raise_exceptions=False,
        #max_workers=4,
        llm=judge_llm
    )
    per_sample_results.append(result.to_pandas())
    # Rebuild the combined frame with concat: assigning a whole DataFrame to
    # `.loc[i]` is not a valid way to append a row.
    overall_result_values = pd.concat(per_sample_results, ignore_index=True)
    overall_result_values.to_pickle(EVAL_OUTPUT_PATH)

overall_result_values

In [None]:
# Reload the checkpointed evaluation results from disk; as the last expression of the
# cell, the resulting DataFrame is shown as the cell output.
pd.read_pickle("only_recursive_splitter_evalresults_ragas.pkl")

In [None]:
# Compare the evaluated runs: compute the mean of every metric per run, then export
# the comparison table as CSV and print it as HTML.
nlp_rag_reindexed = pd.read_pickle("nlp-rag-reindexedresults_with_context1_ragas.pkl")
nlp_rag_reindexed_2 = pd.read_pickle("nlp-rag-reindexedresults_with_context2_ragas.pkl")
only_recursive = pd.read_pickle("only_recursive_splitter_evalresults_ragas.pkl")
new_embedding = pd.read_pickle("new_embeddingresults_combined.pkl")

# Row label in the comparison table -> per-sample results of that run.
# Insertion order defines the row order of `results`.
runs = {
    "Semantic-Splitting-1N": nlp_rag_reindexed,
    "Semantic-Splitting-2N": nlp_rag_reindexed_2,
    "NaiveSplitting": only_recursive,
    "Semantic-Splitting-2N-LE": new_embedding,
}
metric_names = ["context_precision", "context_recall", "context_relevancy", "faithfulness", "answer_relevancy", "answer_correctness"]


def _metric_mean(frame, metric):
    """Return the mean of `metric` in `frame`, or NaN when the column is absent."""
    if metric not in frame.columns:
        return float("nan")
    return frame[metric].mean()


# Not every run was necessarily scored on every metric; report the gaps instead of
# failing with a KeyError on the first missing column.
for run_label, frame in runs.items():
    missing = [m for m in metric_names if m not in frame.columns]
    if missing:
        print("note: {} has no values for {}; reported as NaN".format(run_label, missing))

vals = {
    metric: [_metric_mean(frame, metric) for frame in runs.values()]
    for metric in metric_names
}
results = pd.DataFrame(vals, index=list(runs))
print(results)
results.to_csv("results.csv")
print(results.transpose().to_html())
# for i in range(39, 30, -1):
#     print(nlp_rag_reindexed_2.sort_values("answer_correctness").iloc[i][["question", "answer", "ground_truth", "answer_correctness"]].values)

In [None]:
# Bar chart of the transposed comparison table: each row of `results.T` becomes one
# cluster of bars and each column one bar within the cluster.
results_transposed = results.T
ax = results_transposed.plot(kind="bar", figsize=(12, 7))
ax.tick_params(axis="x", labelrotation=0)  # keep the x tick labels horizontal
ax.legend(loc="best")
ax.get_figure().savefig("comparison.png")

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Grouped bar chart: every row of `data` is one group on the x axis and every column
# is one bar within that group.
data = np.array([[19, 14, 6, 36, 3],
                 [12, 12, 1, 32, 1],
                 [18, 25, 0, 33, 0],
                 [13, 19, 0, 32, 5],
                 [12, 14, 0, 33, 0],
                 [16, 14, 7, 30, 0],
                 [11, 18, 5, 31, 2],
                 [17, 11, 3, 46, 7]])
n_groups, n_series = data.shape
x = np.arange(n_groups)
# Bar width: the bars of one group fill n_series/(n_series + 2) of the unit spacing,
# leaving a gap between neighbouring groups.
d = 1./(n_series+2.)
# Offsets of the bar centres relative to the group's tick. Bars are centre-aligned,
# so the offsets must be symmetric around zero, hence (n_series - 1)/2; subtracting
# n_series/2 would shift every group half a bar width to the left of its tick.
dx = (np.arange(n_series)-(n_series-1)/2.)*d


fig, ax = plt.subplots()
for series_idx in range(n_series):
    ax.bar(x+dx[series_idx], data[:, series_idx], width=d, label="label {}".format(series_idx))

ax.legend(framealpha=1)
plt.show()