In [None]:
import pandas as pd
from trulens.core import TruSession
from trulens.core import Feedback
from trulens.core.schema.select import Select
from trulens.feedback import GroundTruthAgreement
from trulens.providers.openai import OpenAI as fOpenAI
from trulens.apps.custom import TruCustomApp
from trulens.apps.custom import instrument
from trulens.dashboard import run_dashboard
from utils.chunk_scorer import score_chunk



class retriever_evaluator:
    """Evaluate a RAG application with TruLens ground-truth feedback functions.

    Constructing an evaluator performs, in order:

    1. open a ``TruSession`` (optionally resetting its database),
    2. store the ground truth in the ``"groundtruth"`` dataset of that session,
    3. build the feedback functions,
    4. wrap ``rag_app`` in a ``TruCustomApp`` carrying those feedbacks.

    Args:
        name: App name passed to ``TruCustomApp``.
        ground_truth: Container with index-aligned sequences under the keys
            ``"query"``, ``"expected_response"`` and ``"expected_chunks"``.
        rag_app: Object exposing an instrumented
            ``retrieve_and_generate(query, k)`` whose return value is indexed as
            ``(completion, retrieved_context, relevance_scores)`` by the
            feedback selectors.
        reset_db: When true, reset the session database before storing anything.
    """

    def __init__(self, name, ground_truth, rag_app, reset_db=False):
        self.name = name
        self.rag_app = rag_app
        # Order matters: the session is needed to store the ground truth, and
        # the stored ground truth is needed to build the feedbacks.
        self.session = self._init_db(reset_db)
        self.ground_truth = self._init_ground_truth(ground_truth)
        self.feedback = self._feedback_init()
        self.tru_app = self._init_app()

    # TODO: move the addition of the scores to the ground-truth preparation step.
    def _init_ground_truth(self, ground_truth):
        """Store every ground-truth row in the session and return the dataset.

        Each expected chunk is expanded into a dict with ``"text"``, ``"title"``
        (both set to the chunk itself) and ``"expected_score"`` (the result of
        ``score_chunk(chunk, expected_response)``). Rows are added one at a time
        with ``query_id`` set to the 1-based row position as a string.

        The caller's ``ground_truth`` is left unmodified: the expanded chunks
        are built as new lists, so the same object can be reused to construct
        another evaluator.

        Returns:
            Whatever ``session.get_ground_truth("groundtruth")`` returns.
        """
        # Hoisted out of the loop: these lookups do not depend on the row.
        queries = ground_truth["query"]
        expected_responses = ground_truth["expected_response"]
        expected_chunks = ground_truth["expected_chunks"]

        for i in range(len(queries)):
            expected_response = expected_responses[i]
            scored_chunks = [
                {
                    "text": chunk,
                    "title": chunk,
                    "expected_score": score_chunk(chunk, expected_response),
                }
                for chunk in expected_chunks[i]
            ]
            row = {
                "query": [queries[i]],
                "expected_response": [expected_response],
                "expected_chunks": [scored_chunks],
                "query_id": [str(i + 1)],
            }
            self.session.add_ground_truth_to_dataset(
                dataset_name="groundtruth",
                ground_truth_df=pd.DataFrame(row),
                dataset_metadata={"domain": "Data from Ministry of Health UAE"},
            )

        return self.session.get_ground_truth("groundtruth")

    def _init_db(self, reset_db):
        """Create the ``TruSession``; reset its database first if ``reset_db``."""
        session = TruSession()
        if reset_db:
            session.reset_database()
        return session

    def _feedback_init(self):
        """Build the feedback functions evaluated on each recorded call.

        All selectors point at the recorded ``retrieve_and_generate`` call.

        Returns:
            ``[IR hit rate, NDCG@k, Recall@k, ground-truth answer agreement]``.
        """
        call = Select.RecordCalls.retrieve_and_generate

        arg_query_selector = call.args.query  # 1st argument
        arg_retrieval_k_selector = call.args.k  # 2nd argument
        arg_completion_str_selector = call.rets[0]  # 1st returned value
        arg_retrieved_context_selector = call.rets[1]  # 2nd returned value
        arg_relevance_scores_selector = call.rets[2]  # last returned value

        f_ir_hit_rate = (
            Feedback(
                GroundTruthAgreement(self.ground_truth, provider=fOpenAI()).ir_hit_rate,
                name="IR hit rate",
            )
            .on(arg_query_selector)
            .on(arg_retrieved_context_selector)
            .on(arg_retrieval_k_selector)
        )

        f_ndcg_at_k = (
            Feedback(
                GroundTruthAgreement(self.ground_truth, provider=fOpenAI()).ndcg_at_k,
                name="NDCG@k",
            )
            .on(arg_query_selector)
            .on(arg_retrieved_context_selector)
            .on(arg_relevance_scores_selector)
            .on(arg_retrieval_k_selector)
        )

        f_recall_at_k = (
            Feedback(
                GroundTruthAgreement(self.ground_truth, provider=fOpenAI()).recall_at_k,
                name="Recall@k",
            )
            .on(arg_query_selector)
            .on(arg_retrieved_context_selector)
            .on(arg_relevance_scores_selector)
            .on(arg_retrieval_k_selector)
        )

        # Unlike the three retrieval metrics, no provider is passed explicitly here.
        f_groundtruth_answer = (
            Feedback(
                GroundTruthAgreement(self.ground_truth).agreement_measure,
                name="Ground Truth answer (semantic similarity)",
            )
            .on(arg_query_selector)
            .on(arg_completion_str_selector)
        )

        return [f_ir_hit_rate, f_ndcg_at_k, f_recall_at_k, f_groundtruth_answer]

    def _init_app(self):
        """Wrap ``rag_app`` in a ``TruCustomApp`` with the prepared feedbacks."""
        return TruCustomApp(
            self.rag_app,
            app_name=self.name,
            feedbacks=self.feedback,
        )

    def run(self, k=10):
        """Call ``retrieve_and_generate`` once per ground-truth query.

        Each call is made inside its own ``with self.tru_app`` context.

        Args:
            k: Value passed as the second argument of ``retrieve_and_generate``
                (defaults to 10, the value that was previously hard-coded).
        """
        for query in self.ground_truth["query"]:
            with self.tru_app:
                self.rag_app.retrieve_and_generate(query, k)

    def leaderboard(self):
        """Return the session leaderboard restricted to this evaluator's app."""
        return self.session.get_leaderboard(app_ids=[self.tru_app.app_id])



class rag_app:
    """Retrieve-then-generate pipeline that also scores each retrieved chunk.

    Args:
        retriever: Object exposing ``get_Chunks(query)``; each returned chunk is
            indexed as ``chunk["metadata"]["text"]``.
        generator: Object exposing ``generate(query, chunk_texts)``.
        expected_responses: Expected responses, index-aligned with ``queries``.
        queries: Known queries; must support ``.index(query)``.
    """

    def __init__(self, retriever, generator, expected_responses, queries):
        self.retriever = retriever
        self.generator = generator
        self.expected_responses = expected_responses
        self.queries = queries

    def _get_scores(self, chunks, expected_response):
        """Return ``score_chunk(text, expected_response)`` for each chunk's text."""
        return [
            score_chunk(chunk["metadata"]["text"], expected_response)
            for chunk in chunks
        ]

    def _expected_response_for(self, query):
        """Return the expected response stored at the same position as ``query``.

        Raises:
            ValueError: If ``query`` is not one of ``self.queries``.
        """
        try:
            position = self.queries.index(query)
        except ValueError as err:
            raise ValueError(
                f"query is not part of the ground truth: {query!r}"
            ) from err
        return self.expected_responses[position]

    @instrument
    def retrieve_and_generate(self, query, k):
        """Retrieve chunks for ``query``, generate a response and score the chunks.

        Args:
            query: Query text; must be one of ``self.queries`` because the
                chunk scores are computed against its expected response.
            k: Not used in this method body.
                NOTE(review): the evaluator's feedback selectors read ``args.k``
                from the recorded call, so the parameter must stay; confirm
                whether it should also be forwarded to the retriever.

        Returns:
            Tuple ``(response, chunk_texts, scores)``.

        Raises:
            ValueError: If ``query`` is not one of ``self.queries``.
        """
        # Resolve the expected response first so an unknown query fails before
        # the retriever and generator are called.
        expected_response = self._expected_response_for(query)

        chunks = self.retriever.get_Chunks(query)
        chunk_texts = [chunk["metadata"]["text"] for chunk in chunks]
        response = self.generator.generate(query, chunk_texts)
        scores = self._get_scores(chunks, expected_response)

        return response, chunk_texts, scores


    



In [None]:
from cohere_ret.cohere_ret import cohere_retriever
from cohere_ret.generator import cohere_generator
from gemini.retrieve import gemini_retriever
from openai_class.retriever import openai_retriever
from voyageai_ret.retrieve import voyage_retriever
from gemini.generator import gemini_generator
from utils.prepare_ground_truth import LatestGroundTruthCSV
from evaluator.ret_eval import rag_app, retriever_evaluator
from utils.chunk_scorer import score_chunk
# Input files for the ground truth (paths are relative to the working directory).
csv_filepath = 'GroundTruths_Dataset - Sheet1.csv'
json_filepath = 'URL-chunk_map.json'

# Load the latest ground truth from the CSV and the JSON file.
processor = LatestGroundTruthCSV(csv_filepath, json_filepath)
ground_truth = processor.get_latest_ground_truth()

# Cohere retriever + Cohere generator pipeline.
ret = cohere_retriever()
gen = cohere_generator()
# NOTE(review): this rebinds the imported class name `rag_app` to an instance;
# the cell only re-runs cleanly because the import above is executed again.
rag_app = rag_app(
    ret,
    gen,
    ground_truth["expected_response"],
    ground_truth["query"],
)

# Register the ground truth and wrap the pipeline for evaluation.
ret_eval = retriever_evaluator(
    name="eval_cohere2_cohere",
    ground_truth=ground_truth,
    rag_app=rag_app,
)




                                      ground_truth_id  \
0   ground_truth_hash_61d89bb73141b372c1d35ab29c0e...   
1   ground_truth_hash_86f6b8551102b6d18293be593e9b...   
2   ground_truth_hash_12c36d6a2fbb73e671cfe814b3db...   
3   ground_truth_hash_e5a167d713d7858ed9e8c6e74f5b...   
4   ground_truth_hash_0dad6c914ddb1d5248355c5499bb...   
5   ground_truth_hash_e61b5b2f36280abefc00df7554c5...   
6   ground_truth_hash_8714bd9ce7ee36bf8c1f9541fa3b...   
7   ground_truth_hash_d17304ff0c441ec955012a1f4514...   
8   ground_truth_hash_a39c5ec013ff6149fe7aafa4d478...   
9   ground_truth_hash_2908a3c58d7d02c338e95ebc8008...   
10  ground_truth_hash_80b3d599bcf887b87806ac5faeff...   
11  ground_truth_hash_716eb0af6b3447690e561184291a...   
12  ground_truth_hash_87a6da649ede154f6aa52a82fa1c...   
13  ground_truth_hash_a06602996a5dfb64515d882720f4...   
14  ground_truth_hash_1e9653e7fed501dfc85dc86d1824...   
15  ground_truth_hash_3abec1a0480195159a569c428970...   
16  ground_truth_hash_cdc146d32

In [None]:
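# Uncomment to (re)run the evaluation over every ground-truth query.
# Left disabled here, presumably because results from an earlier run are already stored.
# ret_eval.run()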


In [35]:
# Leaderboard for the current app plus four hard-coded app ids
# (presumably earlier evaluation runs; which name each hash maps to is not shown here).
ret_eval.session.get_leaderboard(
    app_ids=[
        ret_eval.tru_app.app_id,
        "app_hash_ff8b170f597ed0688129aafc72a66596",
        "app_hash_54d3e06551724bf80729fddacf6fb2ad",
        "app_hash_3149e752fa175d4feff2bca54fa3e414",
        "app_hash_fb9ccd4df04ceb87e7e13ff8ab4a7d87",
    ]
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Ground Truth answer (semantic similarity),IR hit rate,NDCG@k,Recall@k,latency,total_cost
app_name,app_version,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
eval_cohere_cohere,base,0.752632,0.947368,1.0,0.274351,5.443205,0.0
eval_cohere2_cohere,base,0.721053,0.894737,1.0,0.358413,5.165441,0.0
eval_openai_cohere,base,0.710526,0.947368,1.0,0.274351,5.977364,0.0
eval_gemini_cohere,base,0.684211,0.947368,1.0,0.346678,5.720447,0.0
eval_voyage_cohere,base,0.584211,1.0,1.0,0.269593,5.577336,0.0


In [36]:
# App id of the current evaluation (the first id passed to the leaderboard call above).
ret_eval.tru_app.app_id

'app_hash_4addebb53dead30189a4299b3f47c63a'