In [None]:
import pandas as pd
from trulens.core import TruSession
from trulens.core import Feedback
from trulens.core.schema.select import Select
from trulens.feedback import GroundTruthAgreement
from trulens.providers.openai import OpenAI as fOpenAI
from trulens.apps.custom import TruCustomApp
from trulens.apps.custom import instrument
from trulens.dashboard import run_dashboard
from utils.chunk_scorer import score_chunk



class retriever_evaluator:
    """Evaluate a RAG application against a ground-truth dataset with TruLens.

    Construction performs four steps, in order:

    1. open a ``TruSession`` (optionally resetting its database),
    2. score every expected chunk with ``score_chunk`` and store the ground
       truth in the session under the dataset name ``"groundtruth"``,
    3. build the feedback functions (IR hit rate, NDCG@k, Recall@k and
       ground-truth answer agreement) bound to ``retrieve_and_generate``,
    4. wrap ``rag_app`` in a ``TruCustomApp`` carrying those feedbacks.

    Args:
        name: App name passed to ``TruCustomApp``.
        ground_truth: Mapping with the parallel sequences ``"query"``,
            ``"expected_response"`` and ``"expected_chunks"`` (one iterable
            of chunk texts per query). It is read only, never modified.
        rag_app: Object exposing ``retrieve_and_generate(query, k)`` that
            returns ``(response, retrieved_chunks, relevance_scores)``.
        reset_db: When true, the session database is reset before use.
    """

    def __init__(self, name, ground_truth, rag_app, reset_db=False):
        self.name = name
        self.rag_app = rag_app
        # Order matters: the session must exist before the ground truth is
        # stored, and the stored ground truth feeds the feedback functions.
        self.session = self._init_db(reset_db)
        self.ground_truth = self._init_ground_truth(ground_truth)
        self.feedback = self._feedback_init()
        self.tru_app = self._init_app()

    # TODO: move the addition of the scores to the ground-truth preparation step.
    def _init_ground_truth(self, ground_truth):
        """Store the scored ground truth in the session and read it back.

        Each expected chunk text is expanded to a record with ``"text"``,
        ``"title"`` (both the chunk text) and ``"expected_score"`` (the value
        of ``score_chunk(chunk, expected_response)``). Query ids are the
        1-based positions as strings.

        Returns:
            Whatever ``session.get_ground_truth("groundtruth")`` returns.
        """
        queries = list(ground_truth["query"])
        expected_responses = list(ground_truth["expected_response"])
        expected_chunks = list(ground_truth["expected_chunks"])

        rows = []
        for position, query in enumerate(queries):
            expected_response = expected_responses[position]
            # Build fresh records instead of overwriting the caller's lists,
            # so the same ground truth can seed several evaluators.
            scored_chunks = [
                {
                    "text": chunk,
                    "title": chunk,
                    "expected_score": score_chunk(chunk, expected_response),
                }
                for chunk in expected_chunks[position]
            ]
            rows.append(
                {
                    "query": query,
                    "expected_response": expected_response,
                    "expected_chunks": scored_chunks,
                    "query_id": str(position + 1),
                }
            )

        # One batched insert rather than one single-row DataFrame per query.
        # Nothing is inserted for an empty ground truth.
        if rows:
            self.session.add_ground_truth_to_dataset(
                dataset_name="groundtruth",
                ground_truth_df=pd.DataFrame(rows),
                dataset_metadata={"domain": "Data from Ministry of Health UAE"},
            )

        return self.session.get_ground_truth("groundtruth")

    def _init_db(self, reset_db):
        """Create the ``TruSession``; reset its database when ``reset_db`` is true."""
        session = TruSession()
        if reset_db:
            session.reset_database()
        return session

    def _feedback_init(self):
        """Build the feedback functions bound to ``retrieve_and_generate``.

        Returns:
            ``[IR hit rate, NDCG@k, Recall@k, ground-truth answer agreement]``.
        """
        call = Select.RecordCalls.retrieve_and_generate
        query = call.args.query  # 1st argument of retrieve_and_generate
        k = call.args.k  # 2nd argument of retrieve_and_generate
        completion = call.rets[0]  # 1st returned value: generated answer
        retrieved_context = call.rets[1]  # 2nd returned value: retrieved chunks
        relevance_scores = call.rets[2]  # 3rd returned value: chunk scores

        # A single agreement object with an explicit provider backs all four
        # metrics, so the answer metric no longer relies on the implicit
        # default provider while the retrieval metrics name theirs.
        agreement = GroundTruthAgreement(self.ground_truth, provider=fOpenAI())

        f_ir_hit_rate = (
            Feedback(agreement.ir_hit_rate, name="IR hit rate")
            .on(query)
            .on(retrieved_context)
            .on(k)
        )
        f_ndcg_at_k = (
            Feedback(agreement.ndcg_at_k, name="NDCG@k")
            .on(query)
            .on(retrieved_context)
            .on(relevance_scores)
            .on(k)
        )
        f_recall_at_k = (
            Feedback(agreement.recall_at_k, name="Recall@k")
            .on(query)
            .on(retrieved_context)
            .on(relevance_scores)
            .on(k)
        )
        f_groundtruth_answer = (
            Feedback(
                agreement.agreement_measure,
                name="Ground Truth answer (semantic similarity)",
            )
            .on(query)
            .on(completion)
        )
        return [f_ir_hit_rate, f_ndcg_at_k, f_recall_at_k, f_groundtruth_answer]

    def _init_app(self):
        """Wrap ``rag_app`` in a ``TruCustomApp`` with the prepared feedbacks."""
        return TruCustomApp(
            self.rag_app,
            app_name=self.name,
            feedbacks=self.feedback,
        )

    def run(self, k=10):
        """Send every ground-truth query through the app inside a recording context.

        Args:
            k: Value passed as the second argument of
                ``retrieve_and_generate``; the feedback functions read it
                back from the recorded call. Defaults to 10.
        """
        for query in self.ground_truth["query"]:
            with self.tru_app:
                self.rag_app.retrieve_and_generate(query, k)

    def leaderboard(self):
        """Return the session leaderboard restricted to this evaluator's app."""
        return self.session.get_leaderboard(app_ids=[self.tru_app.app_id])



class rag_app:
    """Retrieve-then-generate pipeline whose main call is instrumented for TruLens.

    Args:
        retriever: Object with ``get_Chunks(query)`` returning chunk records
            whose text is found at ``chunk["metadata"]["text"]``.
        generator: Object with ``generate(query, chunk_texts)``.
        expected_responses: Ground-truth answers, parallel to ``queries``.
        queries: Ground-truth queries; must support ``.index(query)``
            (for example a list).
    """

    def __init__(self, retriever, generator, expected_responses, queries):
        self.retriever = retriever
        self.generator = generator
        self.expected_responses = expected_responses
        self.queries = queries

    def _get_scores(self, chunks, expected_response):
        """Score each retrieved chunk's text against the expected response."""
        return [
            score_chunk(chunk["metadata"]["text"], expected_response)
            for chunk in chunks
        ]

    @instrument
    def retrieve_and_generate(self, query, k):
        """Retrieve chunks for ``query``, generate an answer and score the chunks.

        Args:
            query: A query that is present in ``self.queries``.
            k: Not used by the retrieval itself; it is part of the recorded
                call signature so feedback selectors can read it.

        Returns:
            ``(response, chunk_texts, scores)`` where ``scores`` holds one
            ``score_chunk`` value per retrieved chunk.

        Raises:
            ValueError: If ``query`` is not one of the ground-truth queries.
        """
        # Resolve the expected answer first so an unknown query fails before
        # any retriever or generator call is made.
        try:
            position = self.queries.index(query)
        except ValueError:
            raise ValueError(
                f"query is not part of the ground-truth queries: {query!r}"
            ) from None
        expected_response = self.expected_responses[position]

        chunks = self.retriever.get_Chunks(query)
        chunk_texts = [chunk["metadata"]["text"] for chunk in chunks]
        response = self.generator.generate(query, chunk_texts)
        scores = self._get_scores(chunks, expected_response)

        return response, chunk_texts, scores


    



In [1]:
from cohere_ret.cohere_ret import cohere_retriever
from cohere_ret.generator import cohere_generator
from gemini.retrieve import gemini_retriever
from openai_class.retriever import openai_retriever
from openai_class.generator import openai_generator
from voyageai_ret.retrieve import voyage_retriever
from gemini.generator import gemini_generator
from utils.prepare_ground_truth import LatestGroundTruthCSV
from evaluator.ret_eval import rag_app, retriever_evaluator
from utils.chunk_scorer import score_chunk
# Input files: the ground-truth sheet and the URL-to-chunk mapping.
csv_filepath = 'GroundTruths_Dataset - Sheet1.csv'
json_filepath = 'url_chunk_mapping_1000_v2.0.json'

processor = LatestGroundTruthCSV(csv_filepath, json_filepath)
ground_truth = processor.get_latest_ground_truth()

ret = openai_retriever()
gen = gemini_generator()
# Keep the instance under its own name: rebinding `rag_app` would shadow the
# imported class and make any later `rag_app(...)` call fail.
app = rag_app(ret, gen, ground_truth["expected_response"], ground_truth["query"])

# App naming scheme: eval-{Retriever}-{generator}-{chunksize}
# NOTE: reset_db=True resets the TruLens session database before this run.
ret_eval = retriever_evaluator(
    name="eval_openai_gemini-1000",
    ground_truth=ground_truth,
    rag_app=app,
    reset_db=True,
)




23
🦑 Initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `TruSession` to prevent this.


Updating app_name and app_version in apps table: 0it [00:00, ?it/s]
Updating app_id in records table: 0it [00:00, ?it/s]
Updating app_json in apps table: 0it [00:00, ?it/s]


✅ In IR hit rate, input query will be set to __record__.app.retrieve_and_generate.args.query .
✅ In IR hit rate, input retrieved_context_chunks will be set to __record__.app.retrieve_and_generate.rets[1] .
✅ In IR hit rate, input k will be set to __record__.app.retrieve_and_generate.args.k .
✅ In NDCG@k, input query will be set to __record__.app.retrieve_and_generate.args.query .
✅ In NDCG@k, input retrieved_context_chunks will be set to __record__.app.retrieve_and_generate.rets[1] .
✅ In NDCG@k, input relevance_scores will be set to __record__.app.retrieve_and_generate.rets[2] .
✅ In NDCG@k, input k will be set to __record__.app.retrieve_and_generate.args.k .
✅ In Recall@k, input query will be set to __record__.app.retrieve_and_generate.args.query .
✅ In Recall@k, input retrieved_context_chunks will be set to __record__.app.retrieve_and_generate.rets[1] .
✅ In Recall@k, input relevance_scores will be set to __record__.app.retrieve_and_generate.rets[2] .
✅ In Recall@k, input k will be 

In [2]:
# Send every ground-truth query through the instrumented app so each call is recorded.
ret_eval.run()


retrieved and evaluated 4.3478260869565215% "How do I register for controlled or semi-controlled drugs custody?"
retrieved and evaluated 8.695652173913043% "What are the requirements for renewing the registration of a conventional pharmaceutical product?"
retrieved and evaluated 13.043478260869565% "How do I appeal a decision made by the Medical Licensing Committee?"
retrieved and evaluated 17.391304347826086% "What is the process for obtaining a certificate of amendment for registered pharmaceutical products?"
retrieved and evaluated 21.73913043478261% "How can I get a product classified?"
retrieved and evaluated 26.08695652173913% "What are the steps to re-license a pharmaceutical facility?"
retrieved and evaluated 30.434782608695652% "How can I renew my license as a nurse or medical professional?"
retrieved and evaluated 34.78260869565217% "What's the process for getting a permit to import medical equipment?"
retrieved and evaluated 39.130434782608695% "How can I renew my health fac

In [3]:
# Leaderboard for this run plus a fixed list of other app ids
# (presumably earlier evaluation runs -- confirm they still exist in the database).
ret_eval.session.get_leaderboard(
    app_ids=[
        ret_eval.tru_app.app_id,
        "app_hash_fb4995701b8d40691d749df9464d2b3b",
        "app_hash_5900e9c221404923bfe3b4748a08c898",
        "app_hash_43aa1da8d29ea67fe751b00bcbc81856",
        "app_hash_44ad3fc2973d346fbba7ffff72375bee",
        "app_hash_7fc04a06e84483e4843c2d36e2441d58",
        "app_hash_06489213760ec9904f851ebc0e4a6681",
        "app_hash_85fca6bb738e30758f6f96559de669d6",
        "app_hash_7bb306d11afd98b36c726ea4147bcea3",
        "app_hash_af2d2f2c512504fcede40b939672677d",
        "app_hash_c8146a11589aeb100704fed81a3b8ec7",
    ]
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Ground Truth answer (semantic similarity),IR hit rate,NDCG@k,Recall@k,latency,total_cost
app_name,app_version,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
eval_openai_gemini-1000,base,0.759091,0.956522,1.0,0.367902,4.877454,0.0


In [4]:
# Display the app id of this evaluation run's TruLens app.
ret_eval.tru_app.app_id

'app_hash_1a0ce2a495f8f8278d9ed0d117e7ec87'