In [None]:
import pandas as pd
from trulens.core import TruSession
from trulens.core import Feedback
from trulens.core.schema.select import Select
from trulens.feedback import GroundTruthAgreement
from trulens.providers.openai import OpenAI as fOpenAI
from trulens.apps.custom import TruCustomApp
from trulens.apps.custom import instrument
from trulens.dashboard import run_dashboard
from utils.chunk_scorer import score_chunk



class retriever_evaluator:
    """Evaluate a RAG app against a ground-truth set using TruLens feedbacks.

    Constructing an evaluator:
      1. opens a ``TruSession`` (optionally resetting its database),
      2. stores the ground truth in the ``"groundtruth"`` dataset,
      3. builds the IR hit rate / NDCG@k / Recall@k / answer-agreement feedbacks,
      4. wraps ``rag_app`` in a ``TruCustomApp`` carrying those feedbacks.

    Args:
        name: App name passed to ``TruCustomApp`` (shown on the leaderboard).
        ground_truth: Mapping with parallel sequences under the keys
            ``"query"``, ``"expected_response"`` and ``"expected_chunks"``
            (each ``expected_chunks`` entry is an iterable of chunk texts).
        rag_app: Object exposing ``retrieve_and_generate(query, k)`` that
            returns ``(response, retrieved_texts, relevance_scores)``.
        reset_db: When true, the session database is reset before use.
    """

    # Dataset name and metadata used when persisting the ground truth.
    _DATASET_NAME = "groundtruth"
    _DATASET_METADATA = {"domain": "Data from Ministry of Health UAE"}

    def __init__(self, name, ground_truth, rag_app, reset_db=False):
        self.name = name
        self.rag_app = rag_app
        # Order matters: the session is needed to store the ground truth, and
        # the stored ground truth is needed to build the feedbacks.
        self.session = self._init_db(reset_db)
        self.ground_truth = self._init_ground_truth(ground_truth)
        self.feedback = self._feedback_init()
        self.tru_app = self._init_app()

    # TODO: move the chunk scoring into the ground-truth preparation step.
    def _init_ground_truth(self, ground_truth):
        """Score the expected chunks, store the ground truth, and return it.

        The caller's ``ground_truth`` mapping is left unmodified, so the same
        mapping can safely be reused to build another evaluator.

        Returns:
            Whatever ``session.get_ground_truth("groundtruth")`` returns.
        """
        queries = ground_truth["query"]
        expected_responses = ground_truth["expected_response"]
        expected_chunks = ground_truth["expected_chunks"]

        records = []
        for position, query in enumerate(queries):
            response = expected_responses[position]
            scored_chunks = [
                {
                    "text": chunk,
                    "title": chunk,
                    "expected_score": score_chunk(chunk, response),
                }
                for chunk in expected_chunks[position]
            ]
            records.append(
                {
                    "query": query,
                    "expected_response": response,
                    "expected_chunks": scored_chunks,
                    "query_id": str(position + 1),  # ids are 1-based strings
                }
            )

        # Nothing to store for an empty ground truth (the original loop made
        # no call in that case either).
        if records:
            self.session.add_ground_truth_to_dataset(
                dataset_name=self._DATASET_NAME,
                ground_truth_df=pd.DataFrame(records),
                dataset_metadata=dict(self._DATASET_METADATA),
            )

        return self.session.get_ground_truth(self._DATASET_NAME)

    def _init_db(self, reset_db):
        """Create the ``TruSession``; reset its database when ``reset_db`` is true."""
        session = TruSession()
        if reset_db:
            session.reset_database()
        return session

    def _feedback_init(self):
        """Build the feedback functions bound to ``retrieve_and_generate``.

        Returns:
            ``[IR hit rate, NDCG@k, Recall@k, ground-truth answer agreement]``.
        """
        call = Select.RecordCalls.retrieve_and_generate
        query_selector = call.args.query  # 1st argument of retrieve_and_generate
        k_selector = call.args.k  # 2nd argument of retrieve_and_generate
        completion_selector = call.rets[0]  # 1st returned value: the response
        contexts_selector = call.rets[1]  # 2nd returned value: retrieved texts
        scores_selector = call.rets[2]  # last returned value: relevance scores

        # One agreement object serves all three retrieval metrics.
        retrieval_agreement = GroundTruthAgreement(self.ground_truth, provider=fOpenAI())

        f_ir_hit_rate = (
            Feedback(retrieval_agreement.ir_hit_rate, name="IR hit rate")
            .on(query_selector)
            .on(contexts_selector)
            .on(k_selector)
        )

        f_ndcg_at_k = (
            Feedback(retrieval_agreement.ndcg_at_k, name="NDCG@k")
            .on(query_selector)
            .on(contexts_selector)
            .on(scores_selector)
            .on(k_selector)
        )

        f_recall_at_k = (
            Feedback(retrieval_agreement.recall_at_k, name="Recall@k")
            .on(query_selector)
            .on(contexts_selector)
            .on(scores_selector)
            .on(k_selector)
        )

        # Built without an explicit provider, exactly as before.
        f_groundtruth_answer = (
            Feedback(
                GroundTruthAgreement(self.ground_truth).agreement_measure,
                name="Ground Truth answer (semantic similarity)",
            )
            .on(query_selector)
            .on(completion_selector)
        )

        return [f_ir_hit_rate, f_ndcg_at_k, f_recall_at_k, f_groundtruth_answer]

    def _init_app(self):
        """Wrap ``self.rag_app`` in a ``TruCustomApp`` carrying the feedbacks."""
        return TruCustomApp(
            self.rag_app,
            app_name=self.name,
            feedbacks=self.feedback,
        )

    def run(self, k=10):
        """Send every ground-truth query through the app inside a recording context.

        Args:
            k: Passed as the second argument of ``retrieve_and_generate``
                (defaults to the previously hard-coded 10).
        """
        for query in self.ground_truth["query"]:
            with self.tru_app:
                self.rag_app.retrieve_and_generate(query, k)

    def leaderboard(self):
        """Return the session leaderboard restricted to this evaluator's app."""
        return self.session.get_leaderboard(app_ids=[self.tru_app.app_id])



class rag_app:
    """Minimal retrieve-then-generate pipeline instrumented for TruLens.

    Args:
        retriever: Object exposing ``get_Chunks(query)``; each returned chunk
            is indexed as ``chunk["metadata"]["text"]``.
        generator: Object exposing ``generate(query, chunk_texts)``.
        expected_responses: Expected answers, parallel to ``queries``.
        queries: Known evaluation queries; must support ``.index(query)``.
    """

    def __init__(self, retriever, generator, expected_responses, queries):
        self.retriever = retriever
        self.generator = generator
        self.expected_responses = expected_responses
        self.queries = queries

    def _get_scores(self, chunks, expected_response):
        """Score each retrieved chunk's text against ``expected_response``."""
        return [
            score_chunk(chunk["metadata"]["text"], expected_response)
            for chunk in chunks
        ]

    @instrument
    def retrieve_and_generate(self, query, k,):
        """Retrieve chunks for ``query``, generate an answer, and score the chunks.

        Args:
            query: Must be one of ``self.queries``; its position selects the
                expected response used for scoring.
            k: Not used inside this method; it is part of the signature so the
                feedback selectors can read it from the recorded call.
                NOTE(review): the retriever is not told about ``k`` — confirm
                it returns the intended number of chunks.

        Returns:
            ``(response, chunk_texts, scores)``.

        Raises:
            ValueError: If ``query`` is not in ``self.queries``.
        """
        # Resolve the query first so an unknown query fails before any
        # retrieval or generation work is done.
        position = self.queries.index(query)
        expected_response = self.expected_responses[position]

        chunks = self.retriever.get_Chunks(query)
        chunk_texts = [chunk["metadata"]["text"] for chunk in chunks]
        response = self.generator.generate(query, chunk_texts)
        scores = self._get_scores(chunks, expected_response)

        # Progress is relative to the actual number of queries, not a fixed 23.
        progress = ((position + 1) * 100) / len(self.queries)
        print(f"retrieved and evaluated {progress}% \"{query}\"")

        return response, chunk_texts, scores


    



In [1]:
# from cohere_ret.cohere_ret import cohere_retriever
# from cohere_ret.generator import cohere_generator
# from gemini.retrieve import gemini_retriever
# from openai_class.retriever import openai_retriever
# from openai_class.generator import openai_generator
from voyageai_ret.retrieve import voyage_retriever
from gemini.generator import gemini_generator
from utils.prepare_ground_truth import LatestGroundTruthCSV
from evaluator.ret_eval import rag_app, retriever_evaluator
from utils.chunk_scorer import score_chunk
import json
# Build the ground truth from the CSV export and the URL -> chunk mapping.
csv_filepath = 'GroundTruths_Dataset - Sheet1.csv'
json_filepath = 'url_chunk_mapping_500.json'

processor = LatestGroundTruthCSV(csv_filepath, json_filepath)
ground_truth = processor.get_latest_ground_truth()

# Alternative: load a previously exported ground truth instead.
# with open('data.json', 'r') as f:
#    ground_truth = json.loads(f.read())

ret = voyage_retriever()
gen = gemini_generator()
# NOTE: this rebinds the imported class name `rag_app` to an instance; the
# name is kept because other cells call `rag_app.retrieve_and_generate(...)`.
rag_app = rag_app(ret, gen, ground_truth["expected_response"], ground_truth["query"])

# App naming convention: eval-{Retriever}-{generator}-{chunksize}
# The label now matches the components actually used (voyage retriever,
# gemini generator) instead of the stale "cohere_cohere" label.
ret_eval = retriever_evaluator(
    name="eval-voyage-gemini-528",
    ground_truth=ground_truth,
    rag_app=rag_app,
    reset_db=True,  # wipes previously recorded runs in the session database
)




🦑 Initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `TruSession` to prevent this.


Updating app_name and app_version in apps table: 0it [00:00, ?it/s]
Updating app_id in records table: 0it [00:00, ?it/s]
Updating app_json in apps table: 0it [00:00, ?it/s]

length: 23 length of child 5 
type:<class 'list'> type_child <class 'list'> 
[{'text': 'Register for Controlled or Semi-Controlled Drugs CustodyStart Service Bookmark service Service completion duration3 working daysView moreService feesRegister Fee AED300View moreExport asExport as \xa0PDFExport as \xa0XLSService Rating Average Rating : From 0 usersAbout the ServiceThis service enables clinics, pharmacies, and medical warehouses to submit application to obtain a register of controlled and semi-controlled drugs custody.Service Process1Login to the MoHAP website or smart app using the UAE PASS2The customer must fill', 'title': 'Register for Controlled or Semi-Controlled Drugs CustodyStart Service Bookmark service Service completion duration3 working daysView moreService feesRegister Fee AED300View moreExport asExport as \xa0PDFExport as \xa0XLSService Rating Average Rating : From 0 usersAbout the ServiceThis service enables clinics, pharmacies, and medical warehouses to submit applicati




[{'text': 'Renewal of Registration of a Conventional Pharmaceutical ProductStart Service Bookmark service Service completion duration5 working daysView moreService feesApplication: AED 100Registration of a conventional biological pharmace...View moreExport asExport as \xa0PDFExport as \xa0XLSService Rating Average Rating : From 0 usersAbout the ServiceThis service enables clients to submit applications to renew the registration of conventional, biological or other human pharmaceutical products for importation and trading within the', 'title': 'Renewal of Registration of a Conventional Pharmaceutical ProductStart Service Bookmark service Service completion duration5 working daysView moreService feesApplication: AED 100Registration of a conventional biological pharmace...View moreExport asExport as \xa0PDFExport as \xa0XLSService Rating Average Rating : From 0 usersAbout the ServiceThis service enables clients to submit applications to renew the registration of conventional, biological o

In [11]:
# from cohere_ret.cohere_ret import cohere_retriever
# from cohere_ret.generator import cohere_generator
# from gemini.retrieve import gemini_retriever
# from openai_class.retriever import openai_retriever
# from openai_class.generator import openai_generator
from voyageai_ret.retrieve import voyage_retriever
from gemini.generator import gemini_generator
from utils.prepare_ground_truth import LatestGroundTruthCSV
from evaluator.ret_eval import rag_app, retriever_evaluator
from utils.chunk_scorer import score_chunk
# This cell parses JSON itself, so import the module here rather than relying
# on a name left in the kernel by some other cell.
import json

# Alternative: rebuild the ground truth from the CSV export.
# csv_filepath = 'GroundTruths_Dataset - Sheet1.csv'
# json_filepath = 'url_chunk_mapping_500.json'
# processor = LatestGroundTruthCSV(csv_filepath, json_filepath)
# ground_truth = processor.get_latest_ground_truth()

# Load the exported ground truth; JSON is read as UTF-8 regardless of locale.
with open('data.json', 'r', encoding='utf-8') as f:
    ground_truth = json.load(f)

ret = voyage_retriever()
gen = gemini_generator()
# NOTE: this rebinds the imported class name `rag_app` to an instance; the
# name is kept because other cells call `rag_app.retrieve_and_generate(...)`.
rag_app = rag_app(ret, gen, ground_truth["expected_response"], ground_truth["query"])

# App naming convention: eval-{Retriever}-{generator}-{chunksize}
ret_eval = retriever_evaluator(
    name="voyage-528@3",
    ground_truth=ground_truth,
    rag_app=rag_app,
    reset_db=True,  # wipes previously recorded runs in the session database
)




Updating app_name and app_version in apps table: 0it [00:00, ?it/s]
Updating app_id in records table: 0it [00:00, ?it/s]
Updating app_json in apps table: 0it [00:00, ?it/s]


[{'text': 'Register for Controlled or Semi-Controlled Drugs CustodyStart Service Bookmark service Service completion duration3 working daysView moreService feesRegister Fee AED300View moreExport asExport as  PDFExport as  XLSService Rating Average Rating : From 0 usersAbout the ServiceThis service enables clinics, pharmacies, and medical warehouses to submit application to obtain a register of controlled and semi-controlled drugs custody.Service Process1Login to the MoHAP website or smart app using the UAE PASS2The customer must fill', 'title': 'Register for Controlled or Semi-Controlled Drugs CustodyStart Service Bookmark service Service completion duration3 working daysView moreService feesRegister Fee AED300View moreExport asExport as  PDFExport as  XLSService Rating Average Rating : From 0 usersAbout the ServiceThis service enables clinics, pharmacies, and medical warehouses to submit application to obtain a register of controlled and semi-controlled drugs custody.Service Process1L

In [12]:
# Send every ground-truth query through the wrapped app so each call is
# recorded; one progress line is printed per query.
ret_eval.run()


retrieved and evaluated 4.3478260869565215% "How do I register for controlled or semi-controlled drugs custody?"
retrieved and evaluated 8.695652173913043% "What are the requirements for renewing the registration of a conventional pharmaceutical product?"
retrieved and evaluated 13.043478260869565% "How do I appeal a decision made by the Medical Licensing Committee?"


In [13]:
# Show the session leaderboard restricted to this evaluator's app id.
ret_eval.session.get_leaderboard(app_ids=[ret_eval.tru_app.app_id])

Unnamed: 0_level_0,Unnamed: 1_level_0,Ground Truth answer (semantic similarity),IR hit rate,NDCG@k,Recall@k,latency,total_cost
app_name,app_version,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
voyage-528@3,base,0.9,0.666667,1.0,0.388889,3.271334,0.0


In [14]:
# Display the app id of the wrapped app.
ret_eval.tru_app.app_id

'app_hash_c8c256ee7aacaf113569d796b9aea11d'

In [18]:
# Manual smoke check: call the app directly (outside the evaluator's `run`)
# for one known query. `rag_app` here is the instance, not the class.
res = rag_app.retrieve_and_generate("How do I register for controlled or semi-controlled drugs custody?", k=10)
# The result is the (response, chunk texts, scores) tuple returned by the method.
print(res)
print(type(res))

retrieved and evaluated 4.3478260869565215% "How do I register for controlled or semi-controlled drugs custody?"
('To register for controlled or semi-controlled drugs custody in the UAE, follow these steps:\n\n1. **Login:** Access the Ministry of Health and Prevention (MoHAP) website or smart app using your UAE PASS.\n\n2. **Complete Application:** Fill out the application form requesting the issuance of the register and submit it.\n\n3. **Review and Verification:** The Drug Department will review and verify your application.\n\n4. **Payment:** If approved, pay the AED 300 registration fee online.\n\n5. **Delivery:** Once payment is processed, the controlled register will be sent to you via courier.\n\n\n**Conditions and Requirements:**\n\n* Your healthcare facility must hold a valid license from MOHAP or DHCC.  This applies to pharmacies within private hospitals, pharmaceutical warehouses, and clinics.\n\n**Service Details:**\n\n* **Service Name:** Register for Controlled or Semi-Cont

In [19]:
# Second element of the returned tuple: the retrieved chunk texts.
res[1]

['Register for Controlled or Semi-Controlled Drugs CustodyStart Service Bookmark service Service completion duration3 working daysView moreService feesRegister Fee AED300View moreExport asExport as \xa0PDFExport as \xa0XLSService Rating Average Rating : From 0 usersAbout the ServiceThis service enables clinics, pharmacies, and medical warehouses to submit application to obtain a register of controlled and semi-controlled drugs custody.Service Process1Login to the MoHAP website or smart app using the UAE PASS2The customer must fill',
 'semi-controlled drugs custody.Service Process1Login to the MoHAP website or smart app using the UAE PASS2The customer must fill in the application form requesting the issuance of the register, and submit it3The application will be reviewed and verified by the Drug Department4If the application is approved, the customer must pay the associated fees online5Once the payment is processed, the controlled register will be sent to the customer via courierConditi

In [17]:
# Launch the TruLens dashboard to browse the recorded runs.
# NOTE(review): this import would normally live with the notebook's other imports.
from  trulens.dashboard import run_dashboard
run_dashboard()



Starting dashboard ...
Dashboard already running at path:   Network URL: http://192.168.1.12:33057



<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>