In [1]:
import os 


In [2]:
%pwd


'd:\\Projects\\Mresult\\Phenomix\\notebooks'

In [3]:
os.chdir("../")

In [4]:
%pwd

'd:\\Projects\\Mresult\\Phenomix'

In [5]:
from tqdm import tqdm

from src.database.load_vector_data import DataLoader

ModuleNotFoundError: No module named 'langchain_chroma'

In [7]:
dataloader = DataLoader()
records = dataloader.get_records()
document = dataloader.data_chuncking(records)
vector_db = dataloader.get_vector_db()

# Process documents in batches
BATCH_SIZE = 500
for i in tqdm(range(0, len(document), BATCH_SIZE), desc="Embedding Batches"):
    batch = document[i:i + BATCH_SIZE]  # Get batch slice
    vector_db.add_documents(batch)

Extracting Data
Data Extraction Completed
Chunking Completed


Embedding Batches: 100%|██████████| 5/5 [01:53<00:00, 22.63s/it]


In [28]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_chroma import Chroma
from src.prompts.Prompts import Filter_template ,Generator_template
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI,GoogleGenerativeAIEmbeddings
from src.config import VECTORDB_DIR
from datasets import Dataset
from ragas import evaluate,EvaluationDataset
from ragas.metrics import Faithfulness,LLMContextRecall,FactualCorrectness,LLMContextPrecisionWithReference,NoiseSensitivity
from ragas.llms import LangchainLLMWrapper
from google import generativeai as genai


In [21]:
api_key

'AIzaSyDkX0O1Sqd9L1FKvctL-qBFoCNU8hnoNGI'

In [None]:

load_dotenv()
# api_key = os.getenv("API_KEY")
api_key = "AIzaSyDkX0O1Sqd9L1FKvctL-qBFoCNU8hnoNGI"
genai.configure(api_key=api_key)

class ChatBot:
    def __init__(self):

        self.dataset = []
        self.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
        self.vector_db = Chroma(embedding_function=self.embeddings, persist_directory=r'data\Chatbot_vector_db')
        self.llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", api_key=api_key)
        self.parser = JsonOutputParser()
        self.filtering_chain = Filter_template | self.llm | self.parser
        self.generator_chain = Generator_template | self.llm
        self.evaluator_llm = LangchainLLMWrapper(self.llm)
    
    def get_result(self, query):

        self.query = query
        lower_query = query.lower()
        filtering_result = self.filtering_chain.invoke({"query" : lower_query})
        
        if filtering_result:
            metadata_filter = filtering_result
        else:
            metadata_filter = None
        retriever = self.vector_db.as_retriever(search_type="mmr", search_kwargs = {"k": 4, "filter":metadata_filter, 'fetch_k':1000})
        response = retriever.invoke(query)

        retrieved_contexts = [f"content:{doc.page_content}, metadata: {doc.metadata} " for doc in response]
        
        reference = ', '.join([doc.page_content for doc in response])

        tupled_doc = [(doc.metadata,doc.page_content) for doc in response]

        result = self.generator_chain.invoke({"query": query, "content" : tupled_doc})

        

        # self.dataset.append(data)

        
        # self.ragas(query, result.content, retrieved_contexts,reference)
        return query, result.content, retrieved_contexts,reference
    

    def ragas(self,data):

        self.dataset = EvaluationDataset.from_list(data)

        # self.dataset = Dataset.from_dict(self.data)

        self.metrics = [
            Faithfulness(),
            LLMContextRecall(),
            LLMContextPrecisionWithReference(),
            NoiseSensitivity()
        ]

        result = evaluate(dataset=self.dataset,metrics=self.metrics,llm=self.evaluator_llm)
        
        print(result)




In [53]:

chatbot = ChatBot()


In [121]:
dataset = []
queries = []
responses = []
retrieved_contexts = []
references = []

In [122]:
query = 'What is data sources and coding system of BMI?'
query, response, retrieved_context,reference = chatbot.get_result(query)

In [67]:
chatbot.get_result(query)

('What is data sources and coding system of BMI?',
 'The data source for BMI is QResearch, and the coding system is Read codes v2.',
 ['content:{"detail_props": {"Implementation": "Unknown value", "Phenoflow_id": "Unknown value", "Name": "BMI", "Tags": "Unknown value", "Status": 2, "Is_deleted": "Unknown value"}, "concept_props": {"Description": "Body mass index centile", "Concept_history_id": "[4752]", "Logical_type": "[1]", "Coding_system_id": "[5]", "Coding_system_name": "[\\"Read codes v2\\"]", "PIDs": "[\\"HP000636\\"]", "Component_history_id": "[2698]", "Concept_history_date": "[\\"2021-10-06T16:19:42.629679Z\\"]", "Code": "[\\"22K9\\"]", "Codelist_history_id": "[2644]", "CID": "HC032527", "Phenotype_version_id": "[896]", "Phenotype_name": "[\\"BMI\\"]", "Codelist_id": "[2011]", "Coding_system_description": "[\\"Read codes v2\\"]", "Component_id": "[2011]", "ID": 96406, "Phenotype_id": "[\\"PH448\\"]", "Concept_id": "[1807]"}}, metadata: {\'phenotype_name\': \'bmi\', \'website_na

In [126]:

# queries.append(query),
# responses.append(response),
# retrieved_contexts.append(retrieved_context),
# references.append(reference)

data = {
"user_input":query,
"response":response,
"retrieved_contexts":retrieved_context,
"reference":reference
}
dataset.append(data)

In [125]:
query = 'What is BMI?'
query, response, retrieved_contexts,reference = chatbot.get_result(query)

In [115]:
query = 'What are the PID of Acne?'
query, response, retrieved_contexts,reference = chatbot.get_result(query)

In [117]:
query = 'Give a brief on AIDS'
query, response, retrieved_contexts,reference = chatbot.get_result(query)

In [None]:
sample_queries = [
    'What is data sources and coding system of BMI?',
    'What is BMI?',
    'What are the PID of Acne?',
    'Give a brief on AIDS'
]

expected_responses = [
{"detail_props": {"Implementation": "Unknown value", "Phenoflow_id": "Unknown value", "Name": "BMI", "Tags": "Unknown value", "Status": 2, "Is_deleted": "Unknown value"}, "concept_props": {"Description": "Body mass index centile", "Concept_history_id": "[4752]", "Logical_type": "[1]", "Coding_system_id": "[5]", "Coding_system_name": "[\"Read codes v2\"]", "PIDs": "[\"HP000636\"]", "Component_history_id": "[2698]", "Concept_history_date": "[\"2021-10-06T16:19:42.629679Z\"]", "Code": "[\"22K9\"]", "Codelist_history_id": "[2644]", "CID": "HC032527", "Phenotype_version_id": "[896]", "Phenotype_name": "[\"BMI\"]", "Codelist_id": "[2011]", "Coding_system_description": "[\"Read codes v2\"]", "Component_id": "[2011]", "ID": 96406, "Phenotype_id": "[\"PH448\"]", "Concept_id": "[1807]"}}, {"detail_props": {"Implementation": "Unknown value", "Phenoflow_id": "Unknown value", "Name": "BMI", "Tags": "Unknown value", "Status": 2, "Is_deleted": "Unknown value"}, "concept_props": {"Description": "Body Mass Index normal K/M2", "Concept_history_id": "[4752]", "Logical_type": "[1]", "Coding_system_id": "[5]", "Coding_system_name": "[\"Read codes v2\"]", "PIDs": "[\"HP000636\"]", "Component_history_id": "[2698]", "Concept_history_date": "[\"2021-10-06T16:19:42.629679Z\"]", "Code": "[\"22K1\"]", "Codelist_history_id": "[2644]", "CID": "HC032478", "Phenotype_version_id": "[896]", "Phenotype_name": "[\"BMI\"]", "Codelist_id": "[2011]", "Coding_system_description": "[\"Read codes v2\"]", "Component_id": "[2011]", "ID": 96411, "Phenotype_id": "[\"PH448\"]", "Concept_id": "[1807]"}}, {"pname": "bmi", "phenotype_props": {"id": "HXXXX0200", "name": "BMI", "hdruk_PID": "HP000636"}, "website_props": {"name": "hdruk", "pid": {"0": "HP000636"}}, "detail_props": {"Updated": "2021-10-06T16:19:43.330536Z", "Group_access": 1, "Event_end_date": "2013-12-31T00:00:00.00Z", "Defination": "A cohort study based on primary care data from the QResearch database will include patients with one of the three common tumours: breast, bowel and prostate. The primary outcome will be overall survival from the date of cancer diagnosis. Cox proportional hazards regression will be used to compare the survival of patients with cancer taking VGSC-inhibiting drugs (including anticonvulsants and class I antiarrhythmic agents) with patients with cancer not exposed to these drugs, adjusting for age and sex. Exposure to VGSC-inhibiting drugs will be defined as having at least one prescription for these drugs prior to cancer diagnosis. High and low exposure groups will be identified based on the length of use. A number of sensitivity and secondary analyses will be conducted.\n\n\n\n\n\n\n\n\n"}}, {"detail_props": {"PID": "HP000636", "Type": "Biomarker", "Validation": "Unknown value", "Group": "Unknown value", "Phenotype_version_id": 896, "author": "Caroline Fairhurst, Ian Watt, Fabiola Martin, Martin Bland, William J Brackenbury", "Created": "2021-10-06T16:19:43.330508Z", "World_access": 1, "Coding_system": "Read codes v2", "Phenotype_id": "PH448", "Sex": "Both", "Owner": "ieuan.scanlon", "Data_sources": "{\"QResearch\": \"https://web.www.healthdatagateway.org/dataset/777d6ac1-6879-4750-9a85-9e42d28bb8d4\"}", "Collections": {"0": "ClinicalCodes Repository", "1": "Phenotype Library"}, "Event_start_date": "1998-01-01T00:00:00.00Z", "Citation_requirements": "Unknown value", "Publications": "[\"{\\\"doi\\\": null, \\\"details\\\": \\\"Caroline Fairhurst, Ian Watt, Fabiola Martin, Martin Bland, William J Brackenburry, Exposure to sodium channel-inhibiting drugs and cancer survival protocol for a cohort study using the QResearch primary care database. BMJ Open, 4:e006604 2014.\\\"}\"]"}},
{"detail_props": {"Implementation": "Unknown value", "Phenoflow_id": "Unknown value", "Name": "BMI", "Tags": "Unknown value", "Status": 2, "Is_deleted": "Unknown value"}, "concept_props": {"Description": "Body mass index centile", "Concept_history_id": "[4752]", "Logical_type": "[1]", "Coding_system_id": "[5]", "Coding_system_name": "[\"Read codes v2\"]", "PIDs": "[\"HP000636\"]", "Component_history_id": "[2698]", "Concept_history_date": "[\"2021-10-06T16:19:42.629679Z\"]", "Code": "[\"22K9\"]", "Codelist_history_id": "[2644]", "CID": "HC032527", "Phenotype_version_id": "[896]", "Phenotype_name": "[\"BMI\"]", "Codelist_id": "[2011]", "Coding_system_description": "[\"Read codes v2\"]", "Component_id": "[2011]", "ID": 96406, "Phenotype_id": "[\"PH448\"]", "Concept_id": "[1807]"}}, {"detail_props": {"Implementation": "Unknown value", "Phenoflow_id": "Unknown value", "Name": "BMI", "Tags": "Unknown value", "Status": 2, "Is_deleted": "Unknown value"}, "concept_props": {"Description": "Body mass index less than 18.5", "Concept_history_id": "[4752]", "Logical_type": "[1]", "Coding_system_id": "[5]", "Coding_system_name": "[\"Read codes v2\"]", "PIDs": "[\"HP000636\"]", "Component_history_id": "[2698]", "Concept_history_date": "[\"2021-10-06T16:19:42.629679Z\"]", "Code": "[\"EMISNQBO29\"]", "Codelist_history_id": "[2644]", "CID": "HC032541", "Phenotype_version_id": "[896]", "Phenotype_name": "[\"BMI\"]", "Codelist_id": "[2011]", "Coding_system_description": "[\"Read codes v2\"]", "Component_id": "[2011]", "ID": 96403, "Phenotype_id": "[\"PH448\"]", "Concept_id": "[1807]"}}, {"detail_props": {"Implementation": "Unknown value", "Phenoflow_id": "Unknown value", "Name": "BMI", "Tags": "Unknown value", "Status": 2, "Is_deleted": "Unknown value"}, "concept_props": {"Description": "Body Mass Index normal K/M2", "Concept_history_id": "[4752]", "Logical_type": "[1]", "Coding_system_id": "[5]", "Coding_system_name": "[\"Read codes v2\"]", "PIDs": "[\"HP000636\"]", "Component_history_id": "[2698]", "Concept_history_date": "[\"2021-10-06T16:19:42.629679Z\"]", "Code": "[\"22K1\"]", "Codelist_history_id": "[2644]", "CID": "HC032478", "Phenotype_version_id": "[896]", "Phenotype_name": "[\"BMI\"]", "Codelist_id": "[2011]", "Coding_system_description": "[\"Read codes v2\"]", "Component_id": "[2011]", "ID": 96411, "Phenotype_id": "[\"PH448\"]", "Concept_id": "[1807]"}}, {"pname": "bmi", "phenotype_props": {"id": "HXXXX0200", "name": "BMI", "hdruk_PID": "HP000636"}, "website_props": {"name": "hdruk", "pid": {"0": "HP000636"}}, "detail_props": {"Updated": "2021-10-06T16:19:43.330536Z", "Group_access": 1, "Event_end_date": "2013-12-31T00:00:00.00Z", "Defination": "A cohort study based on primary care data from the QResearch database will include patients with one of the three common tumours: breast, bowel and prostate. The primary outcome will be overall survival from the date of cancer diagnosis. Cox proportional hazards regression will be used to compare the survival of patients with cancer taking VGSC-inhibiting drugs (including anticonvulsants and class I antiarrhythmic agents) with patients with cancer not exposed to these drugs, adjusting for age and sex. Exposure to VGSC-inhibiting drugs will be defined as having at least one prescription for these drugs prior to cancer diagnosis. High and low exposure groups will be identified based on the length of use. A number of sensitivity and secondary analyses will be conducted.\n\n\n\n\n\n\n\n\n"}},
{"pname": "acne", "phenotype_props": {"id": "HXCXX0024", "name": "Acne", "cprd_PID": "CP000003", "hdruk_PID": "HP001121"}, "website_props": {"name": "cprd", "pid": {"0": "CP000003"}}, "detail_props": {"Disease_num": 3, "Disease": "Acne", "PID": "CP000003"}, "concept_props": {"Disease_num": "[3]", "Description": "Iodine acne", "Disease": "[\"Acne\"]", "Med_code_id": "[309116010]", "PIDs": "[\"CP000003\"]", "System_num": "[15]", "Read_code": "M261400", "Snomed_ct_description_id": "[309116010]", "CID": "CC008401", "Category": "[\"Diagnosis of Acne\"]", "Snomed_ct_concept_id": "[201217006]", "Mapping": "[\"cleansedreadcode\"]", "System": "[\"Skin conditions\"]", "Med_code": "[67453.0]"}}, {"pname": "acne", "phenotype_props": {"id": "HXCXX0024", "name": "Acne", "cprd_PID": "CP000003", "hdruk_PID": "HP001121"}, "website_props": {"name": "hdruk", "pid": {"0": "HP001121"}}, "detail_props": {"Updated": "2021-10-06T16:02:41.256047Z", "Group_access": 1, "Event_end_date": "2016-07-01T00:00:00.00Z", "Defination": "At the specified date, a patient is defined as having had 'Acne' IF they meet the criteria for any of the following on or before the specified date. The earliest date on which the individual meets any of the following criteria on or before the specified date is defined as the first event date:\n\n\nPrimary care\n\n\n1. 'Acne' diagnosis or history of diagnosis during a consultation \n\nOR\nSecondary care (ICD10)\n\n\n1. ALL diagnoses of 'Acne' or history of diagnosis during a hospitalization\n\n\n\n\n\n\n\n\n\n\n\n\n", "PID": "HP001121", "Type": "Disease or syndrome", "Validation": "Unknown value", "Group": "Unknown value", "Phenotype_version_id": 182, "author": "Kuan V, Denaxas S, Gonzalez-Izquierdo A, Direk K, Bhatti O, Husain S, Sutaria S, Hingorani M, Nitsch D, Parisinos C, Lumbers T, Mathur R, Sofat R, Casas JP, Wong I, Hemingway H, Hingorani A"}}, {"detail_props": {"Status": 2, "Is_deleted": "Unknown value"}, "concept_props": {"Description": "Other acne NOS", "Concept_history_id": "[3011]", "Logical_type": "[1]", "Coding_system_id": "[5]", "Coding_system_name": "[\"Read codes v2\"]", "PIDs": "[\"HP001121\"]", "Component_history_id": "[1827]", "Concept_history_date": "[\"2021-10-06T16:02:39.384881Z\"]", "Code": "[\"M261z00\"]", "Codelist_history_id": "[1773]", "CID": "HC153364", "Phenotype_version_id": "[182]", "Phenotype_name": "[\"Acne\"]", "Codelist_id": "[1140]", "Coding_system_description": "[\"Read codes v2\"]", "Component_id": "[1140]", "ID": 56290, "Phenotype_id": "[\"PH91\"]", "Concept_id": "[936]"}}, {"detail_props": {"Created": "2021-10-06T16:02:41.256018Z", "World_access": 1, "Coding_system": "ICD10 codes", "Phenotype_id": "PH91", "Sex": "Both", "Owner": "ieuan.scanlon", "Data_sources": "{\"CPRD GOLD\": \"https://web.www.healthdatagateway.org/dataset/a29feafa-7bdd-44e9-b977-c9d26425e67f\", \"HES Admitted Patient Care data for CPRD GOLD\": \"https://web.www.healthdatagateway.org/dataset/4bcf64a6-f404-4ef4-ae6e-172512ab5f43\"}", "Collections": {"0": "CALIBER", "1": "Phenotype Library"}, "Event_start_date": "1999-01-01T00:00:00.00Z", "Citation_requirements": "Unknown value", "Publications": "[\"{\\\"doi\\\": \\\"10.1016/S2589-7500(19)30012-3\\\", \\\"details\\\": \\\"Kuan V., Denaxas S., Gonzalez-Izquierdo A. et al. A chronological map of 308 physical and mental health conditions from 4 million individuals in the National Health Service. The Lancet Digital Health - DOI 10.1016/S2589-7500(19)30012-3\\\"}\"]", "Implementation": "Unknown value", "Phenoflow_id": 268, "Name": "Acne", "Tags": "Unknown value"}},
{"detail_props": {"Implementation": "Unknown value", "Phenoflow_id": "Unknown value", "Name": "AIDS", "Tags": "Unknown value", "Status": 2, "Is_deleted": "Unknown value"}, "concept_props": {"Description": "[X]HIV disease resulting in other viral infections", "Concept_history_id": "[5420]", "Logical_type": "[1]", "Coding_system_id": "[5]", "Coding_system_name": "[\"Read codes v2\"]", "PIDs": "[\"HP000940\"]", "Component_history_id": "[3032]", "Concept_history_date": "[\"2021-10-06T16:32:18.258334Z\"]", "Code": "[\"AyuC100\"]", "Codelist_history_id": "[2978]", "CID": "HC224331", "Phenotype_version_id": "[1442]", "Phenotype_name": "[\"AIDS\"]", "Codelist_id": "[2345]", "Coding_system_description": "[\"Read codes v2\"]", "Component_id": "[2345]", "ID": 140811, "Phenotype_id": "[\"PH721\"]", "Concept_id": "[2141]"}}, {"detail_props": {"Implementation": "Unknown value", "Phenoflow_id": "Unknown value", "Name": "AIDS", "Tags": "Unknown value", "Status": 2, "Is_deleted": "Unknown value"}, "concept_props": {"Description": "HIV disease resulting in Burkitt's lymphoma", "Concept_history_id": "[5420]", "Logical_type": "[1]", "Coding_system_id": "[5]", "Coding_system_name": "[\"Read codes v2\"]", "PIDs": "[\"HP000940\"]", "Component_history_id": "[3032]", "Concept_history_date": "[\"2021-10-06T16:32:18.258334Z\"]", "Code": "[\"A789600\"]", "Codelist_history_id": "[2978]", "CID": "HC092016", "Phenotype_version_id": "[1442]", "Phenotype_name": "[\"AIDS\"]", "Codelist_id": "[2345]", "Coding_system_description": "[\"Read codes v2\"]", "Component_id": "[2345]", "ID": 140798, "Phenotype_id": "[\"PH721\"]", "Concept_id": "[2141]"}}, {"pname": "aids", "phenotype_props": {"id": "HXXXX0006", "name": "AIDS", "hdruk_PID": "HP000940, HP001077"}, "website_props": {"name": "hdruk", "pid": {"0": "HP000940", "1": "HP001077"}}, "detail_props": {"Updated": "2021-10-06T16:32:18.941027Z", "Group_access": 1, "Event_end_date": "2014-03-31T00:00:00.00Z", "Defination": "Aims: \n\nTo determine association between HbA1C variability and hypoglycemia requiring hospitalization (HH) in adults with type 1 diabetes (T1D) and type 2 diabetes (T2D).\n\n\nMethods: \n\nUsing nested case-control design in electronic health record data in England, one case with first or recurrent HH was matched to one control who had not experienced HH in incident T1D and T2D adults. HbA1C variability was determined by standard deviation of \u22653 HbA1C results. Conditional logistic models were applied to determine association of HbA1C variability with first and recurrent HH.\n\n\nResults: \n\nIn T1D, every 1.0% increase in HbA1C variability was associated with 90% higher first HH risk (95% CI, 1.25\u20132.89) and 392% higher recurrent HH risk (95% CI, 1.17\u201320.61). In T2D, a 1.0% increase in HbA1C variability was associated with 556% higher first HH risk (95% CI, 3.88\u201311.08) and 573% higher recurrent HH risk (95% CI,1.59\u201328.51). In T2D for first HH, the association was the strongest in non-insulin non-sulfonylurea users (P b 0.0001); for recurrent HH, the association was stronger in insulin users than sulfonylurea users (P = 0.07). The HbA1C variability-HH association was stronger in more recent years in T2D (P \u2264 0.004).\n\n\nConclusions: \n\nHbA1C variability is a strong predictor for HH in T1D and T2D.\n\n\n\n\n\n\n\n\n"}}, {"pname": "aids", "phenotype_props": {"id": "HXXXX0006", "name": "AIDS", "hdruk_PID": "HP000940, HP001077"}, "website_props": {"name": "hdruk", "pid": {"0": "HP000940", "1": "HP001077"}}, "detail_props": {"Updated": "2022-04-04T07:07:23.583436Z", "Group_access": 1, "Event_end_date": "2014-12-31T00:00:00.00Z", "Defination": "Background:\n\nSelf-harm is a major risk factor for suicide, with older adults (older than 65 years) having reportedly greater suicidal intent than any other age group. With the aging population rising and paucity of research focus in this age group, the extent of the problem of self-harm needs to be established. In a primary care cohort of older adults we aimed to investigate the incidence of self-harm, subsequent clinical management, prevalence of mental and physical diagnoses, and unnatural-cause mortality risk, including suicide.\n\n\nMethods:\n\nThe UK Clinical Practice Research Datalink contains anonymised patient records from general practice that routinely capture clinical information pertaining to both primary and secondary care services. We identified 4124 adults aged 65 years and older with a self-harm episode ascertained from Read codes recorded during 2001\u201314. We calculated standardised incidence and in 2854 adults with at least 12 months follow-up examined the frequency of psychiatric referrals and prescription of psychotropic medication after self-harm. We estimated prevalence of mental and physical illness diagnoses before and after self-harm and, using Cox regression in a matched cohort, we examined cause-specific mortality risks.\n\n\nFindings:\n\nOverall incidence of self-harm in older adults aged 65 years and older was 4\u00b71 per 10 000 person-years with stable gender-specific rates observed over the 13-year period. After self-harm, 335 (11\u00b77%) of 2854 adults were referred to mental health services, 1692 (59\u00b73%) were prescribed an antidepressant, and 336 (11\u00b78%) were prescribed a tricyclic antidepressant (TCA). Having a diagnosed previous mental illness was twice as prevalent in the self-harm cohort as in the comparison cohort (prevalence ratio 2\u00b710 [95% CI 2\u00b703\u20132\u00b717]) and with a previous physical health condition prevalence was 20% higher in the self-harm cohort compared to the comparison cohort (1\u00b720 [1\u00b717\u20131\u00b723]). Adults from the self-harm cohort (n=2454) died from unnatural causes an estimated 20 times more frequently than the comparison cohort (n=48 921) during the first year. A markedly elevated risk of suicide (hazard ratio 145\u00b74 [95% CI 53\u00b79\u2013392\u00b73]) was observed in the self-harm cohort.\n\n\nInterpretation: \n\nWithin primary care, we have identified a group of older adults at high risk from unnatural death, particularly within the first year of self-harm. We have highlighted a high frequency of prescription of TCAs, known to be potentially fatally toxic in overdose. We emphasise the need for early intervention, careful alternative prescribing, and increased support when older adults consult after an episode of self-harm and with other health conditions.\n\n\n\n\n\n\n\n\n"}}
]

In [None]:
dataset = [{
"user_input":queries,
"response":responses,
"retrieved_contexts":retrieved_contexts,
"reference":references
}
]

In [127]:
dataset

[{'user_input': 'What is data sources and coding system of BMI?',
  'response': 'The data source for BMI is QResearch, and the coding system is Read codes v2.',
  'retrieved_contexts': ['content:{"detail_props": {"Implementation": "Unknown value", "Phenoflow_id": "Unknown value", "Name": "BMI", "Tags": "Unknown value", "Status": 2, "Is_deleted": "Unknown value"}, "concept_props": {"Description": "Body mass index centile", "Concept_history_id": "[4752]", "Logical_type": "[1]", "Coding_system_id": "[5]", "Coding_system_name": "[\\"Read codes v2\\"]", "PIDs": "[\\"HP000636\\"]", "Component_history_id": "[2698]", "Concept_history_date": "[\\"2021-10-06T16:19:42.629679Z\\"]", "Code": "[\\"22K9\\"]", "Codelist_history_id": "[2644]", "CID": "HC032527", "Phenotype_version_id": "[896]", "Phenotype_name": "[\\"BMI\\"]", "Codelist_id": "[2011]", "Coding_system_description": "[\\"Read codes v2\\"]", "Component_id": "[2011]", "ID": 96406, "Phenotype_id": "[\\"PH448\\"]", "Concept_id": "[1807]"}}, m

In [None]:
evaluation_metric = chatbot.ragas(data)

ValidationError: 13 validation errors for MultiTurnSample
user_input.0.HumanMessage
  Input should be a valid dictionary or instance of HumanMessage [type=model_type, input_value='What is data sources and coding system of BMI?', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type
user_input.0.AIMessage
  Input should be a valid dictionary or instance of AIMessage [type=model_type, input_value='What is data sources and coding system of BMI?', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type
user_input.0.ToolMessage
  Input should be a valid dictionary or instance of ToolMessage [type=model_type, input_value='What is data sources and coding system of BMI?', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type
user_input.1.HumanMessage
  Input should be a valid dictionary or instance of HumanMessage [type=model_type, input_value='What is BMI?', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type
user_input.1.AIMessage
  Input should be a valid dictionary or instance of AIMessage [type=model_type, input_value='What is BMI?', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type
user_input.1.ToolMessage
  Input should be a valid dictionary or instance of ToolMessage [type=model_type, input_value='What is BMI?', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type
user_input.2.HumanMessage
  Input should be a valid dictionary or instance of HumanMessage [type=model_type, input_value='What are the PID of Acne?', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type
user_input.2.AIMessage
  Input should be a valid dictionary or instance of AIMessage [type=model_type, input_value='What are the PID of Acne?', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type
user_input.2.ToolMessage
  Input should be a valid dictionary or instance of ToolMessage [type=model_type, input_value='What are the PID of Acne?', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type
user_input.3.HumanMessage
  Input should be a valid dictionary or instance of HumanMessage [type=model_type, input_value='Give a brief on AIDS', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type
user_input.3.AIMessage
  Input should be a valid dictionary or instance of AIMessage [type=model_type, input_value='Give a brief on AIDS', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type
user_input.3.ToolMessage
  Input should be a valid dictionary or instance of ToolMessage [type=model_type, input_value='Give a brief on AIDS', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type
reference
  Input should be a valid string [type=string_type, input_value=['{"detail_props": {"Impl...n\\n\\n\\n\\n\\n\\n"}}'], input_type=list]
    For further information visit https://errors.pydantic.dev/2.10/v/string_type

In [128]:
evaluation_dataset = EvaluationDataset.from_list(dataset)

In [145]:
metrics = [
            Faithfulness(),
            # FactualCorrectness(),
            LLMContextRecall(),
            LLMContextPrecisionWithReference()
            # NoiseSensitivity()
        ]


In [146]:
evaluator_llm = LangchainLLMWrapper(chatbot.llm)

In [147]:

result = evaluate(dataset=evaluation_dataset,metrics=metrics,llm=evaluator_llm)
        

Evaluating:   0%|          | 0/6 [00:00<?, ?it/s]

In [149]:
result

{'faithfulness': 0.8750, 'context_recall': 0.8750, 'llm_context_precision_with_reference': 0.9583}