In [1]:
%pip install mistralai pandas tqdm requests langchain openai datasets ragas langchain_mistralai langchain_huggingface

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import pandas as pd
import tqdm
import requests
from datasets import Dataset
import os

from langchain_mistralai import ChatMistralAI
from mistralai.client import MistralClient
from mistralai import Mistral

from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

from ragas import evaluate

from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness, SemanticSimilarity

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the evaluation set from the CSV file in the notebook's working directory.
data_eval = pd.read_csv(filepath_or_buffer="eval_data.csv")
# Preview the first five rows; the bare last expression is rendered as the cell output.
data_eval.head(n=5)

Unnamed: 0,user_input,context,response
0,what are Cholera symptoms?,Individuals infected with V. cholerae O1 or O1...,"Cholera is known by its symptoms, such as the ..."
1,what are Shigellosis symptoms?,Shigella causes self-limited disease character...,"Shigellosis is known by its symptoms, such as ..."
2,what are Tuberculous meningitis symptoms?,Tuberculous meningitis occurs in persons of al...,Tuberculous meningitis is known by its symptom...
3,what are Tularemia symptoms?,"If tularemia goes untreated, symptoms usually ...","Tularemia is known by its symptoms, such as th..."
4,what are Cutaneous anthrax symptoms?,(Table 156-1) Cutaneous anthrax begins as a pr...,"Cutaneous anthrax is known by its symptoms, su..."


In [4]:
def get_system_responses(questions, endpoint="http://localhost:8000/query", timeout=120.0, verbose=True):
    """
    Send every evaluation question to the query endpoint and collect the answers.

    Rows are read positionally from ``questions.itertuples()``: position 0 is
    the DataFrame index, position 1 is sent as the query text and position 3
    is kept as the ground-truth answer.

    Args:
        questions: DataFrame of evaluation rows laid out as described above.
        endpoint: URL that receives the POST request; the JSON body is
            ``{"query": <question text>}``.
        timeout: Seconds to wait for the service before the request is
            abandoned. Without a timeout a stalled service would block the
            notebook forever.
        verbose: When true, print the raw JSON payload of every answer.

    Yields:
        dict: One record per successfully answered row with the keys
        ``"question"``, ``"response"``, ``"ground_truth"`` and ``"context"``.
        ``"response"`` and ``"context"`` are copied from the service payload.

    Rows whose request fails, times out, or returns a payload without the
    expected keys are reported on stdout and skipped.
    """
    # `total` lets tqdm show progress out of the full row count instead of a bare counter.
    for question in tqdm.tqdm(questions.itertuples(), total=len(questions)):
        try:
            response = requests.post(endpoint, json={"query": question[1]}, timeout=timeout)
            response.raise_for_status()
            response_data = response.json()
            if verbose:
                print(response_data)
            record = {
                "question": question[1],
                "response": response_data["response"],
                "ground_truth": question[3],
                "context": response_data["context"],
            }
        except Exception as e:
            # Deliberately best-effort: one failing row must not abort the whole collection run.
            print(f"Error while sending query: {question[0]}, error message: {e}")
        else:
            yield record

In [5]:
# Drain the generator so every answered row is materialised in a list.
results = list(get_system_responses(data_eval))

1it [00:14, 14.07s/it]

{'context': 'Individuals infected with V. cholerae O1 or O139 exhibit a range of clinical manifestations. Some individuals are asymptomatic or have only mild diarrhea; others present with the sudden onset of explosive and life-threatening diarrhea (cholera gravis). The reasons for the range in signs and symptoms of disease are incompletely understood but include the level of preexisting immunity, blood type, and nutritional status. In a nonimmune individual, after a 24to 48-h incubation period, cholera characteristically begins with the sudden onset of painless watery diarrhea that may quickly become voluminous. Patients often vomit. In severe cases, volume loss can exceed 250 mL/ kg in the first 24 h. If fluids and electrolytes are not replaced, hypovolemic shock and death may ensue. Fever is usually absent. Muscle cramps due to electrolyte disturbances are common. The stool has a characteristic appearance: a nonbilious, gray, slightly cloudy fluid with flecks of mucus, no blood, and 

2it [00:21, 10.17s/it]

{'context': 'Shigella causes self-limited disease characterized by about 6 days of diarrhea, fever, and abdominal pain. After an incubation period of 1 to 7 days, the initial watery diarrhea progresses to a dysenteric phase in approximately 50% of patients, and constitutional symptoms can persist for as long as 1 month. A subacute presentation also can develop in a minority of adults. Antibiotic treatment shortens the clinical course and reduces the duration over which organisms are shed in the stool, but anti-diarrheal medications are contraindicated because they can prolong symptoms by delaying bacterial clearance.\nThe pathogenesis of Shigella is essentially determined by a large diarrhea, dysentery, and the postinfectious phase. The incubation virulence plasmid of 214 kb comprising ~100 genes, of which 25 period usually lasts 1–4 days but may be as long as 8 days. Typical iniencode a type III secretion system that inserts into the mem-tial manifestations are transient fever, limite

3it [00:30,  9.46s/it]

{'context': 'Tuberculous meningitis occurs in persons of all ages. Formerly, it was more frequent in young children, but now it is more frequent in adults, at least in the United States. The early manifestations are usually low-grade fever, malaise, headache (more than 50 percent of cases), lethargy, confusion, and stiff neck (75 percent of cases), with Kernig and Brudzinski signs. Characteristically, these symptoms evolve much less rapidly in tuberculous than in bacterial meningitis, usually over a period of a week or two, sometimes longer. In young children and infants, apathy, hyperirritability, vomiting, and seizures are the usual symptoms; however, stiff neck may not be prominent or may be absent altogether.\nSeveral pathogens, including mycobacteria, some spirochetes, and fungi, cause a chronic meningitis; infections with these organisms also may involve the brain parenchyma. Tuberculous meningitis usually manifests with generalized signs and symptoms of headache, malaise, mental

4it [00:38,  8.88s/it]

{'context': 'If tularemia goes untreated, symptoms usually last 1–4 weeks but may continue for months. The mortality rate from severe untreated infection (including all cases of untreated pulmonary and typhoidal tularemia) can be as high as 30%. However, the overall mortality rate for untreated tularemia is <8%. With appropriate treatment, the mortality rate is <1%. Poor outcomes are often associated with long delays in diagnosis and treatment. Lifelong immunity usually follows tularemia.\nas a biologic weapon would be as an aerosol, as has occurred in a number of natural outbreaks in rural areas, including Martha’s Vineyard in the United States. Approximately 1–14 days following exposure by this route, one would expect to see inflammation of the airways with pharyngitis, pleuritis, and bronchopneumonia. Typical symptoms would include the abrupt onset of fever, fatigue, chills, headache, and malaise (Table 261e-3). Some patients might experience conjunctivitis with ulceration, pharyngi

5it [00:47,  9.13s/it]

{'context': '(Table 156-1) Cutaneous anthrax begins as a pruritic papule, which develops within days into an ulcer with surrounding vesicles and edema and then into an enlarging ulcer with a black eschar. Cutaneous anthrax may cause chronic nonhealing ulcers with an overlying dirty-gray membrane, although lesions may also mimic psoriasis, eczema, or impetigo. Ulceroglandular tularemia may have associated ulcerated skin lesions with painful regional adenopathy. Although buboes are the major cutaneous manifestation of plague, ulcers with eschars, papules, or pustules are also present in 25% of cases.\nThe three major clinical forms of anthrax are gastrointestinal, cutaneous, and inhalational. Gastrointestinal anthrax typically results from the ingestion of contaminated meat; the condition is rarely seen and is unlikely to be the result of a bioterrorism event. The lesion of cutaneous anthrax typically begins as a papule following the introduction of spores through an opening in the skin.

6it [00:55,  8.77s/it]

{'context': 'CLINICAL MANIFESTATIONS Bubonic Plague After an incubation period of 2–6 days, the onset of bubonic plague is sudden and is characterized by fever (>38°C), malaise, myalgia, dizziness, and increasing pain due to progressive lymphadenitis in the regional lymph nodes near the fleabite or other inoculation site. Lymphadenitis manifests as a tense, tender swelling (bubo) that, when palpated, has a boggy consistency with an underlying hard core. Generally, there is one painful and erythematous bubo with surrounding periganglionic edema. The bubo is most commonly inguinal but can also be crural, axillary (Fig. 196-2), cervical, or submaxillary, depending on the site of the bite. Abdominal pain from intraabdominal node involvement can occur without other visible signs. Children are most likely to present with cervical or axillary buboes.\nThe clinical syndromes of plague generally reflect the mode of infection. Bubonic plague is the consequence of an insect bite; primary pneumoni

7it [01:04,  8.62s/it]

{'context': 'B. abortus infections may be more insidious in onset and more likely to become chronic. B. canis infections are reported to present frequently with acute gastrointestinal symptoms. The incubation period varies from 1 week to several months, and the onset of fever and other symptoms may be abrupt or insidious. In addition to experiencing fever and sweats, patients become increasingly apathetic and fatigued; lose appetite and weight; and have nonspecific myalgia, headache, and chills. Overall, the presentation of brucellosis often fits one of three patterns: febrile illness that resembles typhoid but is less severe; fever and acute monoarthritis, typically of the hip or knee, in a young child; and long-lasting fever, misery, and low-back or hip pain in an older man. In an endemic area (e.g., much of the Middle East), a patient with fever and difficulty walking into the clinic would be regarded as having brucellosis until it was proven otherwise.\nBrucellosis almost invariabl

8it [01:13,  8.81s/it]

{'context': 'Most patients with leptospirosis recover. However, post-leptospirosis symptoms, mainly of a depression-like nature, may occur and persist for years after the acute disease. Mortality rates are highest among patients who are elderly and those who have severe disease (pulmonary hemorrhage, Weil’s syndrome). Leptospirosis during pregnancy is associated with high fetal mortality rates. Long-term follow-up of patients with renal failure and hepatic dysfunction has documented good recovery of renal and hepatic function. Individuals who may be exposed to Leptospira through their occupations or their involvement in recreational freshwater activities should be informed about the risks. Measures for controlling leptospirosis include avoidance of exposure to urine and tissues from infected animals through proper eyewear, footwear, and other protective equipment. Targeted rodent control strategies could be considered.\nPhysical examination may include any of the following findings, no

9it [01:22,  9.01s/it]

{'context': 'Usually a self-limited illness, cat-scratch disease (CSD) has two general clinical presentations. Typical CSD, the more common, is characterized by subacute regional lymphadenopathy; atypical CSD is the collective designation for numerous extranodal manifestations involving various organs. B. henselae is the principal etiologic agent of CSD. Rare cases have been associated with Afipia felis and other Bartonella species.\nCat-scratch disease typically presents with a cutaneous papule or conjunctival granuloma at the site of bacterial inoculation, followed by lymphadenopathy of thedraining regional nodes. The nodes are tender, with suppuration in approximately 10% of cases. Lymphadenopathy may persist 1 to 4 months. Less common features ofcat-scratch disease include erythema nodosum, osteolytic lesions, encephalitis, oculoglandular (Parinaud) syndrome,hepatic or splenic granulomas, endocarditis, polyneuritis,and transverse myelitis. Lymphadenitis caused by nontuberculous myc

10it [01:32,  9.16s/it]

{'context': 'Other complications of diphtheria include pneumonia, renal failure, encephalitis, cerebral infarction, pulmonary embolism, and serum sickness from antitoxin therapy. The diagnosis of diphtheria is based on clinical signs and symptoms plus laboratory confirmation. Respiratory diphtheria should be considered in patients with sore throat, pharyngeal exudates, and fever. Other symptoms may include hoarseness, stridor, or palatal paralysis. The presence of a pseudomembrane should prompt strong consideration of diphtheria. Once a clinical diagnosis of diphtheria is made, diphtheria antitoxin should be obtained and administered as rapidly as possible. Laboratory diagnosis of diphtheria is based either on cultivation of\nDiphtheria is caused by the bacteria Corynebacterium diphtheriae (Chap. 175). Infected individuals present with flulike symptoms of generalized myalgias, headache, fatigue, low-grade fever, and irritability within a week to 10 days of the exposure. About 20–70% of

11it [01:39,  8.52s/it]

{'context': 'in recent years, although strains of GAS that produce pyrogenic exotoxins continue to be prevalent in the population. The symptoms of scarlet fever are the same as those of pharyngitis alone. The rash typically begins on the first or second day of illness over the upper trunk, spreading to involve the extremities but sparing the palms and soles. The rash is made up of minute papules, giving a characteristic “sandpaper” feel to the skin. Associated findings include circumoral pallor, “strawberry tongue” (enlarged papillae on a coated tongue, which later may become denuded), and accentuation of the rash in skinfolds (Pastia’s lines). Subsidence of the rash in 6–9 days is followed after several days by desquamation of the palms and soles. The differential diagnosis of scarlet fever includes other causes of fever and generalized rash, such as measles and other viral exanthems, Kawasaki disease, TSS, and systemic allergic reactions (e.g., drug eruptions).\nScarlet Fever Scarlet

12it [01:49,  8.97s/it]

{'context': 'The incubation period for Legionnaires’ disease is usually 2–10 days, although slightly longer incubation periods have been documented. Fever is almost universal. In one observational study, 20% of patients had temperatures in excess of 40°C (104°F). The symptoms and signs may range from a mild cough and a slight fever to stupor with widespread pulmonary infiltrates and multisystem failure. The mild cough of Legionnaires’ disease is only slightly productive. Sometimes the sputum is streaked with blood. Chest pain—either pleuritic or nonpleuritic—can be a prominent feature and, when coupled with hemoptysis, can lead to an incorrect diagnosis of pulmonary embolism. Shortness of breath is reported by one-third to one-half of patients. Gastrointestinal difficulties are often pronounced; abdominal pain, nausea, and vomiting affect 10–20% of patients. Diarrhea (watery rather than bloody) is reported in 25–50% of cases. The most common neurologic abnormalities are confusion or ch

13it [01:56,  8.48s/it]

{'context': 'C. trachomatis in this syndrome have declined.\nCommunities with blinding trachoma often experience seasonal epidemics of conjunctivitis due to H. influenzae that contribute to the intensity of the inflammatory process. In such areas, the active infectious process usually resolves spontaneously in affected persons at 10–15 years of age, but conjunctival scars continue to shrink, producing trichiasis and entropion with subsequent corneal scarring in adults. In areas with milder and less prevalent disease, the process may be much slower, with active disease continuing into adulthood; blindness is rare in these cases.\nThe clinical diagnosis of classic trachoma can be made if two of the following signs are present: (1) lymphoid follicles on the upper tarsal conjunctiva; (2) typical conjunctival scarring; (3) vascular pannus; or (4) limbal follicles or their sequelae, Herbert’s pits. The clinical diagnosis of endemic trachoma should be confirmed by laboratory tests in children

14it [02:05,  8.45s/it]

{'context': 'Figure 88-1 Facial features of Kawa-saki disease with (A) morbilliform rash and nonsuppurative conjunctivitis and (B) red, chapped lips. AB The convalescent phase begins with the disappearance of clinical symptoms and continues until the ESR returns to normal, usually 6 to 8 weeks after the onset of illness. Beau lines of the fingernails may appear during this phase.\nCervical lymphadenopathy (often painful and unilateral, with at least one node > 1.5 cm). Diffuse mucous membrane erythema (e.g., “strawberry tongue”); dry, red, chapped lips. Erythema of the palms and soles; indurative edema of the hands and feet; late desquamation of the fingertips (in the subacute phase). Other manifestations include sterile pyuria, gallbladder hydrops, hepatitis, and arthritis. Untreated Kawasaki disease can lead to coronary aneurysms and even myocardial infarction! Conjunctivitis Rash Adenopathy Strawberry tongue Hands and feet (red, swollen, f aky skin) BURN (fever > 40°C for ≥ 5 days) 

15it [02:12,  8.84s/it]

{'context': 'Prodromal Features The clinical features of rabies begin with nonspecific prodromal manifestations, including fever, malaise, headache, nausea, and vomiting. Anxiety or agitation may also occur. The earliest specific neurologic symptoms of rabies include paresthesias, pain, or pruritus near the site of the exposure, one or more of which occur in 50–80% of patients and strongly suggest rabies. The wound has usually healed by this point, and these symptoms probably reflect infection with associated inflammatory changes in local dorsal root or cranial sensory ganglia.\nDid the animal bite the patient or did saliva contaminate a scratch, abrasion, open wound, or mucous membrane? Is rabies known or suspected to be present in the species and the geographic area? Was the animal captured? Does laboratory examination of the brain by fluorescent antibody staining confirm rabies? Was the animal a normally behaving dog, cat, or ferret? Does the animal become ill under observation over




In [6]:
# Reshape the collected records into one list per column.
# `contexts` is normalised so that every row holds a list, even when the
# service returned a single string.
data_samples = {
    "question": [item["question"] for item in results],
    "answer": [item["response"] for item in results],
    "contexts": [
        item["context"] if isinstance(item["context"], list) else [item["context"]]
        for item in results
    ],
    "ground_truth": [item["ground_truth"] for item in results],
}

# Row labels left out of the evaluation.
# NOTE(review): the notebook does not record why row 1 is excluded — confirm it is still intended.
EXCLUDED_ROWS = [1]

# Non-mutating drop: re-running the cell always starts from the full set of records.
dataframe = pd.DataFrame(data_samples).drop(index=EXCLUDED_ROWS)

# Hand `Dataset.from_dict` a plain column -> list mapping rather than the DataFrame itself.
dataset = Dataset.from_dict(dataframe.to_dict(orient="list"))

In [7]:
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

# The API key is read from the environment so that it never lives in the notebook.
# NOTE(review): the key that was previously hard-coded in this cell has been exposed
# and should be revoked and replaced.
api_key = os.environ.get("MISTRAL_API_KEY")
if not api_key:
    raise RuntimeError("Set the MISTRAL_API_KEY environment variable before running this cell.")
model = "mistral-large-latest"

client = Mistral(api_key=api_key)
# Pass the model name explicitly; previously `model` was defined but never used,
# so the chat model silently ran with the library's default model.
chat = ChatMistralAI(api_key=api_key, model=model)

# Wrap the LangChain chat model so the ragas metrics can use it as their judge LLM.
evaluator_llm = LangchainLLMWrapper(langchain_llm=chat)

In [8]:
from sentence_transformers import SentenceTransformer

class SentenceTransformerMy(object):
    """Small embeddings adapter around a ``SentenceTransformer`` model.

    Provides ``embed_documents`` / ``embed_query`` plus their async
    ``aembed_*`` counterparts, i.e. the method names of a LangChain-style
    embeddings object, so an instance can be handed to an embeddings wrapper.
    """

    # Extra keyword arguments forwarded to ``SentenceTransformer.encode``.
    # See also the Sentence Transformer documentation: https://sbert.net/docs/package_reference/SentenceTransformer.html#sentence_transformers.SentenceTransformer.encode
    encode_kwargs = dict()
    # Encode through a multi-process pool instead of a single ``encode`` call.
    multi_process: bool = False
    # Whether to show a progress bar.
    show_progress: bool = False

    def __init__(self, model_path, **kwargs):
        """Load the model found at ``model_path``.

        Args:
            model_path: Path or model identifier passed to ``SentenceTransformer``.
            **kwargs: ``encode_kwargs``, ``multi_process`` and ``show_progress``
                override the class-level defaults for this instance only.
                Everything else is forwarded to ``SentenceTransformer``;
                ``trust_remote_code`` (True), ``device`` ("cpu") and
                ``config_kwargs`` keep their previous hard-coded values
                unless the caller supplies them.
        """
        # Per-instance overrides; the shared class attributes stay untouched.
        for option in ("encode_kwargs", "multi_process", "show_progress"):
            if option in kwargs:
                setattr(self, option, kwargs.pop(option))

        # NOTE(review): trust_remote_code=True executes code shipped with the
        # checkpoint — only load models from a trusted source.
        kwargs.setdefault("trust_remote_code", True)
        kwargs.setdefault("device", "cpu")
        kwargs.setdefault(
            "config_kwargs",
            {"use_memory_efficient_attention": False, "unpad_inputs": False},
        )
        self.client = SentenceTransformer(model_path, **kwargs)

    def embed_documents(self, texts):
        """Compute doc embeddings using a HuggingFace transformer model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        # Newlines are replaced by spaces before encoding.
        texts = [text.replace("\n", " ") for text in texts]
        if self.multi_process:
            pool = self.client.start_multi_process_pool()
            try:
                embeddings = self.client.encode_multi_process(texts, pool)
            finally:
                # Always release the worker processes, even when encoding fails.
                self.client.stop_multi_process_pool(pool)
        else:
            embeddings = self.client.encode(
                texts, show_progress_bar=self.show_progress, **self.encode_kwargs
            )

        return embeddings.tolist()

    def embed_query(self, text: str):
        """Compute query embeddings using a HuggingFace transformer model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        return self.embed_documents([text])[0]

    async def aembed_documents(self, texts):
        """Async variant of ``embed_documents``.

        Runs the blocking encode call in the default executor so the event
        loop stays responsive. LangChain-style wrappers await this method;
        presumably the ragas embeddings wrapper does as well — confirm
        against the installed version.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        import asyncio  # local import keeps this class self-contained

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self.embed_documents, texts)

    async def aembed_query(self, text: str):
        """Async variant of ``embed_query``.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        return (await self.aembed_documents([text]))[0]

In [9]:
# Location of the local stella_en_400M_v5 checkpoint. Set the STELLA_MODEL_PATH
# environment variable to point elsewhere on other machines; the fallback is the
# path this notebook was originally run with.
# A Hub identifier such as "dunzhang/stella_en_400M_v5" can presumably be used
# instead of a local directory — confirm it refers to the same checkpoint.
STELLA_MODEL_PATH = os.environ.get(
    "STELLA_MODEL_PATH",
    "C:\\Users\\User\\Documents\\rag\\RagAssistant\\notebooks\\stella_en_400M_v5\\",
)

embedding_model = SentenceTransformerMy(
    model_path=STELLA_MODEL_PATH,
    config_kwargs={"use_memory_efficient_attention": False, "unpad_inputs": False})
# Wrap the adapter so the ragas metrics can use it as their embeddings backend.
evaluator_embeddings = LangchainEmbeddingsWrapper(embedding_model)

Some weights of the model checkpoint at C:\Users\User\Documents\rag\RagAssistant\notebooks\stella_en_400M_v5\ were not used when initializing NewModel: ['new.pooler.dense.bias', 'new.pooler.dense.weight']
- This IS expected if you are initializing NewModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing NewModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
from ragas.run_config import RunConfig, add_async_retry, add_retry
from langchain_core.callbacks import BaseCallbackHandler

# Execution settings for the evaluation run (timeout, retry count, maximum wait
# and number of parallel workers), passed to `evaluate` below.
run_config=RunConfig(timeout=1000, max_retries=20, max_wait=100, max_workers=2)

# The LLM-judged metrics share `evaluator_llm`; the similarity metric uses the embeddings.
metrics = [
    LLMContextRecall(llm=evaluator_llm),
    FactualCorrectness(llm=evaluator_llm),
    Faithfulness(llm=evaluator_llm),
    SemanticSimilarity(embeddings=evaluator_embeddings),
]

# Stored under the name the following cell displays (it was misspelled before,
# which made that cell fail with a NameError on a fresh kernel).
evaluations = evaluate(
    dataset=dataset,
    metrics=metrics,
    run_config=run_config,
)

Evaluating:   0%|          | 0/56 [00:00<?, ?it/s]Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt context_recall_classification_prompt failed to parse output: The output parser failed to parse the output including retries.
Exception raised in Job[0]: RagasOutputParserException(The output parser failed to parse the output including retries.)
Evaluating:   5%|▌         | 3/56 [03:27<56:38, 64.11s/it]   Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser faile

In [None]:
# Render the evaluation result as the cell output; `evaluations` must have been
# assigned by an earlier cell before this one runs.
evaluations