In [1]:
import os

from dotenv import load_dotenv
import nest_asyncio

# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()

# Patch asyncio for use inside the notebook's already-running event loop.
nest_asyncio.apply()

# Fail fast with a clear message when the key is missing or empty. The previous
# `os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY` raised an opaque
# "TypeError: str expected, not NoneType" in that case, and was a no-op
# otherwise because os.getenv() reads the value from os.environ to begin with.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

### Building the Tests

In [2]:
# Load the source documents from data/Bio/ for test-set generation and indexing.
from llama_index.core import SimpleDirectoryReader

bio_reader = SimpleDirectoryReader("data/Bio/")
documents = bio_reader.load_data()


In [3]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# OpenAI models handed to the Ragas test-set generator:
# an embedding model plus separate "generator" and "critic" LLMs.
embeddings = OpenAIEmbedding(model="text-embedding-ada-002")
generator_llm = OpenAI(model="gpt-3.5-turbo")
critic_llm = OpenAI(model="gpt-4")

generator = TestsetGenerator.from_llama_index(
    generator_llm=generator_llm, critic_llm=critic_llm, embeddings=embeddings
)

In [4]:
# Share of each question evolution type in the generated test set
# (the weights add up to 1.0).
evolution_mix = {simple: 0.5, reasoning: 0.25, multi_context: 0.25}

# Generate a 10-question test set from the loaded documents.
testset = generator.generate_with_llamaindex_docs(
    documents, test_size=10, distributions=evolution_mix
)

embedding nodes:   0%|          | 0/272 [00:00<?, ?it/s]

Filename and doc_id are the same for all nodes.


Generating:   0%|          | 0/10 [00:00<?, ?it/s]

In [5]:
# Preview the first five generated test cases as a DataFrame.
df = testset.to_pandas()
df.head(5)

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,How can the sequential mathematical analysis b...,[[The use of the sequential mathematical analy...,The sequential mathematical analysis can be us...,simple,[{'file_path': '/Users/chris/Desktop/7980/CS79...,True
1,How do resistance mutations in the HCV NS3/4A ...,[[Resistance to protease inhibitors and effici...,Resistance mutations in the HCV NS3/4A region ...,simple,[{'file_path': '/Users/chris/Desktop/7980/CS79...,True
2,What is a symptom associated with the conditio...,[T1\tDISO 1 26\tGrowing teratoma syndrome\nT3\...,Polyuria is a symptom associated with the cond...,simple,[{'file_path': '/Users/chris/Desktop/7980/CS79...,True
3,What is the significance of proliferative acti...,[\nT107\tDISO 1176 1198\tproliferative activit...,The significance of proliferative activity in ...,simple,[{'file_path': '/Users/chris/Desktop/7980/CS79...,True
4,What is the significance of podocyte structura...,[ 1 8\tUrinary\nT60\tPHYS 1 18\tUrinary excret...,The significance of podocyte structural protei...,simple,[{'file_path': '/Users/chris/Desktop/7980/CS79...,True


### Building the query engine

In [6]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.settings import Settings

# Build a vector index over the same documents used for test-set generation.
vector_index = VectorStoreIndex.from_documents(documents)

# Query engine with default settings; this is the object evaluated below.
query_engine = vector_index.as_query_engine()

simple questions

In [7]:
# Convert the test set to a pandas DataFrame and show the first question.
df = testset.to_pandas()
# Single .loc lookup (row label 0, column "question") instead of the chained
# df["question"][0] form, which indexes twice and relies on Series integer-key
# behaviour.
df.loc[0, "question"]

"How can the sequential mathematical analysis be used to determine the driver's seat position inside the car passenger compartment based on injuries to the extremities in the case of a traffic accident?"

In [8]:
# Run the first generated question through the query engine as a sanity check.
# Uses a single .loc lookup rather than chained df["question"][0] indexing.
response_vector = query_engine.query(df.loc[0, "question"])

print(response_vector)

The sequential mathematical analysis can be utilized to determine the driver's seat position inside the car passenger compartment by assigning diagnostic coefficients to specific injuries on the extremities of the driver. By analyzing injuries such as bleeding from certain regions, skin scratches, wounds, fractures, and bruises, the diagnostic coefficients can provide valuable information to experts. These coefficients help in identifying the most informative features that characterize the driver of a vehicle involved in a traffic accident, thereby aiding in objectively determining the driver's seat position inside the car passenger compartment.


### Evaluate the query engine

To run an evaluation with Ragas and LlamaIndex, you need three things:

- LlamaIndex QueryEngine: what we will be evaluating

- Metrics: Ragas defines a set of metrics that can measure different aspects of the QueryEngine. The available metrics and their meanings are listed in the Ragas metrics documentation.

- Questions: A list of questions that Ragas will test the QueryEngine against.

In [9]:
from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall
from ragas.metrics.critique import harmfulness

# Metrics the query engine is scored on, in the order they are passed to evaluate().
metrics = [faithfulness, answer_relevancy, context_precision, context_recall, harmfulness]

In [10]:
# These two imports repeat ones from an earlier cell, so this cell also runs on its own.
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# LLM used to score the metrics. GPT-3.5 is used here;
# switch to GPT-4 / GPT-4-turbo for better accuracy.
evaluator_llm = OpenAI(model="gpt-3.5-turbo")

In [11]:
# convert to HF dataset
ds = testset.to_dataset()

ds_dict = ds.to_dict()
ds_dict["question"]
ds_dict["ground_truth"]

["The sequential mathematical analysis can be used to determine the driver's seat position inside the car passenger compartment based on injuries to the extremities in the case of a traffic accident by calculating diagnostic coefficients (DC) for specific injuries to the upper and lower extremities. These coefficients are derived from statistical and mathematical treatment of archival expert documents, allowing experts to identify the most informative features that characterize the driver of a vehicle. By analyzing injuries such as bleeding, skin scratches, wounds, fractures, and bruises, experts can objectively determine the driver's seat position and improve the quality of forensic medical expertise in traffic accidents.",
 'Resistance mutations in the HCV NS3/4A region can substantially reduce drug susceptibility in patients receiving antiviral therapy with protease inhibitors for chronic hepatitis C. Identifying these mutations is crucial for determining the efficiency of therapy a

evaluation

In [12]:
from ragas.integrations.llama_index import evaluate

# Run every test question through the query engine and score the answers
# with the selected metrics.
result = evaluate(
    query_engine=query_engine,
    metrics=metrics,
    dataset=ds,
    llm=evaluator_llm,
    # Reuse the embedding model configured for test-set generation rather than
    # constructing a second OpenAIEmbedding(), so the model is set in one place.
    embeddings=embeddings,
)

Running Query Engine:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs


In [13]:
# Show the overall score for each metric.
print(result)

{'faithfulness': 0.7633, 'answer_relevancy': 0.9393, 'context_precision': 0.8500, 'context_recall': 0.7667, 'harmfulness': 0.0000}


In [14]:
# Per-question breakdown: question, contexts, answer, ground truth, and each metric score.
result.to_pandas()

Unnamed: 0,question,contexts,answer,ground_truth,faithfulness,answer_relevancy,context_precision,context_recall,harmfulness
0,How can the sequential mathematical analysis b...,[[The use of the sequential mathematical analy...,The sequential mathematical analysis can be ut...,The sequential mathematical analysis can be us...,1.0,0.952606,1.0,1.0,0
1,How do resistance mutations in the HCV NS3/4A ...,[[Resistance to protease inhibitors and effici...,Resistance mutations in the HCV NS3/4A region ...,Resistance mutations in the HCV NS3/4A region ...,1.0,0.912357,1.0,1.0,0
2,What is a symptom associated with the conditio...,[[Urinary excretion of markers for podocyte in...,Polydipsia is a symptom associated with the co...,Polyuria is a symptom associated with the cond...,0.0,0.909506,0.5,0.0,0
3,What is the significance of proliferative acti...,[[A correlation between diffusion kurtosis ima...,The significance of proliferative activity in ...,The significance of proliferative activity in ...,1.0,1.0,1.0,0.666667,0
4,What is the significance of podocyte structura...,[[Urinary excretion of markers for podocyte in...,The significance of podocyte structural protei...,The significance of podocyte structural protei...,0.833333,1.0,1.0,1.0,0
5,How can the social and economic impacts of pod...,[The feasibility of eliminating podoconiosis. ...,The social and economic impacts of podoconiosi...,The social and economic impacts of podoconiosi...,0.8,0.974679,1.0,1.0,0
6,How does blood examination help diagnose pulmo...,[[Pulmonary paecilomycosis: Diagnosis and trea...,Blood examination helps diagnose pulmonary pae...,Blood examination does not help diagnose pulmo...,0.5,1.0,1.0,0.5,0
7,What treatments are used for chronic hepatitis...,[[Resistance to protease inhibitors and effici...,Triple therapy with protease inhibitors (bocep...,The answer to given question is not present in...,0.5,0.826072,0.0,1.0,0
8,What is the significance of intramural hematom...,[[Internal carotid artery dissection as a caus...,Intramural hematoma in carotid artery dissecti...,The significance of intramural hematoma in car...,1.0,0.91998,1.0,1.0,0
9,How does coronary atherosclerosis affect heart...,[[Functional morphology of ischemic cardiomyop...,Coronary atherosclerosis leads to higher level...,Coronary atherosclerosis affects heart disease...,1.0,0.897993,1.0,0.5,0
