In [28]:
import os
import pickle
import nest_asyncio
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.settings import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from ragas.integrations.llama_index import evaluate

# nest_asyncio patches asyncio so that event loops can be nested, which is
# needed because the notebook kernel already runs its own loop.
nest_asyncio.apply()

# Never store API keys in the notebook: they end up in version control and in
# every shared copy. Read the key from the environment and only prompt for it
# (without echoing) when it is not set.
# NOTE: the key that used to be hardcoded here must be treated as compromised
# and revoked in the OpenAI dashboard.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass

    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Load Dataset

In [3]:
# Load the pickled test dataset object from the working directory.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only open test_dataset.pkl when it comes from a trusted source.
with open('test_dataset.pkl', 'rb') as file:
    loaded_test_dataset = pickle.load(file)

In [7]:
# Tabular (pandas) view of the test set, previewed via its first five rows.
df = loaded_test_dataset.to_pandas()
df.head(n=5)

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What is included in the insurance coverage for...,[ \nist ausschließlich der Wortlaut der Versic...,The answer to given question is not present in...,simple,"[{'page_label': '3', 'file_name': 'fahrrad.pdf...",True
1,"What services are included in the ""Telefonisch...",[ = versichert\nDiese Leistungsübersicht ist ...,Telefonische Erstberatung is included in the s...,simple,"[{'page_label': '3', 'file_name': 'cyberversic...",True
2,What determines the bike insurance premium?,[Leistungen BASIS-Tarif OPTIMAL-Tarif\nEntschä...,The height of the insurance premium depends on...,reasoning,"[{'page_label': '3', 'file_name': 'fahrrad.pdf...",True
3,What are the benefits of WGV's new bicycle ins...,[wgv.de/fahrrad\nDIE FAHRRAD-\nVERSICHERUNG DE...,The benefits of WGV's new bicycle insurance ta...,multi_context,"[{'page_label': '1', 'file_name': 'fahrrad.pdf...",True
4,What is the coverage area of my WGV insurance ...,[Leistungen BASIS-Tarif OPTIMAL-Tarif\nEntschä...,"Weltweite Geltung, zeitlich unbegrenzt bis 12 ...",reasoning,"[{'page_label': '3', 'file_name': 'fahrrad.pdf...",True


# Building the QueryEngine

In [11]:
# Directory holding the source documents to index. Set the
# SALESASSISTENT_DATA_DIR environment variable to point the notebook at another
# location instead of editing this cell; the fallback is the original
# machine-specific path, so existing runs behave as before.
DATA_DIR = os.environ.get(
    "SALESASSISTENT_DATA_DIR",
    "C:/Users/aydem/Documents/salesassistent/dataset/test/",
)

# Load the documents found in DATA_DIR, build a vector index over them and
# expose the index as a query engine (all with default settings).
documents = SimpleDirectoryReader(DATA_DIR).load_data()
vector_index = VectorStoreIndex.from_documents(documents)
query_engine = vector_index.as_query_engine()

In [20]:
# Convert the loaded test set to a pandas DataFrame.
df = loaded_test_dataset.to_pandas()

# Fetch the cell with a single label-based .loc lookup instead of the chained
# df["question"][4], which depends on how Series handles integer keys.
df.loc[4, "question"]

'What is the coverage area of my WGV insurance policy?'

In [21]:
# Send the first test question through the query engine as a quick smoke test.
# The question is read with one label-based .loc lookup rather than the chained
# df["question"][0].
first_question = df.loc[0, "question"]
response_vector = query_engine.query(first_question)

print(response_vector)

Verschleiss (wear and tear) is covered in both the BASIS and OPTIMAL tariffs.


# Evaluation

In [24]:
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
)
from ragas.metrics.critique import harmfulness

# Metrics handed to the ragas evaluation, grouped by what they look at.
metrics = [
    # answer-side metrics
    faithfulness, answer_relevancy,
    # context (retrieval) metrics
    context_precision, context_recall,
    # critique metric (imported from ragas.metrics.critique)
    harmfulness,
]

# LLM instance that is passed to evaluate() as the `llm` argument.
evaluator_llm = OpenAI(model="gpt-3.5-turbo")

In [26]:
# Convert the test set to a Hugging Face dataset and then to a plain dict;
# that dict is what gets passed to evaluate() as `dataset`.
ds = loaded_test_dataset.to_dataset()
ds_dict = ds.to_dict()

# A notebook cell only displays its last expression, so the former bare
# ds_dict["question"] statement had no visible effect and was dropped.
ds_dict["ground_truth"]

['The answer to given question is not present in context',
 'Telefonische Erstberatung is included in the services of the WGV Cyberversicherung.',
 'The height of the insurance premium depends on the chosen tariff and the value of the bike.',
 "The benefits of WGV's new bicycle insurance tariff include improved coverage and benefits in the updated policy.",
 'Weltweite Geltung, zeitlich unbegrenzt bis 12 Monate']

In [29]:
# Embedding model passed to evaluate() as the `embeddings` argument.
evaluator_embeddings = OpenAIEmbedding()

# Collect the evaluation inputs in one place, then run the ragas/LlamaIndex
# evaluation over the query engine.
evaluation_kwargs = {
    "query_engine": query_engine,
    "metrics": metrics,
    "dataset": ds_dict,
    "llm": evaluator_llm,
    "embeddings": evaluator_embeddings,
}
result = evaluate(**evaluation_kwargs)

Running Query Engine:   0%|          | 0/5 [00:00<?, ?it/s]

Running Query Engine: 100%|██████████| 5/5 [00:03<00:00,  1.46it/s]
Evaluating:   0%|          | 0/25 [00:00<?, ?it/s]n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
Evaluating:   4%|▍         | 1/25 [00:02<01:09,  2.90s/it]n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
Evaluating: 100%|██████████| 25/25 [00:10<00:00,  2.46it/s]


In [30]:
# Print the evaluation result; the output shows one aggregate score per metric.
print(result)

{'faithfulness': 0.6400, 'answer_relevancy': 0.9681, 'context_precision': 0.7000, 'context_recall': 0.6000, 'harmfulness': 0.0000}


In [31]:
# Per-question breakdown: one row per sample with its question, contexts,
# answer, ground truth and the individual metric scores.
result.to_pandas()

Unnamed: 0,question,contexts,answer,ground_truth,faithfulness,answer_relevancy,context_precision,context_recall,harmfulness
0,What is included in the insurance coverage for...,[Leistungen BASIS-Tarif OPTIMAL-Tarif\nEntschä...,Verschleiss (wear and tear) is covered in both...,The answer to given question is not present in...,1.0,0.89781,0.0,0.0,0
1,"What services are included in the ""Telefonisch...",[= versichert\nDiese Leistungsübersicht ist le...,"The services included in the ""Telefonische Ers...",Telefonische Erstberatung is included in the s...,0.2,0.999348,1.0,1.0,0
2,What determines the bike insurance premium?,[Leistungen BASIS-Tarif OPTIMAL-Tarif\nEntschä...,The chosen tariff and the value of the bike de...,The height of the insurance premium depends on...,1.0,1.0,1.0,1.0,0
3,What are the benefits of WGV's new bicycle ins...,[= versichert – = nicht versichert\nDiese Leis...,The benefits of WGV's new bicycle insurance ta...,The benefits of WGV's new bicycle insurance ta...,0.0,0.97917,0.5,1.0,0
4,What is the coverage area of my WGV insurance ...,[Die Leistungsbeschreibungen in diesem Prospek...,The coverage areas of your WGV insurance polic...,"Weltweite Geltung, zeitlich unbegrenzt bis 12 ...",1.0,0.964,1.0,0.0,0
