In [1]:
pip list | findstr "llama-index"

llama-index                       0.9.48
Note: you may need to restart the kernel to use updated packages.


In [40]:
import nest_asyncio

In [42]:
nest_asyncio.apply()

from llama_index.evaluation import generate_question_context_pairs
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.node_parser import SimpleNodeParser
from llama_index.evaluation import generate_question_context_pairs
from llama_index.evaluation import RetrieverEvaluator
from llama_index.llms import OpenAI

import os
import pandas as pd


In [38]:
### Set Your OpenAI API Key
os.environ['OPENAI_API_KEY'] = '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'


In [44]:
import urllib.request

# Create folder if it doesn't exist
os.makedirs("data/paul_graham", exist_ok=True)

# Download the text file
url = "https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt"
save_path = "data/paul_graham/paul_graham_essay.txt"
urllib.request.urlretrieve(url, save_path)

# Load data
from llama_index import SimpleDirectoryReader
documents = SimpleDirectoryReader("data/paul_graham").load_data()

print(f"Loaded {len(documents)} documents successfully!")

Loaded 1 documents successfully!


In [46]:
# Define an LLM
llm = OpenAI(model="gpt-3.5-turbo", temperature=0)  
# Build index with a chunk_size of 512
node_parser = SimpleNodeParser.from_defaults(chunk_size=512)
nodes = node_parser.get_nodes_from_documents(documents)
vector_index = VectorStoreIndex(nodes)

## Build a QueryEngine and start querying.

query_engine = vector_index.as_query_engine()
response_vector = query_engine.query("What did the author do growing up?")


## Check response.
response_vector.response


'The author, growing up, worked on writing short stories and programming on an IBM 1401 computer in 9th grade using an early version of Fortran.'

In [48]:

# By default it retrieves two similar nodes/ chunks. You can modify that in vector_index.as_query_engine(similarity_top_k=k).
# First retrieved node
response_vector.source_nodes[0].get_text()

# Second retrieved node
response_vector.source_nodes[1].get_text()




"It felt like I was doing life right. I remember that because I was slightly dismayed at how novel it felt. The good news is that I had more moments like this over the next few years.\n\nIn the summer of 2016 we moved to England. We wanted our kids to see what it was like living in another country, and since I was a British citizen by birth, that seemed the obvious choice. We only meant to stay for a year, but we liked it so much that we still live there. So most of Bel was written in England.\n\nIn the fall of 2019, Bel was finally finished. Like McCarthy's original Lisp, it's a spec rather than an implementation, although like McCarthy's Lisp it's a spec expressed as code.\n\nNow that I could write essays again, I wrote a bunch about topics I'd had stacked up. I kept writing essays through 2020, but I also started to think about other things I could work on. How should I choose what to do? Well, how had I chosen what to work on in the past? I wrote an essay for myself to answer that 

In [50]:

## Evaluation

qa_dataset = generate_question_context_pairs(
    nodes,
    llm=llm,
    num_questions_per_chunk=2
)

## Retrieval Evaluation:
# We use Hit Rate and MRR metrics to evaluate our Retriever.
#
# Hit Rate:
#
# Hit rate calculates the fraction of queries where the correct answer is found within the top-k retrieved documents. In simpler terms, it’s about how often our system gets it right within the top few guesses.
#
# Mean Reciprocal Rank (MRR):
#
# For each query, MRR evaluates the system’s accuracy by looking at the rank of the highest-placed relevant document. Specifically, it’s the average of the reciprocals of these ranks across all the queries. So, if the first relevant document is the top result, the reciprocal rank is 1; if it’s second, the reciprocal rank is 1/2, and so on.

retriever = vector_index.as_retriever(similarity_top_k=2)
retriever_evaluator = RetrieverEvaluator.from_metric_names(
    ["mrr", "hit_rate"], retriever=retriever
)
# Evaluate
eval_results = await retriever_evaluator.aevaluate_dataset(qa_dataset)

100%|██████████████████████████████████████████████████████████████████████████████████| 59/59 [00:59<00:00,  1.00s/it]


In [52]:
def display_results(name, eval_results):
    """Display results from evaluate."""

    metric_dicts = []
    for eval_result in eval_results:
        metric_dict = eval_result.metric_vals_dict
        metric_dicts.append(metric_dict)

    full_df = pd.DataFrame(metric_dicts)

    hit_rate = full_df["hit_rate"].mean()
    mrr = full_df["mrr"].mean()

    metric_df = pd.DataFrame(
        {"Retriever Name": [name], "Hit Rate": [hit_rate], "MRR": [mrr]}
    )

    return metric_df

display_results("OpenAI Embedding Retriever", eval_results)

Unnamed: 0,Retriever Name,Hit Rate,MRR
0,OpenAI Embedding Retriever,0.79661,0.673729
