In [40]:
!pip install llama-index
%pip install llama-index-readers-file



import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.ERROR)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

import nest_asyncio
nest_asyncio.apply()





In [41]:
import os
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding

# Configure Ollama LLM
ollama_llm = Ollama(
    #model="llama3.2:latest",
    model="mistral:7b",
    base_url="http://localhost:11434",
    temperature=0.1
)

# Configure embedding model
ollama_embedding = OllamaEmbedding(
    model_name="nomic-embed-text:latest",
    base_url="http://localhost:11434",
    ollama_additional_kwargs={"mirostat": 0}
)

Settings.llm = ollama_llm
Settings.embed_model = ollama_embedding

In [44]:
from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
import pandas as pd

pd.set_option("display.max_colwidth", 0)

In [45]:
from llama_index.core import SimpleDirectoryReader

documents = SimpleDirectoryReader(input_files=['../data/paul_graham_essay3.txt']).load_data()
# documents = SimpleDirectoryReader(input_files=['../data/2022 Q3 AAPL.pdf']).load_data()


In [46]:
node_parser = SentenceSplitter(chunk_size=512)
nodes = node_parser.get_nodes_from_documents(documents)

In [47]:
# by default, the node ids are set to random uuids. To ensure same id's per run, we manually set them.
for idx, node in enumerate(nodes):
    node.id_ = f"node_{idx}"

In [48]:
vector_index = VectorStoreIndex(nodes)
retriever = vector_index.as_retriever(similarity_top_k=2)

In [49]:
retrieved_nodes = retriever.retrieve("What did the author do growing up?")

In [50]:
from llama_index.core.response.notebook_utils import display_source_node

for node in retrieved_nodes:
    display_source_node(node, source_length=1000)

**Node ID:** node_33<br>**Similarity:** 0.6204804937459925<br>**Text:** I certainly did. So at the end of the summer Dan and I switched to working on this new dialect of Lisp, which I called Arc, in a house I bought in Cambridge.

The following spring, lightning struck. I was invited to give a talk at a Lisp conference, so I gave one about how we'd used Lisp at Viaweb. Afterward I put a postscript file of this talk online, on paulgraham.com, which I'd created years before using Viaweb but had never used for anything. In one day it got 30,000 page views. What on earth had happened? The referring urls showed that someone had posted it on Slashdot. [10]

Wow, I thought, there's an audience. If I write something and put it on the web, anyone can read it. That may seem obvious now, but it was surprising then. In the print era there was a narrow channel to readers, guarded by fierce monsters known as editors. The only way to get an audience for anything you wrote was to get it published as a book, or in a newspaper or magazine. Now anyone could publish anythi...<br>

**Node ID:** node_0<br>**Similarity:** 0.6177200811881809<br>**Text:** What I Worked On

February 2021

Before college the two main things I worked on, outside of school, were writing and programming. I didn't write essays. I wrote what beginning writers were supposed to write then, and probably still are: short stories. My stories were awful. They had hardly any plot, just characters with strong feelings, which I imagined made them deep.

The first programs I tried writing were on the IBM 1401 that our school district used for what was then called "data processing." This was in 9th grade, so I was 13 or 14. The school district's 1401 happened to be in the basement of our junior high school, and my friend Rich Draves and I got permission to use it. It was like a mini Bond villain's lair down there, with all these alien-looking machines — CPU, disk drives, printer, card reader — sitting up on a raised floor under bright fluorescent lights.

The language we used was an early version of Fortran. You had to type programs on punch cards, then stack them in ...<br>

In [51]:
from llama_index.core.evaluation import (
    generate_question_context_pairs,
    EmbeddingQAFinetuneDataset,
)

In [52]:
qa_dataset = generate_question_context_pairs(
    nodes, llm=ollama_llm, num_questions_per_chunk=2
)

100%|██████████| 57/57 [01:55<00:00,  2.02s/it]


In [53]:
queries = qa_dataset.queries.values()
print(list(queries)[2])

Question 1 (Historical Context): In what year did the speaker purchase his first microcomputer?


In [54]:
import pandas as pd

# Access the `queries` attribute
queries = qa_dataset.queries

# Convert the queries dictionary to a DataFrame
queries_df = pd.DataFrame(list(queries.items()), columns=["Query ID", "Question"])

# Display the DataFrame
print(queries_df)


                                 Query ID  \
0    5abc3be7-bae0-442c-ad25-37de4cf273ed   
1    ce0a59b0-ac87-4bd5-b008-6dde61907ebf   
2    8557b956-9e44-4569-82eb-3dc54c024b7b   
3    e963d882-2d72-4d78-b2ba-c950e92674b0   
4    076eb60d-05a3-4022-b0be-52c696fbd25a   
..                                    ...   
109  d339f1b4-0437-4d36-b6ce-b454d1d05368   
110  dadbd5aa-731e-4458-a5bb-d6929aa34fec   
111  c4d40f9f-2d2d-4b0a-a238-5addba3d023f   
112  551f00ea-6add-453a-bf2a-06803de09b1c   
113  0809f39f-e732-4565-8960-56cfe535dd3b   

                                                                                                                                                                                                                                                                                                                                                                   Question  
0    Question 1 (Historical Context): In what year did the narrator first start programming? 

In [55]:
# [optional] save
qa_dataset.save_json("pg_eval_dataset.json")

In [56]:
# [optional] load
qa_dataset = EmbeddingQAFinetuneDataset.from_json("pg_eval_dataset.json")

In [29]:
import getpass
import os

os.environ["COHERE_API_KEY"] = getpass.getpass("cohere api key: ")

In [57]:
include_cohere_rerank = False
# include_cohere_rerank = True

if include_cohere_rerank:
    !pip install cohere -q

In [58]:
from llama_index.core.evaluation import RetrieverEvaluator

metrics = ["hit_rate", "mrr", "precision", "recall", "ap", "ndcg"]

if include_cohere_rerank:
    metrics.append(
        "cohere_rerank_relevancy"  # requires COHERE_API_KEY environment variable to be set
    )

retriever_evaluator = RetrieverEvaluator.from_metric_names(
    metrics, retriever=retriever
)

In [59]:
# try it out on a sample query
sample_id, sample_query = list(qa_dataset.queries.items())[0]
sample_expected = qa_dataset.relevant_docs[sample_id]

eval_result = retriever_evaluator.evaluate(sample_query, sample_expected)
print(eval_result)

Query: Question 1 (Historical Context): In what year did the narrator first start programming? Explain the environment and machine he used during that time.
Metrics: {'hit_rate': 0.0, 'mrr': 0.0, 'precision': 0.0, 'recall': 0.0, 'ap': 0.0, 'ndcg': 0.0}



In [60]:
# try it out on an entire dataset
eval_results = await retriever_evaluator.aevaluate_dataset(qa_dataset)

In [61]:
import pandas as pd


def display_results(name, eval_results):
    """Display results from evaluate."""

    metric_dicts = []
    for eval_result in eval_results:
        metric_dict = eval_result.metric_vals_dict
        metric_dicts.append(metric_dict)

    full_df = pd.DataFrame(metric_dicts)

    columns = {
        "retrievers": [name],
        **{k: [full_df[k].mean()] for k in metrics},
    }

    if include_cohere_rerank:
        crr_relevancy = full_df["cohere_rerank_relevancy"].mean()
        columns.update({"cohere_rerank_relevancy": [crr_relevancy]})

    metric_df = pd.DataFrame(columns)

    return metric_df

In [62]:
display_results("top-2 eval", eval_results)

Unnamed: 0,retrievers,hit_rate,mrr,precision,recall,ap,ndcg
0,top-2 eval,0.552632,0.469298,0.276316,0.552632,0.469298,0.49112
