In [28]:
!pip install llama-index



import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.ERROR)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

import nest_asyncio
nest_asyncio.apply()




In [29]:
import os
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding

# Single source of truth for the Ollama endpoint used by both models below.
# Defaults to the local server; export OLLAMA_BASE_URL to point elsewhere.
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")

# Configure Ollama LLM
ollama_llm = Ollama(
    # model="llama3.2:latest",  # alternative model
    model="mistral:7b",
    base_url=OLLAMA_BASE_URL,
    temperature=0.1,
)

# Configure embedding model
ollama_embedding = OllamaEmbedding(
    model_name="nomic-embed-text:latest",
    base_url=OLLAMA_BASE_URL,
    ollama_additional_kwargs={"mirostat": 0},
)

# Register both models on the global Settings object so later cells can rely
# on them without passing them explicitly.
Settings.llm = ollama_llm
Settings.embed_model = ollama_embedding

In [30]:
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    Response,
)
from llama_index.llms.openai import OpenAI
from llama_index.core.evaluation import FaithfulnessEvaluator
from llama_index.core.node_parser import SentenceSplitter
import pandas as pd

# None is pandas' documented "unlimited" value for column width, so long
# Response / Source strings are not cut off with "...".
pd.set_option("display.max_colwidth", None)

In [31]:
# Faithfulness evaluator used by the cells below; it is driven by the local
# Ollama LLM configured earlier.
faithful_evaluator = FaithfulnessEvaluator(
    llm=ollama_llm,
)

In [32]:
# Load the documents stored under ./test_wiki_data/.
wiki_reader = SimpleDirectoryReader("./test_wiki_data/")
documents = wiki_reader.load_data()

In [33]:
# Build the vector index; documents are first chunked by a sentence splitter
# configured with chunk_size=512.
splitter = SentenceSplitter(chunk_size=512)
index_transformations = [splitter]
vector_index = VectorStoreIndex.from_documents(
    documents,
    transformations=index_transformations,
)

In [34]:
from llama_index.core.evaluation import EvaluationResult


# define jupyter display function
def display_eval_df(response: Response, eval_result: EvaluationResult) -> None:
    """Render one query response and its evaluation verdict as a styled table.

    Args:
        response: Query-engine response. Its string form fills the "Response"
            column and the text of its first source node fills "Source".
        eval_result: Evaluation outcome. ``passing`` is shown as Pass/Fail and
            ``feedback`` fills the "Reasoning" column.

    Returns:
        None. Prints "no response!" and returns without rendering anything
        when the response carries no source nodes (empty or ``None``).
    """
    # Truthiness check covers both an empty list and None, either of which
    # would otherwise fail on the [0] lookup below.
    if not response.source_nodes:
        print("no response!")
        return

    max_source_chars = 1000
    source_text = response.source_nodes[0].node.text
    if len(source_text) > max_source_chars:
        # Only add the ellipsis when the excerpt was actually shortened.
        source_text = source_text[:max_source_chars] + "..."

    eval_df = pd.DataFrame(
        {
            "Response": str(response),
            "Source": source_text,
            "Evaluation Result": "Pass" if eval_result.passing else "Fail",
            "Reasoning": eval_result.feedback,
        },
        index=[0],
    )
    # Constrain the two long text columns so they wrap instead of stretching
    # the table.
    styled_df = eval_df.style.set_properties(
        **{
            "inline-size": "600px",
            "overflow-wrap": "break-word",
        },
        subset=["Response", "Source"],
    )
    display(styled_df)

In [35]:
# Ask one question against the index, then grade the answer with the
# faithfulness evaluator.
query_engine = vector_index.as_query_engine()
nyc_name_question = "How did New York City get its name?"
response_vector = query_engine.query(nyc_name_question)
eval_result = faithful_evaluator.evaluate_response(
    response=response_vector,
)

In [36]:
# Show the answer, its first source excerpt and the verdict in one table.
display_eval_df(response=response_vector, eval_result=eval_result)

Unnamed: 0,Response,Source,Evaluation Result,Reasoning
0,"New York City was named after King Charles II of England, who granted the lands to his brother, the Duke of York. The city was originally called New Amsterdam by the Dutch colonists, but it was renamed New York in 1664 when it came under British control.","The settlement was named New Amsterdam (Dutch: Nieuw Amsterdam) in 1626 and was chartered as a city in 1653. The city came under British control in 1664 and was renamed New York after King Charles II of England granted the lands to his brother, the Duke of York. The city was regained by the Dutch in July 1673 and was renamed New Orange for one year and three months; the city has been continuously named New York since November 1674. New York City was the capital of the United States from 1785 until 1790, and has been the largest U.S. city since 1790. The Statue of Liberty greeted millions of immigrants as they came to the U.S. by ship in the late 19th and early 20th centuries, and is a symbol of the U.S. and its ideals of liberty and peace. In the 21st century, New York City has emerged as a global node of creativity, entrepreneurship, and as a symbol of freedom and cultural diversity. The New York Times has won the most Pulitzer Prizes for journalism and remains the U.S. media's ""newsp...",Pass,YES


In [37]:
from llama_index.core.evaluation import DatasetGenerator

# NOTE(review): the recorded output of this cell contains warning fragments;
# DatasetGenerator appears to be deprecated in recent llama-index releases in
# favour of RagDatasetGenerator — confirm before upgrading.
num_eval_questions = 5
question_generator = DatasetGenerator.from_documents(documents)
eval_questions = question_generator.generate_questions_from_nodes(
    num_eval_questions
)

# Bare last expression so the notebook renders the generated questions.
eval_questions

  return cls(
  return QueryResponseDataset(queries=queries, responses=responses_dict)


['What is the most populous city in the United States as of 2020?',
 '2. What is the total area (in square miles) of New York City?',
 '3. How many times more populous is New York City than Los Angeles, the second-largest city in the U.S.?',
 '4. In which state is New York City located?',
 '5. Which megalopolis and metropolitan area does New York City constitute the geographical and demographic center of?']

In [52]:
import asyncio


def evaluate_query_engine(query_engine, questions, evaluator=None):
    """Query the engine with every question concurrently and score the answers.

    Args:
        query_engine: Object exposing an async ``aquery(question)`` method.
        questions: Iterable of question strings.
        evaluator: Object exposing ``evaluate_response(response=...)`` whose
            result has a ``passing`` attribute. Defaults to the notebook-level
            ``faithful_evaluator``.

    Returns:
        Tuple ``(total_correct, total)``: the number of responses the
        evaluator marked as passing, and the number of responses evaluated.

    Note:
        Uses ``asyncio.run``; inside a notebook this relies on the
        ``nest_asyncio.apply()`` call made in the setup cell.
    """
    if evaluator is None:
        evaluator = faithful_evaluator

    # Materialise once: the questions are iterated for querying and again for
    # reporting, which would silently drop items for a one-shot iterator.
    questions = list(questions)

    async def _run_queries():
        # asyncio.run needs a coroutine, not the Future gather() returns.
        # gather() returns results in the order the awaitables were passed.
        return await asyncio.gather(*(query_engine.aquery(q) for q in questions))

    results = asyncio.run(_run_queries())
    print("finished query")

    total_correct = 0

    # Iterate through questions and responses together
    for question, response in zip(questions, results):
        passed = bool(evaluator.evaluate_response(response=response).passing)
        total_correct += int(passed)

        # Print question, response, and evaluation result
        print(f"Question: {question}")
        print(f"Response: {response}")
        print(f"Result: {'Pass' if passed else 'Fail'}")
        print("=" * 50)

    return total_correct, len(results)

In [53]:
# Score the first five generated questions with a fresh query engine.
vector_query_engine = vector_index.as_query_engine()
question_subset = eval_questions[:5]
correct, total = evaluate_query_engine(vector_query_engine, question_subset)

print(f"score: {correct}/{total}")

finished query
Question: What is the most populous city in the United States as of 2020?
Response:  New York City is the most populous city in the United States as of 2020.
Result: Pass
Question: 2. What is the total area (in square miles) of New York City?
Response: 302.643 square miles (783.84 km2)
Result: Pass
Question: 3. How many times more populous is New York City than Los Angeles, the second-largest city in the U.S.?
Response: 2 times as populous.
Result: Fail
Question: 4. In which state is New York City located?
Response:  The city of New York is located in the state of New York.
Result: Pass
Question: 5. Which megalopolis and metropolitan area does New York City constitute the geographical and demographic center of?
Response:  The Northeast megalopolis.
Result: Pass
score: 4/5
