In [None]:
!pip install llama-index-core
!pip install llama-index-llms-ollama
!pip install llama-index-embeddings-huggingface
!pip install pandas
!pip install matplotlib

In [None]:
# Download the Ollama install script and pipe it to `sh`. The `sed` step rewrites the
# script's download URL so the binary comes from the v0.1.20 GitHub release.
!curl https://ollama.ai/install.sh | sed 's#https://ollama.ai/download#https://github.com/jmorganca/ollama/releases/download/v0.1.20#' | sh
# Install neofetch, which the next cell runs.
!sudo apt install -y neofetch

In [None]:
# Run neofetch (installed in the previous cell) to show the machine this notebook runs on.
!neofetch

In [None]:
# Name of the Ollama model used by the `ollama run` shell command later in the notebook.
OLLAMA_MODEL='mistral'

# Set it at the OS level
# (exporting it lets `!` shell commands read it as $OLLAMA_MODEL)
import os
os.environ['OLLAMA_MODEL'] = OLLAMA_MODEL
# Echo the variable back from the shell to confirm it is visible there.
!echo $OLLAMA_MODEL

In [None]:
import subprocess
import time

# Start the Ollama server as a background process.
#
# Output is appended to a log file instead of being sent to subprocess.PIPE:
# nothing in this notebook reads those pipes, so a server that keeps logging
# would eventually fill the OS pipe buffer and block on its next write.
OLLAMA_LOG_PATH = "ollama_serve.log"

with open(OLLAMA_LOG_PATH, "ab") as ollama_log:
    # The command is given as an argument list (no shell, no trailing `&`) so
    # `process.pid` is the server's own PID rather than that of a wrapper shell.
    # `start_new_session=True` puts the server in its own session, so signals
    # aimed at the kernel (e.g. an interrupt) are not delivered to it as well.
    process = subprocess.Popen(
        ["ollama", "serve"],
        stdout=ollama_log,
        stderr=subprocess.STDOUT,
        start_new_session=True,
    )
print("Process ID:", process.pid)
# Let's use fly.io resources
#!OLLAMA_HOST=https://ollama-demo.fly.dev:443
time.sleep(5)  # Makes Python wait for 5 seconds

# Report (without raising) if the server has already exited, e.g. because
# another instance is already listening; the log file holds the reason.
if process.poll() is not None:
    print(
        f"'ollama serve' exited early with code {process.returncode}; "
        f"see {OLLAMA_LOG_PATH}"
    )

In [None]:
# Print the installed Ollama version as a quick check that the CLI is available.
!ollama -v

In [None]:
# Smoke test: send one prompt to the configured model through the Ollama CLI.
# $OLLAMA_MODEL is the environment variable exported in an earlier cell.
!ollama run $OLLAMA_MODEL "Translate the following emoji sentence to a meaningful text : 💙🤓💻📱🔧🌀💡"

In [None]:
from llama_index.core import (
    Settings,
    VectorStoreIndex,
    Document,
)

from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core.response.notebook_utils import display_source_node
from llama_index.core.evaluation import RetrieverEvaluator

from llama_index.core.evaluation import (
    EmbeddingQAFinetuneDataset,
)

import matplotlib.pyplot as plt
import pandas as pd
import json
import os

In [None]:
# Embedding model used by llama-index for documents and queries.
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)
# Reuse OLLAMA_MODEL (set near the top of the notebook) instead of repeating the
# literal name, so the LLM used through llama-index always matches the model
# that was run with the `ollama` CLI.
Settings.llm = Ollama(
    model=OLLAMA_MODEL, request_timeout=60.0
)

In [None]:
# Load the benchmark and keep every row whose 'E' column is not 'bad'.
# Original index labels are preserved, so gaps may appear where rows were removed.
df = pd.read_csv('benchmark.csv').loc[lambda frame: frame['E'] != 'bad']
df.head()

In [None]:
# Show each distinct value of the 'T' column once (set order is arbitrary).
for table_name in set(df['T']):
    print(table_name)

# Each Row as a Document

In [None]:
# One Document per benchmark row, plus a matching retrieval-evaluation dataset.
documents = []

# Plain-dict form of the evaluation data:
#   queries       : query id -> question text
#   corpus        : doc id   -> answer text
#   relevant_docs : query id -> list of doc ids that answer it
eval_dataset = {
    'queries': dict(),
    'corpus': dict(),
    'relevant_docs': dict()
}

for idx in df.index:
    table = df['T'][idx]
    question = df['Q'][idx]
    answer = df['A'][idx]
    # The answer is the indexed text; the table name is kept as metadata.
    document = Document(
            text=answer,
            metadata={
                'table': table
            },
            doc_id=f"doc_{idx}",
        )
    documents.append(document)

    # Ids embed the DataFrame index label, so query_<n> and doc_<n> refer to the same row.
    eval_dataset['queries'].update(
        {f"query_{idx}":
             f"For {table}, {question}"}
    )
    eval_dataset['corpus'].update(
        {f"doc_{idx}": answer}
    )
    eval_dataset['relevant_docs'].update(
        {f"query_{idx}": [f"doc_{idx}"]}
    )

# The evaluation cells further down read `qa_dataset.queries` /
# `qa_dataset.relevant_docs` and pass `qa_dataset` to `aevaluate_dataset`, but the
# notebook never created it. Build it here from the dictionaries assembled above.
qa_dataset = EmbeddingQAFinetuneDataset(
    queries=eval_dataset['queries'],
    corpus=eval_dataset['corpus'],
    relevant_docs=eval_dataset['relevant_docs'],
)

In [None]:
# Number of Document objects built from the benchmark rows.
len(documents)

In [None]:
# Inspect the first Document to check its text and metadata.
documents[0]

In [None]:
# Show one evaluation entry: the query, the text of its relevant document, and
# the list of relevant document ids.
# The ids are looked up instead of being hard-coded as 'query_0' / 'doc_0': they
# carry the DataFrame index label, and label 0 does not exist when that row was
# removed by the earlier filtering step.
first_query_id = next(iter(eval_dataset['queries']))
first_doc_id = eval_dataset['relevant_docs'][first_query_id][0]

print(eval_dataset['queries'][first_query_id])
print()
print(eval_dataset['corpus'][first_doc_id])
print()
print(eval_dataset['relevant_docs'][first_query_id])

In [None]:
# Build the vector index over the per-row documents.
# NOTE(review): the documents are passed straight to the constructor rather than via
# `VectorStoreIndex.from_documents`; presumably this keeps each Document as a single
# node whose id is the `doc_<n>` id the evaluation data refers to — confirm.
vector_index = VectorStoreIndex(documents)
print("Index created")

In [None]:
# Example question used by the retrieval demo in the next cell.
query = """For what purpose was the 
TrainsUK1 dataset created? Was there a specific task in mind? 
Was there a specific gap that needed to be filled? Please provide a description."""

In [None]:
# Fetch the four most similar nodes for `query` and render each of them,
# showing up to 1000 characters of source text per node.
retriever = vector_index.as_retriever(similarity_top_k=4)
retrieved_data = retriever.retrieve(query)
for source_node in retrieved_data:
    display_source_node(source_node, source_length=1000)

In [None]:
# Question for the end-to-end query engine in the next cell (replaces the previous `query`).
query = """Which dataset was designed to analyze the punctuality and performance?
Mention the name of the dataset."""

In [None]:
# Answer `query` with a query engine built from the index (default settings) and print the result.
query_engine = vector_index.as_query_engine()
query_response = query_engine.query(query)
print("Response:\n", query_response)

# Retriever top-K Evaluation

In [None]:
# Patch asyncio so event loops can be nested; the notebook kernel already runs a
# loop, and the evaluation cells below use async llama-index calls.
import nest_asyncio

nest_asyncio.apply()

In [None]:
# The free API is limited to 10 calls per minute
include_cohere_rerank = False

if include_cohere_rerank:
    !pip install cohere -q

In [None]:
# Metrics computed for every retriever under test.
metrics = ["mrr", "hit_rate"]
if include_cohere_rerank:
    # requires COHERE_API_KEY environment variable to be set
    metrics += ["cohere_rerank_relevancy"]

# Parallel lists: retriever_names[i] labels evaluators[i].
retriever_names = []
evaluators = []

# One retriever/evaluator pair per top-k setting: k = 1, 3, 5.
for k in (1, 3, 5):
    vector_retriever = vector_index.as_retriever(similarity_top_k=k)
    vector_evaluator = RetrieverEvaluator.from_metric_names(
        metrics,
        retriever=vector_retriever,
    )
    retriever_names += [f"Vector Index Top{k}"]
    evaluators += [vector_evaluator]

In [None]:
# Try a single query against one evaluator before running the whole dataset.
sample_id = list(qa_dataset.queries)[0]
sample_query = qa_dataset.queries[sample_id]
sample_expected = qa_dataset.relevant_docs[sample_id]

print(sample_id, sample_query, sample_expected, sep="\n")

# evaluators[2] is the third evaluator created above.
eval_result = evaluators[2].evaluate(sample_query, sample_expected)
print(eval_result)

In [None]:
def display_results(names, eval_results):
    """Summarise evaluation runs as one row of mean metric values per retriever.

    The metric columns are taken from the results themselves instead of from a
    notebook-level flag, so the function works for whichever metrics the
    evaluators were configured with.

    Args:
        names: Retriever labels, one per entry of ``eval_results``, same order.
        eval_results: One iterable per retriever. Every item must expose a
            ``metric_vals_dict`` attribute mapping metric name -> numeric value.

    Returns:
        pandas.DataFrame with a ``retriever`` column followed by one column per
        metric holding its mean. ``hit_rate`` and ``mrr`` always come first;
        any other numeric metric found in the results (for example
        ``cohere_rerank_relevancy``) follows in first-seen order. A retriever
        that lacks one of those extra metrics gets NaN in that column.

    Raises:
        KeyError: If ``hit_rate`` or ``mrr`` is missing from a retriever's results.
    """
    required_metrics = ("hit_rate", "mrr")
    metric_names = list(required_metrics)

    # One frame per retriever: rows are individual results, columns are metrics.
    frames = []
    for eval_result in eval_results:
        full_df = pd.DataFrame(
            [result.metric_vals_dict for result in eval_result]
        )
        frames.append(full_df)

        # Pick up additional numeric metrics that are present in the data.
        for column in full_df.columns:
            if column not in metric_names and pd.api.types.is_numeric_dtype(
                full_df[column]
            ):
                metric_names.append(column)

    columns = {"retriever": names}
    for metric in metric_names:
        columns[metric] = [
            # Required metrics are indexed directly so a missing one still fails loudly.
            frame[metric].mean()
            if metric in required_metrics or metric in frame.columns
            else float("nan")
            for frame in frames
        ]

    metric_df = pd.DataFrame(columns)

    return metric_df

In [None]:
eval_results = []

start_time = time.time()
for evaluator in evaluators:
    eval_result = await evaluator.aevaluate_dataset(qa_dataset, show_progress=True)
    eval_results.append(eval_result)
end_time = time.time()

In [None]:
# Elapsed time, in seconds, of the evaluation loop in the previous cell.
print("Time taken for evaluation:", end_time - start_time)

In [None]:
# First individual result of the first evaluator, shown as an example.
eval_results[0][0]

In [None]:
# Aggregate the per-query results into one row of mean metrics per retriever.
metric_df = display_results(retriever_names, eval_results)
metric_df

In [None]:
# Plot both metrics against the retriever they belong to. Using the retriever
# name on the x axis (instead of the row position) and labelling the axes lets
# the figure be read without the table above.
ax = metric_df.plot.line(x='retriever', y=['hit_rate', 'mrr'], marker='o')
ax.set(
    xlabel='Retriever',
    ylabel='Mean score',
    title='Retrieval quality by top-k',
)
plt.show()