In [1]:
from dotenv import load_dotenv

load_dotenv('../../../.env')

True

# BaseLine Evaluation

In [4]:
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core import SimpleDirectoryReader

# First we create Document LlamaIndex objects from the text data
documents = SimpleDirectoryReader("./data/").load_data()
node_parser = SimpleNodeParser.from_defaults(chunk_size=512)
nodes = node_parser.get_nodes_from_documents(documents)

# By default, the node/chunks ids are set to random uuids. To ensure same id's per run, we manually set them.
for idx, node in enumerate(nodes):
    node.id_ = f"node_{idx}"

print(f"Number of Documents: {len(documents)}")
print(f"Number of nodes: {len(nodes)} with the current chunk size of {node_parser.chunk_size}")

Number of Documents: 1
Number of nodes: 57 with the current chunk size of 512


In [13]:
from langchain import HuggingFaceHub
from llama_index.core import set_global_tokenizer
from transformers import AutoTokenizer

llm = HuggingFaceHub(
    repo_id='mistralai/Mistral-7B-Instruct-v0.2',
    model_kwargs={'temperature':0.5,"max_length": 64,"max_new_tokens":512}
)

set_global_tokenizer(
    AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2").encode
)

# Vector index

In [14]:
from llama_index.core import VectorStoreIndex, ServiceContext, StorageContext
from llama_index.vector_stores.deeplake import DeepLakeVectorStore


# Create a local Deep Lake VectorStore
dataset_path = "./data/paul_graham/deep_lake_db"
vector_store = DeepLakeVectorStore(dataset_path=dataset_path, overwrite=True)

# # LLM that will answer questions with the retrieved context
# llm = OpenAI(model="gpt-3.5-turbo-1106")
# # We use OpenAI's embedding model "text-embedding-ada-002"
# embed_model = OpenAIEmbedding()

service_context = ServiceContext.from_defaults(embed_model='local', llm=llm,)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

vector_index = VectorStoreIndex(nodes, service_context=service_context, storage_context=storage_context, show_progress=True)

  service_context = ServiceContext.from_defaults(embed_model='local', llm=llm,)


Generating embeddings:   0%|          | 0/57 [00:00<?, ?it/s]

Uploading data to deeplake dataset.


100%|██████████| 57/57 [00:00<00:00, 292.31it/s]

Dataset(path='./data/paul_graham/deep_lake_db', tensors=['text', 'metadata', 'embedding', 'id'])

  tensor      htype      shape     dtype  compression
  -------    -------    -------   -------  ------- 
   text       text      (57, 1)     str     None   
 metadata     json      (57, 1)     str     None   
 embedding  embedding  (57, 384)  float32   None   
    id        text      (57, 1)     str     None   





In [15]:
query_engine = vector_index.as_query_engine(similarity_top_k=10)
response_vector = query_engine.query("What are the main things Paul worked on before college?")
print(response_vector.response)

ValueError: Calculated available context size -309 was not non-negative.

In [16]:
from llama_index.evaluation import generate_question_context_pairs
qc_dataset = generate_question_context_pairs(
    nodes,
    llm=llm,
    num_questions_per_chunk=1
)
# We can save the dataset as a json file for later use.
qc_dataset.save_json("qc_dataset.json")

ModuleNotFoundError: No module named 'llama_index.evaluation'

In [None]:
from llama_index.finetuning.embeddings.common import (
    EmbeddingQAFinetuneDataset,
)
qc_dataset = EmbeddingQAFinetuneDataset.from_json(
    "qc_dataset.json"
)

In [None]:
import pandas as pd

def display_results_retriever(name, eval_results):
    """Display results from evaluate."""

    metric_dicts = []
    for eval_result in eval_results:
        metric_dict = eval_result.metric_vals_dict
        metric_dicts.append(metric_dict)

    full_df = pd.DataFrame(metric_dicts)

    hit_rate = full_df["hit_rate"].mean()
    mrr = full_df["mrr"].mean()

    metric_df = pd.DataFrame(
        {"Retriever Name": [name], "Hit Rate": [hit_rate], "MRR": [mrr]}
    )

    return metric_df

In [None]:
from llama_index.evaluation import RetrieverEvaluator

# We can evaluate the retievers with different top_k values.
for i in [2, 4, 6, 8, 10]:
    retriever = vector_index.as_retriever(similarity_top_k=i)
    retriever_evaluator = RetrieverEvaluator.from_metric_names(
        ["mrr", "hit_rate"], retriever=retriever
    )
    eval_results = await retriever_evaluator.aevaluate_dataset(qc_dataset)
    print(display_results_retriever(f"Retriever top_{i}", eval_results))

In [None]:
from llama_index.evaluation import RelevancyEvaluator, FaithfulnessEvaluator, BatchEvalRunner

for i in [2, 4, 6, 8, 10]:   
    # Set Faithfulness and Relevancy evaluators
    query_engine = vector_index.as_query_engine(similarity_top_k=i)

    # While we use GPT3.5-Turbo to answer questions
    # we can use GPT4 to evaluate the answers.
    llm_gpt4 = OpenAI(temperature=0, model="gpt-4-1106-preview")
    service_context_gpt4 = ServiceContext.from_defaults(llm=llm_gpt4)

    faithfulness_evaluator = FaithfulnessEvaluator(service_context=service_context_gpt4)
    relevancy_evaluator = RelevancyEvaluator(service_context=service_context_gpt4)

    # Run evaluation
    queries = list(qc_dataset.queries.values())
    batch_eval_queries = queries[:20]

    runner = BatchEvalRunner(
    {"faithfulness": faithfulness_evaluator, "relevancy": relevancy_evaluator},
    workers=8,
    )
    eval_results = await runner.aevaluate_queries(
        query_engine, queries=batch_eval_queries
    )
    faithfulness_score = sum(result.passing for result in eval_results['faithfulness']) / len(eval_results['faithfulness'])
    print(f"top_{i} faithfulness_score: {faithfulness_score}")

    relevancy_score = sum(result.passing for result in eval_results['faithfulness']) / len(eval_results['relevancy'])
    print(f"top_{i} relevancy_score: {relevancy_score}")

# Changing the embedding model

In [None]:
import os
from llama_index import VectorStoreIndex, ServiceContext, StorageContext
from llama_index.vector_stores import DeepLakeVectorStore
from llama_index.embeddings.cohereai import CohereEmbedding
from llama_index.llms import OpenAI

# Create another local DeepLakeVectorStore to store the embeddings
dataset_path = "./data/paul_graham/deep_lake_db_1"
vector_store = DeepLakeVectorStore(dataset_path=dataset_path, overwrite=False)

llm = OpenAI(model="gpt-3.5-turbo-1106")
embed_model = CohereEmbedding(
    cohere_api_key=os.getenv('COHERE_API_KEY'),
    model_name="embed-english-v3.0",
    input_type="search_document",
)

service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm,)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
vector_index = VectorStoreIndex(nodes, service_context=service_context, storage_context=storage_context, show_progress=True)

In [None]:
from llama_index.evaluation import RetrieverEvaluator

embed_model.input_type = "search_query"
retriever = vector_index.as_retriever(similarity_top_k=10, embed_model=embed_model)

retriever_evaluator = RetrieverEvaluator.from_metric_names(
    ["mrr", "hit_rate"], retriever=retriever
)
eval_results = await retriever_evaluator.aevaluate_dataset(qc_dataset)
print(display_results_retriever(f"Retriever_cohere_embeds", eval_results))

# Incorporating a Reranker

In [None]:
from llama_index.postprocessor.cohere_rerank import CohereRerank
from llama_index.indices.postprocessor import SentenceTransformerRerank, LLMRerank

st_reranker = SentenceTransformerRerank(
    top_n=5, model="cross-encoder/ms-marco-MiniLM-L-6-v2"
)

llm_reranker = LLMRerank(
    choice_batch_size=4, top_n=5,
)
cohere_rerank = CohereRerank(api_key=os.getenv('COHERE_API_KEY'), top_n=10)
for reranker in [cohere_rerank, st_reranker, llm_reranker]:
    retriever_with_reranker = vector_index.as_retriever(similarity_top_k=10, postprocessor=reranker, embed_model=embed_model)

    retriever_evaluator_1 = RetrieverEvaluator.from_metric_names(
        ["mrr", "hit_rate"], retriever=retriever_with_reranker
    )
    eval_results1 = await retriever_evaluator_1.aevaluate_dataset(qc_dataset)
    print(display_results_retriever("Retriever with added Reranker", eval_results1))

# Employing Deep Memory

In [None]:
def create_query_relevance(qa_dataset):
    """Function for converting LlamaIndex dataset to correct format for deep memory training"""
    queries = [text for _, text in qa_dataset.queries.items()]
    relevant_docs = qa_dataset.relevant_docs
    relevance = []
    for doc in relevant_docs:
        relevance.append([(relevant_docs[doc][0], 1)])
    return queries, relevance

train_queries, train_relevance = create_query_relevance(qc_dataset)
print(len(train_queries))

In [None]:
import deeplake
local = "./data/paul_graham/deep_lake_db"
hub_path = "hub://genai360/optimization_paul_graham"
hub_managed_path = "hub://genai360/optimization_paul_graham_managed"

# First upload our local vector store
deeplake.deepcopy(local, hub_path, overwrite=True)
# Create a managed vector store
deeplake.deepcopy(hub_path, hub_managed_path, overwrite=True, runtime={"tensor_db": True})

In [None]:
import os
from llama_index import VectorStoreIndex, ServiceContext, StorageContext
from llama_index.vector_stores import DeepLakeVectorStore
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms import OpenAI

vector_store = DeepLakeVectorStore(dataset_path=hub_managed_path, overwrite=False, runtime={"tensor_db": True}, read_only=True)
llm = OpenAI(model="gpt-3.5-turbo-1106")
embed_model = OpenAIEmbedding()

service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm,)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

vector_index = VectorStoreIndex.from_vector_store(vector_store,service_context=service_context, storage_context=storage_context, use_async=False, show_progress=True)

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

job_id = vector_store.vectorstore.deep_memory.train(
    queries=train_queries,
    relevance=train_relevance,
    embedding_function=embeddings.embed_documents,
)

In [None]:
vector_store.vectorstore.deep_memory.status('652dceeed7d1579bf6abf3df')

In [None]:
from llama_index.evaluation import generate_question_context_pairs
# Generate test dataset
test_dataset = generate_question_context_pairs(
    nodes[:20],
    llm=llm,
    num_questions_per_chunk=1
)
test_dataset.save_json("test_dataset.json")

# We can also load the dataset from a json file if already done previously.
from llama_index.finetuning.embeddings.common import (
    EmbeddingQAFinetuneDataset,
)
test_dataset = EmbeddingQAFinetuneDataset.from_json(
    "test_dataset.json"
)

test_queries, test_relevance = create_query_relevance(test_dataset)

In [None]:
# Evaluate recall on the generated test dataset
recalls = vector_store.vectorstore.deep_memory.evaluate(
    queries=test_queries,
    relevance=test_relevance,
    embedding_function=embeddings.embed_documents,
)

In [None]:
import os
from llama_index.postprocessor.cohere_rerank import CohereRerank
from llama_index.evaluation import (
    RetrieverEvaluator,
)

base_retriever = vector_index.as_retriever(similarity_top_k=10)
deep_memory_retriever = vector_index.as_retriever(
similarity_top_k=10, vector_store_kwargs={"deep_memory": True}
)

base_retriever_evaluator = RetrieverEvaluator.from_metric_names(
    ["mrr", "hit_rate"], retriever=base_retriever
)
eval_results = await base_retriever_evaluator.aevaluate_dataset(test_dataset)
print(display_results_retriever("Retriever Results", eval_results))

In [None]:
deep_memory_retriever = vector_index.as_retriever(
similarity_top_k=10, vector_store_kwargs={"deep_memory": True}
)

dm_retriever_evaluator = RetrieverEvaluator.from_metric_names(
    ["mrr", "hit_rate"], retriever=deep_memory_retriever
)
dm_eval_results = await dm_retriever_evaluator.aevaluate_dataset(test_dataset)
print(display_results_retriever("Retriever Results", dm_eval_results))