### Install dependencies

In [None]:
!CUDACXX=/usr/local/cuda-12.2/bin/nvcc CMAKE_ARGS="-DLLAMA_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=native" FORCE_CMAKE=1 pip install llama-cpp-python --no-cache-dir --force-reinstall --upgrade

In [None]:
!pip install -q langchain-core langchain langchain-community langchain-chroma langchain-text-splitters langchain-huggingface langchain_milvus jedi==0.17

In [None]:
!pip install -q --force-reinstall numpy==1.26.4 pandas==2.2.2 pymilvus==2.4.6 pymilvus[model]==2.4.6 protobuf==3.20.3 grpcio==1.63.0

In [None]:
# Mount Google Drive under /content/drive; the GGUF model loaded later in this notebook is read from that mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Imports and data loading

In [None]:
import os
import pandas as pd
import numpy as np

from langchain import hub

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

from langchain_community.llms import LlamaCpp
from langchain_community.document_loaders.dataframe import DataFrameLoader
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

from langchain_chroma import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings

from langchain_milvus.retrievers import MilvusCollectionHybridSearchRetriever
from langchain_milvus.utils.sparse import BM25SparseEmbedding

In [None]:
from pymilvus import MilvusClient
# Open a Milvus client on the database file ./squad_v2.db (the same path is used again for the ORM connection below).
client = MilvusClient("./squad_v2.db")

DEBUG:pymilvus.milvus_client.milvus_client:Created new connection using: dec9bafbc3ad4d739fe00142d563651c


In [None]:
# Load the two JSON-lines inputs: context passages (id, context) and question/answer records that point at a passage via context_id.
context_df = pd.read_json("context.jsonl", lines=True)
qna_df = pd.read_json("squad_v2_dataset.jsonl", lines=True)

In [None]:
# Preview the first rows of the context table.
context_df.head()

Unnamed: 0,id,context
0,0,The Normans (Norman: Nourmands; French: Norman...
1,1,"The Norman dynasty had a major political, cult..."
2,2,"The English name ""Normans"" comes from the Fren..."
3,3,"In the course of the 10th century, the initial..."
4,4,"Before Rollo's arrival, its populations did no..."


In [None]:
# Show the first question together with its raw `answers` value to see the format that has to be parsed.
print(qna_df.loc[0, "question"])
print(qna_df.loc[0, "answers"])

In what country is Normandy located?
{'text': ['France', 'France', 'France', 'France'], 'answer_start': [159, 159, 159, 159]}


In [None]:
def _parse_answer_texts(raw_answers):
    """Return the answer strings stored under the 'text' key of one `answers` value.

    Args:
    raw_answers: Either a dict with a 'text' list, or the string form of such
        a dict (e.g. "{'text': ['France'], 'answer_start': [159]}").

    Returns:
    A list of answer strings; empty when the record has no answers.
    """
    import ast  # local import keeps this cell runnable on its own

    # Parse the literal instead of splitting on "," so answers that contain
    # commas stay whole and the surrounding quote characters are not kept.
    parsed = ast.literal_eval(raw_answers) if isinstance(raw_answers, str) else raw_answers
    return [str(text) for text in parsed["text"]]


qna_df["ans_text"] = qna_df["answers"].apply(_parse_answer_texts)
qna_df.head()

In what country is Normandy located?
{'text': ['France', 'France', 'France', 'France'], 'answer_start': [159, 159, 159, 159]}


Unnamed: 0,question,answers,context_id,ans_text
0,In what country is Normandy located?,"{'text': ['France', 'France', 'France', 'Franc...",0,"['France', 'France', 'France', 'France']"
1,When were the Normans in Normandy?,"{'text': ['10th and 11th centuries', 'in the 1...",0,"['10th and 11th centuries', 'in the 10th and 1..."
2,From which countries did the Norse originate?,"{'text': ['Denmark, Iceland and Norway', 'Denm...",0,"['Denmark, Iceland and Norway', 'Denmark, Icel..."
3,Who was the Norse leader?,"{'text': ['Rollo', 'Rollo', 'Rollo', 'Rollo'],...",0,"['Rollo', 'Rollo', 'Rollo', 'Rollo']"
4,What century did the Normans first gain their ...,"{'text': ['10th century', 'the first half of t...",0,"['10th century', 'the first half of the 10th c..."


In [None]:
# Join every question to its source passage and keep only the columns needed for evaluation.
evaluation_columns = ["question", "ans_text", "context"]
qna_con_df = qna_df.merge(context_df, left_on="context_id", right_on="id")[evaluation_columns]
# The metrics expect a list of relevant documents per query, so wrap the single gold passage in a list.
qna_con_df["context"] = qna_con_df["context"].map(lambda passage: [passage])
qna_con_df.head()

Unnamed: 0,question,ans_text,context
0,In what country is Normandy located?,"['France', 'France', 'France', 'France']",[The Normans (Norman: Nourmands; French: Norma...
1,When were the Normans in Normandy?,"['10th and 11th centuries', 'in the 10th and 1...",[The Normans (Norman: Nourmands; French: Norma...
2,From which countries did the Norse originate?,"['Denmark, Iceland and Norway', 'Denmark, Icel...",[The Normans (Norman: Nourmands; French: Norma...
3,Who was the Norse leader?,"['Rollo', 'Rollo', 'Rollo', 'Rollo']",[The Normans (Norman: Nourmands; French: Norma...
4,What century did the Normans first gain their ...,"['10th century', 'the first half of the 10th c...",[The Normans (Norman: Nourmands; French: Norma...


## Setup Vector Index for Hybrid Search --> Milvus

In [None]:
# Chunking is disabled: each context row is loaded below as one whole document.
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
# splits = text_splitter.split_documents(docs)

# Wrap every row of context_df in a LangChain document, using the "context" column as the page content.
# NOTE(review): `documents` is not referenced again in the visible cells.
loader = DataFrameLoader(context_df, page_content_column="context")
documents = loader.load()

In [None]:
import nltk
# Download the NLTK 'punkt' tokenizer data — presumably required by the BM25 sparse embedder's tokenizer; TODO confirm.
nltk.download('punkt')

# Embeddings
# Candidate dense models; the list itself is not referenced again in the visible cells.
embedders = ["BAAI/bge-small-en", "sentence-transformers/all-mpnet-base-v2", ]
# dense_embedding_func = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
# Dense embedder: BAAI/bge-small-en on CPU, with embeddings normalized at encode time.
dense_embedding_func = HuggingFaceBgeEmbeddings(
    model_name = "BAAI/bge-small-en",
    model_kwargs = {"device": "cpu"},
    encode_kwargs = {"normalize_embeddings": True}
)
# Sparse embedder: BM25 built from the full list of context passages as its corpus.
sparse_embedding_func = BM25SparseEmbedding(corpus=context_df["context"].tolist())

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
  from tqdm.autonotebook import tqdm, trange


In [None]:
# Ask the wrapped sentence-transformers model for its output dimension; the Milvus schema needs it for the dense field.
sentence_transformer = dense_embedding_func.client
dense_embd_dim = sentence_transformer.get_sentence_embedding_dimension()
dense_embd_dim

384

In [None]:
from pymilvus import (
    Collection,
    CollectionSchema,
    DataType,
    FieldSchema,
    WeightedRanker,
    RRFRanker,
    connections,
)

# Define field names and their data types
pk_field = "context_id"
dense_field = "dense_vector"
sparse_field = "sparse_vector"
text_field = "text"
fields = [
    # Integer primary key supplied by the caller (auto_id off). No max_length:
    # it only applies to VARCHAR fields and was ignored for INT64.
    FieldSchema(
        name=pk_field,
        dtype=DataType.INT64,
        is_primary=True,
        auto_id=False,
    ),
    # Dense embedding; the dimension comes from the dense embedding model.
    FieldSchema(name=dense_field, dtype=DataType.FLOAT_VECTOR, dim=dense_embd_dim),
    # Sparse (BM25) embedding; sparse vectors carry no fixed dimension.
    FieldSchema(name=sparse_field, dtype=DataType.SPARSE_FLOAT_VECTOR),
    # Raw passage text returned to the retriever.
    FieldSchema(name=text_field, dtype=DataType.VARCHAR, max_length=65_535),
]

In [None]:
# Create a collection with the defined schema
# Dynamic fields are disabled, so inserted rows must contain exactly the declared fields.
schema = CollectionSchema(fields=fields, enable_dynamic_field=False)
collection_name = "squad_v2_contexts"

# Drop any existing collection with this name so re-running the cell starts from an empty collection.
if client.has_collection(collection_name):
    client.drop_collection(collection_name)

client.create_collection(
    collection_name=collection_name, schema=schema, consistency_level="Strong"
)

DEBUG:pymilvus.milvus_client.milvus_client:Successfully created collection: squad_v2_contexts


In [None]:
def _inner_product_index(field_name, index_type, index_name):
    """Build index params holding a single inner-product index on `field_name`."""
    params = MilvusClient.prepare_index_params()
    params.add_index(
        field_name=field_name,
        metric_type="IP",
        index_type=index_type,
        index_name=index_name,
    )
    return params


# Index on the dense vector field for efficient dense search
dense_index_params = _inner_product_index(dense_field, "FLAT", "dense_vector_index")
client.create_index(collection_name=collection_name, index_params=dense_index_params)

# Index on the sparse vector field for efficient sparse search
sparse_index_params = _inner_product_index(sparse_field, "SPARSE_INVERTED_INDEX", "sparse_vector_index")
client.create_index(collection_name=collection_name, index_params=sparse_index_params)

DEBUG:pymilvus.milvus_client.milvus_client:Successfully created an index on collection: squad_v2_contexts
DEBUG:pymilvus.milvus_client.milvus_client:Successfully created an index on collection: squad_v2_contexts


In [None]:
# Inspect the stored schema to confirm the fields and their parameters were registered as declared.
client.describe_collection(collection_name = collection_name)

{'collection_name': 'squad_v2_contexts',
 'auto_id': False,
 'num_shards': 0,
 'description': '',
 'fields': [{'field_id': 100,
   'name': 'context_id',
   'description': '',
   'type': <DataType.INT64: 5>,
   'params': {},
   'is_primary': True},
  {'field_id': 101,
   'name': 'dense_vector',
   'description': '',
   'type': <DataType.FLOAT_VECTOR: 101>,
   'params': {'dim': 384}},
  {'field_id': 102,
   'name': 'sparse_vector',
   'description': '',
   'type': <DataType.SPARSE_FLOAT_VECTOR: 104>,
   'params': {}},
  {'field_id': 103,
   'name': 'text',
   'description': '',
   'type': <DataType.VARCHAR: 21>,
   'params': {'max_length': 65535}}],
 'aliases': [],
 'collection_id': 0,
 'consistency_level': 0,
 'properties': {},
 'num_partitions': 0,
 'enable_dynamic_field': False}

In [None]:
# Open an ORM-style connection to the same database file and bind a Collection handle to the collection created above.
# This handle is what the insert cell and the hybrid-search retriever use.
connections.connect(uri = "./squad_v2.db")
collection = Collection(name=collection_name)

In [None]:
# Embed every context passage and insert it into the collection.
# Each embedder is called once on the whole list instead of once per row, and
# the slow DataFrame.iterrows() loop is replaced by plain Python lists.
context_ids = context_df["id"].tolist()
context_texts = context_df["context"].tolist()
dense_vecs = dense_embedding_func.embed_documents(context_texts)
sparse_vecs = sparse_embedding_func.embed_documents(context_texts)

entities = []
for context_id, text, dense_vec, sparse_vec in zip(context_ids, context_texts, dense_vecs, sparse_vecs):
    if not sparse_vec:
        # Diagnostic: surface passages whose sparse embedding came back empty.
        print(text)
        print(f"Dense Vector Type: {type(dense_vec)}")
        print(f"Sparse Vector Type: {type(sparse_vec)}")
        print(f"Sparse Vector: {sparse_vec}")
        print("\n")
    entities.append(
        {
            pk_field: context_id,
            dense_field: dense_vec,
            sparse_field: sparse_vec,
            text_field: text,
        }
    )
collection.insert(entities)
# Load the collection so it can be searched.
collection.load()


In [None]:
# Sanity check: the reported row count should equal the number of context passages inserted.
client.get_collection_stats(collection_name = collection_name)

{'row_count': 1204}

## Retriever evaluation

#### Retrieval metrics

In [None]:
# Order unaware binary relevance metrics
def precision_at_k(relevant: list[str], retrieved: list[str], k: int):
    """Computes precision at k.

    The denominator is the number of documents actually present in the top-k
    slice, so a retriever that returns fewer than k documents is not penalized
    for the missing ones.

    Args:
    relevant: A list of actual relevant documents.
    retrieved: A list of predicted relevant documents, best first.
    k: The number of top documents to consider.

    Returns:
    The precision at k, or 0.0 when the top-k slice is empty
    (nothing retrieved, or k == 0).
    """
    # Slice first, then test for emptiness: checking `retrieved` before slicing
    # let k == 0 through and divided by zero.
    top_k = retrieved[:k]
    if not top_k:
        return 0.0
    num_correct = len(set(relevant).intersection(top_k))
    return num_correct / len(top_k)

def recall_at_k(relevant: list[str], retrieved: list[str], k: int):
    """Computes recall at k.

    Args:
    relevant: A list of actual relevant documents.
    retrieved: A list of predicted relevant documents, best first.
    k: The number of top documents to consider.

    Returns:
    The fraction of relevant documents found in the top k retrieved ones,
    or 0.0 when there are no relevant documents.
    """
    if not relevant:
        return 0.0
    found = set(retrieved[:k]) & set(relevant)
    return len(found) / len(relevant)

def f1_at_k(relevant: list[str], retrieved: list[str], k: int):
    """Computes F1 score at k.

    Args:
    relevant: A list of actual relevant documents.
    retrieved: A list of predicted relevant documents, best first.
    k: The number of top documents to consider.

    Returns:
    The harmonic mean of precision at k and recall at k, or 0.0 when both
    are zero.
    """
    precision = precision_at_k(relevant, retrieved, k)
    recall = recall_at_k(relevant, retrieved, k)
    denominator = precision + recall
    # Both scores zero would make the harmonic mean undefined; report 0.0.
    return 0.0 if denominator == 0 else 2 * (precision * recall) / denominator

def hit_rate_at_k(relevant: list[str], retrieved: list[str], k: int):
    """Computes hit rate at k.

    Args:
    relevant: A list of actual relevant documents.
    retrieved: A list of predicted relevant documents, best first.
    k: The number of top documents to consider.

    Returns:
    1 if at least one relevant document appears in the top k retrieved
    documents, otherwise 0.
    """
    top_k = retrieved[:k]
    for doc in relevant:
        if doc in top_k:
            return 1
    return 0


# Order aware binary relevance metrics
def reciprocal_rank(relevant: list[str], retrieved:list[str]):
    """Computes the reciprocal rank of a single query.

    Args:
    relevant: A list of actual relevant documents for the query.
    retrieved: A list of predicted relevant documents for the query, best first.

    Returns:
    1 / rank of the first relevant document in `retrieved` (ranks start at 1),
    or 0.0 when there are no relevant documents or none was retrieved.
    """
    if not relevant:
        return 0.0
    for rank, doc in enumerate(retrieved, start=1):
        if doc in relevant:
            return 1.0 / rank
    return 0.0

def mean_reciprocal_rank(relevant: list[list[str]], retrieved: list[list[str]]):
    """Computes mean reciprocal rank.

    Args:
    relevant: A list of actual relevant documents for each query.
    retrieved: A list of predicted relevant documents for each query,
        aligned by position with `relevant`.

    Returns:
    The mean of the per-query reciprocal ranks, or 0.0 when there are no
    queries.
    """
    num_queries = len(relevant)
    # With no queries the mean is undefined; return 0.0 instead of dividing by zero.
    if num_queries == 0:
        return 0.0
    reciprocal_rank_sum = 0.0
    for query_index in range(num_queries):
        reciprocal_rank_sum += reciprocal_rank(relevant[query_index], retrieved[query_index])
    return reciprocal_rank_sum * (1.0 / num_queries)


def average_precision(relevant: list[str], retrieved: list[str]):
    """Computes average precision for a single query.

    Args:
    relevant: A list of actual relevant documents.
    retrieved: A list of predicted relevant documents, best first.

    Returns:
    The sum of precision values at each rank where a relevant document
    appears, divided by the number of relevant documents; 0.0 when there are
    no relevant documents.
    """
    # No relevant documents: return 0.0 (as recall_at_k and reciprocal_rank do)
    # instead of dividing by zero.
    if not relevant:
        return 0.0
    score = 0.0
    num_hits = 0.0
    for rank, doc in enumerate(retrieved, start=1):
        if doc in relevant:
            num_hits += 1.0
            score += num_hits / rank

    return float(score / len(relevant))

def mean_average_precision_at_k(relevant: list[list[str]], retrieved: list[list[str]], k: int):
    """Computes mean average precision at k.

    Args:
    relevant: A list of actual relevant documents for each query.
    retrieved: A list of predicted relevant documents for each query,
        aligned by position with `relevant`.
    k: The number of top retrieved documents to consider per query.

    Returns:
    The mean of the per-query average precision computed on the top k
    retrieved documents, or 0.0 when there are no queries.
    """
    num_queries = len(relevant)
    # With no queries the mean is undefined; return 0.0 instead of dividing by zero.
    if num_queries == 0:
        return 0.0
    average_precision_sum = 0.0
    for query_index in range(num_queries):
        top_k_retrieved = retrieved[query_index][:k]
        average_precision_sum += average_precision(relevant[query_index], top_k_retrieved)

    return float(average_precision_sum / num_queries)


# Graded relevance metrics
def ndcg_at_k(relevant: list[list[str]], retrieved: list[list[str]], k: int):
    """Computes the mean Normalized Discounted Cumulative Gain at k.

    Relevance is binary here: a retrieved document gains 1 when it appears in
    the query's relevant list and 0 otherwise.

    Args:
    relevant: A list of actual relevant documents for each query.
    retrieved: A list of predicted relevant documents for each query, best first.
    k: The number of top documents to consider.

    Returns:
    The NDCG at k averaged over all queries.
    """
    per_query_scores = []
    for query_relevant, query_retrieved in zip(relevant, retrieved):
        # Discounted gain actually achieved by the top-k ranking.
        achieved = 0.0
        for position, doc in enumerate(query_retrieved[:k]):
            if doc in query_relevant:
                achieved += 1.0 / np.log2(position + 2)

        # Best achievable gain: all relevant documents ranked first (capped at k).
        ideal_hits = min(len(query_relevant), k)
        ideal = np.sum([1.0 / np.log2(position + 2) for position in range(ideal_hits)])

        if ideal > 0:
            per_query_scores.append(achieved / ideal)
        else:
            per_query_scores.append(0.0)

    return np.mean(per_query_scores)

#### Retriever instantiation

In [None]:
# instantiate the retriever, defining search parameters for sparse and dense fields:
# Both fields are searched with the inner-product metric, matching the metric their indexes were created with.
sparse_search_params = {"metric_type": "IP"}
dense_search_params = {"metric_type": "IP", "params": {}}

# anns_fields, field_embeddings and field_search_params are parallel lists:
# position 0 is the dense field, position 1 the sparse field.
# RRFRanker is the reranker that merges the two result lists; up to 10 passages are returned per query.
retriever = MilvusCollectionHybridSearchRetriever(
    collection=collection,
    rerank=RRFRanker(),
    anns_fields=[dense_field, sparse_field],
    field_embeddings=[dense_embedding_func, sparse_embedding_func],
    field_search_params=[dense_search_params, sparse_search_params],
    top_k=10,
    text_field=text_field,
)

In [None]:
def format_docs(docs, return_type="str"):
    """Extracts the page content of retrieved documents.

    Args:
    docs: An iterable of objects exposing a `page_content` attribute.
    return_type: "str" to join the contents with blank lines into one string,
        "list" to return them as a list of strings.

    Returns:
    The joined string or the list of contents, depending on `return_type`.

    Raises:
    ValueError: If `return_type` is neither "str" nor "list".
    """
    contents = [doc.page_content for doc in docs]
    if return_type == "str":
        return "\n\n".join(contents)
    if return_type == "list":
        return contents
    # An unknown return_type used to fall through and return None silently.
    raise ValueError(f"Unsupported return_type {return_type!r}; expected 'str' or 'list'.")

# Smoke test: retrieve for the first question and print the retrieved passages as a list.
print(format_docs(retriever.invoke(qna_con_df.loc[0, "question"]), return_type="list"))

['The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.']


In [None]:
# Run every question through the retriever. The three lists stay aligned by position;
# a question whose retrieval raises is reported and left out of all of them.
queries, relevant_docs, retrieved_docs = [], [], []

for index, row in qna_con_df.iterrows():
    try:
        passages = format_docs(retriever.invoke(row["question"]), return_type="list")
    except Exception as e:
        print("Exception encountered:\n",e)
        print(f"Question: {row['question']}")
        print(f"Context: {row['context']}")
        continue
    retrieved_docs.append(passages)
    queries.append(row["question"])
    relevant_docs.append(row["context"])

print(len(queries), len(relevant_docs), len(retrieved_docs))

k = 5
query_pairs = list(zip(relevant_docs, retrieved_docs))

# Per-query metrics are averaged over all evaluated questions.
mean_precision_at_k, mean_recall_at_k, mean_f1_at_k, mean_hit_rate_at_k = (
    np.mean([metric(relevant, retrieved, k) for relevant, retrieved in query_pairs])
    for metric in (precision_at_k, recall_at_k, f1_at_k, hit_rate_at_k)
)
# Corpus-level metrics take the full lists; MRR is computed on everything the retriever returned.
mrr = mean_reciprocal_rank(relevant_docs, retrieved_docs)
map_at_k = mean_average_precision_at_k(relevant_docs, retrieved_docs, k)
ndcg_k = ndcg_at_k(relevant_docs, retrieved_docs, k)

for label, value in (
    ("Precision@k:", mean_precision_at_k),
    ("Recall@k:", mean_recall_at_k),
    ("F1-Score@k:", mean_f1_at_k),
    ("Hit-Rate@k:", mean_hit_rate_at_k),
    ("MRR:", mrr),
    ("MAP@k:", map_at_k),
    ("NDCG@k:", ndcg_k),
):
    print(label, value)

#### Retriever testing

In [None]:
# Evaluate the retriever for different values of top_k

top_k = [1, 3, 5, 10]
eval_dict = {"Metric": ["Precision@k","Recall@k", "F1-Score@k", "Hit-Rate@k", "MRR", "MAP@k", "NDCG@k"]}

sparse_search_params = {"metric_type": "IP"}
dense_search_params = {"metric_type": "IP", "params": {}}


for k in top_k:
    # Rebuild the retriever with the current top_k; results are merged with a
    # weighted ranker (0.75 dense, 0.25 sparse, following the anns_fields order).
    # NOTE: `retriever` keeps the last configuration (top_k=10) after the loop,
    # and the generation cells below reuse it.
    retriever = MilvusCollectionHybridSearchRetriever(
        collection=collection,
        rerank=WeightedRanker(0.75, 0.25),
        anns_fields=[dense_field, sparse_field],
        field_embeddings=[dense_embedding_func, sparse_embedding_func],
        field_search_params=[dense_search_params, sparse_search_params],
        top_k=k,
        text_field=text_field,
    )
    # Position-aligned results for this top_k; failed questions are left out of all three lists.
    queries = []
    relevant_docs = []
    retrieved_docs = []
    for index, row in qna_con_df.iterrows():
        try:
            passages = format_docs(retriever.invoke(row["question"]), return_type="list")
        except Exception as e:
            # Report the error itself as well as the row, so failures can be diagnosed.
            print(f"Exception at {index=}: {e!r}")
            continue
        retrieved_docs.append(passages)
        queries.append(row["question"])
        relevant_docs.append(row["context"])

    # Evaluation with current top_k
    query_pairs = list(zip(relevant_docs, retrieved_docs))
    mean_precision_at_k = np.mean([precision_at_k(relevant, retrieved, k) for relevant, retrieved in query_pairs])
    mean_recall_at_k = np.mean([recall_at_k(relevant, retrieved, k) for relevant, retrieved in query_pairs])
    mean_f1_at_k = np.mean([f1_at_k(relevant, retrieved, k) for relevant, retrieved in query_pairs])
    mean_hit_rate_at_k = np.mean([hit_rate_at_k(relevant, retrieved, k) for relevant, retrieved in query_pairs])
    mrr = mean_reciprocal_rank(relevant_docs, retrieved_docs)
    map_at_k = mean_average_precision_at_k(relevant_docs, retrieved_docs, k)
    ndcg_k = ndcg_at_k(relevant_docs, retrieved_docs, k)

    # One column per top_k, rows in the same order as eval_dict["Metric"].
    eval_dict[f"{k=}"] = [mean_precision_at_k, mean_recall_at_k, mean_f1_at_k, mean_hit_rate_at_k, mrr, map_at_k, ndcg_k]

# Build the indexed table in one expression instead of mutating it in place.
eval_df = pd.DataFrame(eval_dict).set_index("Metric")
print()
print(eval_df.to_markdown())

## Augmented Generation

In [None]:
# Single human-turn prompt with {question} and {context} placeholders: the instructions,
# the question and the retrieved context are all sent in one message.
prompt = ChatPromptTemplate.from_messages([
("human", '''You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question}
Context: {context}\n
Answer: '''),
])

# Print the template to check its layout.
prompt.pretty_print()



You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: [33;1m[1;3m{question}[0m
Context: [33;1m[1;3m{context}[0m

Answer: 


In [None]:
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler

n_gpu_layers = -1  # The number of layers to put on the GPU. The rest will be on the CPU. If you don't know how many layers there are, you can use -1 to move all to GPU.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
# Context window in tokens. The RAG prompt concatenates every retrieved passage
# (up to 10 with the retriever configured above), which does not fit in 512 tokens,
# so use Llama 3's native 8192-token context. Lower it if VRAM is tight and top_k is reduced.
n_ctx = 8192

# Stream generated tokens to stdout as they are produced.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path="/content/drive/MyDrive/ELC/models/llama-3/Meta-Llama-3-8B-Instruct-Q8_0.gguf",
    n_gpu_layers=n_gpu_layers,
    n_ctx=n_ctx,
    # f16_kv=True,
    # n_batch=n_batch,
    callback_manager=callback_manager,
    verbose=True,  # Verbose is required to pass to the callback manager
    model_kwargs = {"chat_format":"llama-3"}
)

In [None]:
# Preview the exact messages the chain would send for one sample question.
# The printed question is the one actually used: the cell previously printed
# row 2 while building the prompt from row 1.
question = qna_df.loc[1, "question"]
print(question)
context = format_docs(retriever.invoke(question))
prompt.format_prompt(question=question, context=context).messages

In [None]:
# RAG pipeline: the input question goes to the retriever, whose documents are joined into one
# string by format_docs, and is also passed through unchanged as "question". The filled prompt
# is then sent to the LLM and the output parsed as a string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Run the chain on one sample question; the commented line is an alternative phrasing to try.
question = qna_df.loc[1, "question"]
# question = "In which years were the Normans staying in Normandy ?"
response = rag_chain.invoke(question)