### Install dependencies

In [None]:
!pip install llama-index
!pip install datasets
!pip install llama-index-retrievers-bm25
!pip install llama-index-embeddings-huggingface
!pip install llama-index-llms-llama-cpp
!pip install ragatouille
!pip install llama-index-postprocessor-colbert-rerank
!pip install FlagEmbedding
%pip install llama-index-postprocessor-flag-embedding-reranker
# %pip install llama-index-llms-openai

Collecting llama-index
  Downloading llama_index-0.10.55-py3-none-any.whl (6.8 kB)
Collecting llama-index-agent-openai<0.3.0,>=0.1.4 (from llama-index)
  Downloading llama_index_agent_openai-0.2.8-py3-none-any.whl (13 kB)
Collecting llama-index-cli<0.2.0,>=0.1.2 (from llama-index)
  Downloading llama_index_cli-0.1.12-py3-none-any.whl (26 kB)
Collecting llama-index-embeddings-openai<0.2.0,>=0.1.5 (from llama-index)
  Downloading llama_index_embeddings_openai-0.1.10-py3-none-any.whl (6.2 kB)
Collecting llama-index-indices-managed-llama-cloud>=0.2.0 (from llama-index)
  Downloading llama_index_indices_managed_llama_cloud-0.2.5-py3-none-any.whl (9.3 kB)
Collecting llama-index-legacy<0.10.0,>=0.9.48 (from llama-index)
  Downloading llama_index_legacy-0.9.48-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting llama-index-llms-openai<0.2.0,>=0.1.13 (from llama-index)
  Downloading lla

### Utils


In [None]:
import re
import numpy as np
import pandas as pd
from datasets import load_dataset
from llama_index.core import Document, VectorStoreIndex, ServiceContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.core.evaluation import generate_question_context_pairs, RetrieverEvaluator, BatchEvalRunner, FaithfulnessEvaluator, RelevancyEvaluator
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.response.notebook_utils import display_response

from llama_index.postprocessor.colbert_rerank import ColbertRerank

def create_documents(data):
    """Flatten parsed file contents into a list of ``Document`` objects.

    ``data`` maps filename -> page number -> list of content dicts. Each
    content dict is scanned for ``'title'`` and ``'text'`` entries:

    * a ``'title'`` entry becomes the title attached to the texts that follow
      it on the same page (the title is reset to ``""`` on every new page);
    * a ``'text'`` entry yields one ``Document`` carrying the filename, page
      number and current title as metadata, unless the exact same text was
      already emitted for that page.

    Prints one progress line per file and returns the documents in the order
    they were encountered.
    """
    documents = []
    for filename, pages in data.items():
        print(f"Processing {filename}")
        for page_number, contents in pages.items():
            current_title = ""
            seen_texts = set()  # texts already emitted for this page
            for content in contents:
                for kind, info in content.items():
                    if kind == 'title':
                        current_title = info
                    # Only previously unseen 'text' entries produce a document.
                    if kind != 'text' or info in seen_texts:
                        continue
                    page_metadata = {
                        'filename': filename,
                        'page_number': page_number,
                        'title': current_title,
                    }
                    documents.append(Document(text=info, metadata=page_metadata))
                    seen_texts.add(info)
    return documents


def display_results_retriever(name, eval_results, metrics = ["mrr", "hit_rate"]):
    """Summarise retriever evaluation results as a one-row DataFrame.

    Each item of ``eval_results`` must expose a ``metric_vals_dict`` mapping of
    metric name to value. For every name in ``metrics`` the mean over all
    results is reported in a column of that name; a final ``"Retriever Name"``
    column holds ``name``.
    """
    per_query = pd.DataFrame([res.metric_vals_dict for res in eval_results])

    summary = {metric: per_query[metric].mean() for metric in metrics}
    # The single-element list gives the resulting frame its one row.
    summary["Retriever Name"] = [name]
    return pd.DataFrame(summary)

from llama_index.core.evaluation import EmbeddingQAFinetuneDataset

def select_qa(dataset):
    """Filter the generated queries of a QA dataset.

    A query is kept only if it does not start with a code fence or with
    ``import`` and does not contain the substring ``"uestion"`` (which removes
    lines such as "Here are your two questions:", but also any query that
    itself contains the word "question").

    Returns ``(queries, corpus, relevant_docs)`` where ``queries`` and
    ``relevant_docs`` are restricted to the kept query ids and ``corpus`` is
    the dataset's corpus, unchanged.
    """
    rejected_prefixes = ("```", "import")

    def is_kept(text):
        return not text.startswith(rejected_prefixes) and "uestion" not in text

    new_queries = {qid: text for qid, text in dataset.queries.items() if is_kept(text)}
    new_docs = {qid: dataset.relevant_docs[qid] for qid in new_queries}

    return new_queries, dataset.corpus, new_docs

def retriever_evaluation(retriever, node_postprocessor = None, metrics = ["hit_rate","mrr"]):
    """Build a ``RetrieverEvaluator`` for ``retriever``.

    ``metrics`` lists the metric names to compute and ``node_postprocessor``
    is forwarded as the evaluator's ``node_postprocessors`` argument
    (``None`` by default).
    """
    evaluator_kwargs = {
        "metric_names": metrics,
        "retriever": retriever,
        "node_postprocessors": node_postprocessor,
    }
    return RetrieverEvaluator.from_metric_names(**evaluator_kwargs)


In [None]:
# 4-bit quantised GGUF build of phi-2; LlamaCPP downloads it on first use.
model_url = "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q4_0.gguf"

llm_phi = LlamaCPP(
    model_url=model_url,
    model_path=None,  # no local file: fetch the model from model_url
    temperature=0.1,
    max_new_tokens=256,
    # The model loader reports phi2.context_length = 2048, so the window must
    # not exceed that (the previous value of 3900 did).
    context_window=2048,
    generate_kwargs={},
    model_kwargs={"n_gpu_layers": -1},  # if compiled to use GPU
    verbose=True,
)

Downloading url https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q4_0.gguf to path /tmp/llama_index/models/phi-2.Q4_0.gguf
total size (MB): 1602.46


1529it [00:10, 143.75it/s]                          
llama_model_loader: loaded meta data with 20 key-value pairs and 325 tensors from /tmp/llama_index/models/phi-2.Q4_0.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = phi2
llama_model_loader: - kv   1:                               general.name str              = Phi2
llama_model_loader: - kv   2:                        phi2.context_length u32              = 2048
llama_model_loader: - kv   3:                      phi2.embedding_length u32              = 2560
llama_model_loader: - kv   4:                   phi2.feed_forward_length u32              = 10240
llama_model_loader: - kv   5:                           phi2.block_count u32              = 32
llama_model_loader: - kv   6:                  phi2.attention.head_count u32              = 32
llama_model_loader: - kv

### Load Documents and models (from computer)

In [None]:
# Embedding model used to build and query the vector index.
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

embed_model = HuggingFaceEmbedding(model_name = "BAAI/bge-small-en")  # alternative tried: model_name="Salesforce/SFR-Embedding-2_R"

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/90.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
from google.colab import drive
import pickle
drive.mount('/content/drive')

# Pre-computed nodes saved by an earlier run.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
DATA_PATH = "/content/nodes_ICRC.pkl"
# The context manager closes the file handle, which the bare open() never did.
with open(DATA_PATH, 'rb') as infile:
    nodes = pickle.load(infile)

Mounted at /content/drive


In [None]:
# Embed every node with embed_model and build the vector index that is queried later via index.as_retriever.
index = VectorStoreIndex(nodes, embed_model=embed_model, show_progress=True)

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/358 [00:00<?, ?it/s]

In [None]:
from google.colab import drive
import pickle
drive.mount('/content/drive')

# Pre-generated question/context datasets (full and shortened versions).
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
# The context managers close both file handles, which the bare open() calls never did.
DATA_PATH = "/content/ICRC_qa_dataset.pkl"
with open(DATA_PATH, 'rb') as infile:
    ICRC_qa_dataset = pickle.load(infile)

DATA_PATH = "/content/ICRC_qa_dataset_short.pkl"
with open(DATA_PATH, 'rb') as infile:
    ICRC_qa_dataset_short = pickle.load(infile)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Load Documents and models (from llama3)

In [None]:
from google.colab import drive
import json
# Mount Google Drive at /content/drive (only works inside Google Colab).
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Parsed document contents, in the nested structure consumed by create_documents.
# Read as UTF-8 explicitly (the JSON default) and close the handle when done.
with open('results.json', encoding='utf-8') as f:
    results = json.load(f)

In [None]:
# Turn the parsed JSON into Document objects (one per unique text entry per page).
documents = create_documents(results)
print(len(documents))
# Split the documents into nodes with chunk_size=512 and chunk_overlap=20.
splitter = SentenceSplitter(chunk_size=512, chunk_overlap=20)
nodes = splitter.get_nodes_from_documents(documents)
print(len(nodes))

Processing RA_1964_FRE
Processing RA_1950_ENG
Processing CIRC_LC_1929_1991_FRE
Processing RA_1970_ENG
Processing CDP_1989_FRE
Processing RA_1968_ENG
Processing CAP_1995_FRE_02
Processing CIRC_1933_1952_ENG
Processing CIRC_1900_1920
Processing icrc-0907-002
Processing icrc-annual-report-2016
Processing icrc-annual-report-2003
Processing CIRC_1949_1957
Processing icrc-annual-report-2019-1
Processing icrc-0907-001
Processing icrc-annual-report-2011
Processing CDP_1973
Processing icrc-annual-report-2017
Processing BIB_00002
Processing icrc-annual-report-2020-2
Processing CDP_1974
Processing CIRC_1916_1951
Processing RA_1970_FRE
Processing icrc-annual-report-2013
Processing icrc-annual-report-2009
Processing icrc-annual-report-2008
Processing CIRC_LC_1972_1979_FRE
Processing icrc-annual-report-2005
Processing CIRC_1984_1994_FRE
Processing 2003-07-04-fr-congo-kinshasa-congo-rdc-activites-d-98f0b396ff0ae685817ac5db8f5e8711
Processing CDP_1950
Processing CIRC_1976_1985_ENG
Processing icrc-annu

In [None]:
# Embedding model used to build and query the vector index.
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

embed_model = HuggingFaceEmbedding(model_name = "BAAI/bge-small-en")  # alternative tried: model_name="Salesforce/SFR-Embedding-2_R"

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/90.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Embed every node with embed_model and build the vector index that is queried later via index.as_retriever.
index = VectorStoreIndex(nodes, embed_model=embed_model, show_progress=True)

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/756 [00:00<?, ?it/s]

In [None]:
# Q4_K_M quantised GGUF build of Meta-Llama-3-8B-Instruct; LlamaCPP downloads it on first use.
model_url = "https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf"
llm_llama3 = LlamaCPP(
    model_url=model_url,
    model_path=None,  # no local file: fetch the model from model_url
    temperature=0.1,
    max_new_tokens=256,
    context_window=3900,  # below the llama.context_length of 8192 reported by the model loader
    generate_kwargs={},
    model_kwargs={"n_gpu_layers": -1},  # if compiled to use GPU
    verbose=True,
)

# from transformers import BitsAndBytesConfig
# from transformers import AutoModelForCausalLM, AutoTokenizer
# from peft import PeftModel, PeftConfig, LoraConfig, TaskType, get_peft_model

#1st possibility :
# quantization_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     # bnb_4bit_compute_dtype=torch.float16,
#     # bnb_4bit_quant_type="nf4",
#     # bnb_4bit_use_double_quant=True,
# )
# llm = HuggingFaceLLM(
# #     model_name="meta-llama/Meta-Llama-3-8B",
#     tokenizer_name="meta-llama/Meta-Llama-3-8B",
#     context_window=3900,
#     max_new_tokens=256,
#     model_kwargs={"quantization_config": quantization_config},
#     generate_kwargs={"temperature": 0.1},
#     # messages_to_prompt=messages_to_prompt,
#     # completion_to_prompt=completion_to_prompt,
#     device_map="cuda",
# )

#2nd one :
# from transformers import BitsAndBytesConfig
# quantization_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     # bnb_4bit_compute_dtype=torch.float16,
#     # bnb_4bit_quant_type="nf4",
#     # bnb_4bit_use_double_quant=True,
# )
# model = AutoModelForCausal.from_pretrained("meta-llama/Meta-Llama-3-8B", quantization_config=q_config)
# peft_config = LoraConfig(
#         r=args.lora_r, # Rank
#         lora_alpha=args.lora_alpha,
#         target_modules=args.lora_target_modules,
#         lora_dropout=args.lora_dropout,
#         bias="none",
#         task_type=TaskType.SEQ_2_SEQ_LM # FLAN-T5
#     )
# model = get_peft_model(model, peft_config)

# llm = HuggingFaceLLM(
#     model = model
#     tokenizer_name="meta-llama/Meta-Llama-3-8B",
#     context_window=3900,
#     max_new_tokens=256,
#     generate_kwargs={"temperature": 0.1},
#     # messages_to_prompt=messages_to_prompt,
#     # completion_to_prompt=completion_to_prompt,
#     device_map="cuda",
# )

Downloading url https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf to path /tmp/llama_index/models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf
total size (MB): 4920.73


4693it [00:52, 88.83it/s]                           
llama_model_loader: loaded meta data with 26 key-value pairs and 291 tensors from /tmp/llama_index/models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Meta-Llama-3-8B-Instruct
llama_model_loader: - kv   2:                          llama.block_count u32              = 32
llama_model_loader: - kv   3:                       llama.context_length u32              = 8192
llama_model_loader: - kv   4:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.attention.head_count u32 

In [None]:
# Prompt used to make the LLM write quiz questions about one chunk of text.
# {context_str} and {num_questions_per_chunk} are the template's two placeholders.
qa_generate_prompt_tmpl = """\
Context information is below.

---------------------
{context_str}
---------------------

Given the context information and not prior knowledge.
generate only questions based on the below query.

You are a Professor. Your task is to setup \
{num_questions_per_chunk} questions for an upcoming \
quiz/examination. The questions should be developped and diverse in nature \
across the document. The questions should not contain options, not start with Q1/ Q2. \
Restrict the questions to the context information provided.\
"""

# Generate questions for a small sample only: every 10th node, first 10 of those.
RC_qa_dataset_llama = generate_question_context_pairs(
    nodes[0::10][0:10],
    llm=llm_llama3,
    num_questions_per_chunk=2,
    qa_generate_prompt_tmpl=qa_generate_prompt_tmpl
)

# Raw generated queries; as the output shows, these still contain boilerplate
# lines such as "Here are your two questions:" (filtered later by select_qa).
queries = list(RC_qa_dataset_llama.queries.values())
queries

  0%|          | 0/10 [00:00<?, ?it/s]Llama.generate: prefix-match hit

llama_print_timings:        load time =  187664.16 ms
llama_print_timings:      sample time =      93.65 ms /    44 runs   (    2.13 ms per token,   469.85 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     0 tokens (    -nan ms per token,     -nan tokens per second)
llama_print_timings:        eval time =   28812.77 ms /    44 runs   (  654.84 ms per token,     1.53 tokens per second)
llama_print_timings:       total time =   28963.57 ms /    44 tokens
 10%|█         | 1/10 [00:28<04:20, 28.98s/it]Llama.generate: prefix-match hit

llama_print_timings:        load time =  187664.16 ms
llama_print_timings:      sample time =      85.98 ms /    41 runs   (    2.10 ms per token,   476.88 tokens per second)
llama_print_timings: prompt eval time =  204051.30 ms /   560 tokens (  364.38 ms per token,     2.74 tokens per second)
llama_print_timings:        eval time =   25499.68 ms /    40 run

['Here are your two questions:',
 'What was the purpose of the consultations conducted by the CICR?',
 'Here are your two questions:',
 'What is the purpose of the conference being organized by the organizers?',
 'Here are your two questions:',
 'What was the total amount of Swiss francs available for distribution by the Commission paritaire chargée de la distribution des revenus du Fonds de l’Impératrice Shéken?',
 'Here are your two questions:',
 'What is the role of Colonel DRAUDT in the organization mentioned?',
 'Here are your two questions:',
 "What was the total amount of solde bénéficiaire at the beginning of the exercise 334'611'16?",
 'Here are your two questions:',
 'What was the ICRC doing in 1970 regarding visits to people from occupied territories?',
 'Here are your two questions:',
 'What was the purpose of the Comité international de la Croix-Rouge (CICR) in this situation?',
 'Here are your two questions:',
 'What is the significance of the new fourth Protocol prohibit

In [None]:
# Remove boilerplate/non-question lines produced by the LLM, then rebuild a
# dataset that contains only the remaining queries.
new_queries, new_corpus, new_docs = select_qa(RC_qa_dataset_llama)

ICRC_qa_dataset_llama = EmbeddingQAFinetuneDataset(
    queries=new_queries,
    corpus=new_corpus,
    relevant_docs=new_docs,
)

# Number of queries kept after filtering.
print(len(ICRC_qa_dataset_llama.queries))

10


In [None]:
# response = llm_llama3.complete("Hello, how are you?")
# print(str(response))

### Evaluation of different RAG models


In [None]:
# Baseline: dense retrieval from the vector index with similarity_top_k=3.
base_retriever = index.as_retriever(similarity_top_k=3)
base_retriever_evaluator = retriever_evaluation(base_retriever)
# Top-level await: relies on the notebook's already-running event loop.
base_eval_results = await base_retriever_evaluator.aevaluate_dataset(ICRC_qa_dataset)
display_results_retriever("Base Retriever", base_eval_results)

Unnamed: 0,Retriever Name,Hit Rate,MRR
0,Base Retriever,0.649753,0.57192


In [None]:
base_retriever = index.as_retriever(similarity_top_k=10)
colbert_reranker = ColbertRerank(
    top_n=3,
    model="colbert-ir/colbertv2.0",
    tokenizer="colbert-ir/colbertv2.0",
    keep_retrieval_score=True,
)
base_colbert_retriever_evaluator = retriever_evaluation(base_retriever, node_postprocessor=[colbert_reranker], metrics =["hit_rate", "mrr", "precision", "recall", "ap", "ndcg"])

base_colbert_eval_results =  await base_colbert_retriever_evaluator.aevaluate_dataset(ICRC_qa_dataset_short)
display_results_retriever("BM25 and colbert Retriever", base_colbert_eval_results)

Unnamed: 0,Retriever Name,Hit Rate,MRR
0,BM25 and colbert Retriever,0.48,0.453333


In [None]:
from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker

bge_reranker = FlagEmbeddingReranker(
    top_n=3,
    model="BAAI/bge-reranker-base",
    use_fp16=False
)

base_bge_retriever_evaluator = retriever_evaluation(base_retriever, node_postprocessor=[bge_reranker], metrics =["hit_rate", "mrr", "precision", "recall", "ap", "ndcg"])

base_bge_eval_results =  await base_bge_retriever_evaluator.aevaluate_dataset(ICRC_qa_dataset)
display_results_retriever("BM25 and bge Retriever", base_bge_eval_results)

tokenizer_config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/799 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

In [None]:
# Sparse baseline: BM25 over the same nodes with similarity_top_k=3.
BM25retriever = BM25Retriever.from_defaults(nodes=nodes, similarity_top_k=3)
BM25_retriever_evaluator = retriever_evaluation(BM25retriever)
BM25_eval_results =  await BM25_retriever_evaluator.aevaluate_dataset(ICRC_qa_dataset_short)
display_results_retriever("BM25 Retriever", BM25_eval_results)


DEBUG:bm25s:Building index from IDs objects


Unnamed: 0,Retriever Name,Hit Rate,MRR
0,BM25 Retriever,0.4,0.35


In [None]:
# BM25 retrieval (similarity_top_k=10) followed by ColBERT re-ranking (top_n=3).
BM25retriever = BM25Retriever.from_defaults(nodes=nodes, similarity_top_k=10)
colbert_reranker = ColbertRerank(
    top_n=3,
    model="colbert-ir/colbertv2.0",
    tokenizer="colbert-ir/colbertv2.0",
    keep_retrieval_score=True,
)

BM25_colbert_retriever_evaluator = retriever_evaluation(BM25retriever, node_postprocessor=[colbert_reranker], metrics =["hit_rate", "mrr", "precision", "recall", "ap", "ndcg"])

BM25_colbert_eval_results =  await BM25_colbert_retriever_evaluator.aevaluate_dataset(ICRC_qa_dataset_short)
# Six metrics are computed above, but only the default ones (mrr, hit_rate) are summarised here.
display_results_retriever("BM25 and colbert Retriever", BM25_colbert_eval_results)

DEBUG:bm25s:Building index from IDs objects


Unnamed: 0,Retriever Name,Hit Rate,MRR
0,OpenAI Embedding Retriever,0.46,0.44


In [None]:
# BM25 retrieval (similarity_top_k=10) followed by bge re-ranking (top_n=3).
# FlagEmbeddingReranker is imported in an earlier cell of this notebook.
BM25retriever = BM25Retriever.from_defaults(nodes=nodes, similarity_top_k=10)
bge_reranker = FlagEmbeddingReranker(
    top_n=3,
    model="BAAI/bge-reranker-base",
    use_fp16=False
)

BM25_bge_retriever_evaluator = retriever_evaluation(BM25retriever, node_postprocessor=[bge_reranker], metrics =["hit_rate", "mrr", "precision", "recall", "ap", "ndcg"])

BM25_bge_eval_results =  await BM25_bge_retriever_evaluator.aevaluate_dataset(ICRC_qa_dataset_short)
# Six metrics are computed above, but only the default ones (mrr, hit_rate) are summarised here.
display_results_retriever("BM25 and bge Retriever", BM25_bge_eval_results)

DEBUG:bm25s:Building index from IDs objects


Unnamed: 0,Retriever Name,Hit Rate,MRR
0,BM25 and bge Retriever,0.48,0.453333


In [None]:
!pip install faiss
%pip install llama-index-vector-stores-faiss

[31mERROR: Could not find a version that satisfies the requirement faiss (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for faiss[0m[31m
[0mCollecting llama-index-vector-stores-faiss
  Downloading llama_index_vector_stores_faiss-0.1.2-py3-none-any.whl (3.6 kB)
Installing collected packages: llama-index-vector-stores-faiss
Successfully installed llama-index-vector-stores-faiss-0.1.2


In [None]:
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.core import StorageContext, ServiceContext
import faiss
faiss_index = faiss.IndexFlatL2(384)
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
service_context = ServiceContext.from_defaults(
        llm=llm_llama3,
        embed_model=embed_model,
    )
index_faiss = VectorStoreIndex(nodes, storage_context=storage_context, show_progress=True,service_context=service_context)
base_retriever = index_faiss.as_retriever(similarity_top_k=3)
base_retriever_evaluator = retriever_evaluation(base_retriever)
base_eval_results = await base_retriever_evaluator.aevaluate_dataset(ICRC_qa_dataset_short)
display_results_retriever("Base Retriever", base_eval_results)

  service_context = ServiceContext.from_defaults(


Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/358 [00:00<?, ?it/s]

Unnamed: 0,Retriever Name,Hit Rate,MRR
0,Base Retriever,0.51,0.425


In [None]:
# Inspect what the dense retriever returns for one of the generated questions.
# `retriever` was never defined anywhere in the notebook, so build it here.
# NOTE(review): the default vector index is assumed to be the intended source - confirm.
retriever = index.as_retriever(similarity_top_k=3)
source_nodes = retriever.retrieve('What was the total amount of Swiss francs available for distribution by the Commission paritaire chargée de la distribution des revenus du Fonds de l’Impératrice Shéken?')
for node in source_nodes:
    # print(node.metadata)
    print("---------------------------------------------")
    print(f"Score: {node.score:.3f}")
    print(node.get_content())
    print("---------------------------------------------\n\n")

---------------------------------------------
Score: 0.882
La Commission paritaire chargée de la distribution des revenus
du fonds de l’Impératrice Shéken s’est réunie 4 Genéve le 17 mars
1948. Elle a pris connaissance de la situation de ce fonds au
31 décembre 1947. On trouvera plus loin en annexe la situation du
fonds et la eomposition actuelle du portefeuille.

Les revenus du fonds de l’Impératrice Shéken pour l’année
1947, auxquels s’ajoute le solde actif du précédent exercice, per-
mettaient de distribuer une somme de fr. 13.841,—.

La commission a décidé de répartir fr. 13.000,—.

IIuit sociétés nationales de la Croix-Rouge s’étaient inscrites
dans les délais preserits pour obtenir une allocation sur ces
revenus. Ce nombre de demandes, beaucoup plus élévé que d’ordi-
naire, 0’a pas permis a la Commission de répondre affirmative-
ment pour tous les cas.

Tenant compte de Vobjet des demandes et de leur ecaractére
d@urgenee, prenant en considération les allocations déja obtenues
ant

In [None]:
from llama_index.core.llama_pack import download_llama_pack


# download and install dependencies
RAGatouilleRetrieverPack = download_llama_pack(
    "RAGatouilleRetrieverPack", "./ragatouille_pack"
)

# Build the pack from the raw documents (not from the pre-split nodes).
ragatouille_pack = RAGatouilleRetrieverPack(
    documents,
    llm=llm_llama3,
    index_name="my_index",
    top_k=3,
)

# Pull the retriever out of the pack so it can go through the same evaluation helper.
colbert_retriever = ragatouille_pack.get_modules()["retriever"]

# NOTE(review): the pack indexes `documents` itself, so the ids it returns may not
# match the node ids stored in ICRC_qa_dataset_short - confirm before trusting the scores.
colbert_retriever_evaluator = retriever_evaluation(colbert_retriever)
colbert_eval_results =  await colbert_retriever_evaluator.aevaluate_dataset(ICRC_qa_dataset_short)
display_results_retriever("RAGatouille Retriever", colbert_eval_results)

### Using Haystack

In [None]:
%%bash
# Install Haystack (farm-haystack) with the colab, inference and metrics extras.
# NOTE(review): the install log shows this pulls pydantic<2, which may conflict
# with packages installed earlier in the notebook - confirm compatibility.
pip install farm-haystack[colab,inference,metrics]

Collecting farm-haystack[colab,inference,metrics]
  Downloading farm_haystack-1.26.2-py3-none-any.whl (763 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 763.7/763.7 kB 8.2 MB/s eta 0:00:00
Collecting boilerpy3 (from farm-haystack[colab,inference,metrics])
  Downloading boilerpy3-1.0.7-py3-none-any.whl (22 kB)
Collecting events (from farm-haystack[colab,inference,metrics])
  Downloading Events-0.5-py3-none-any.whl (6.8 kB)
Collecting lazy-imports==0.3.1 (from farm-haystack[colab,inference,metrics])
  Downloading lazy_imports-0.3.1-py3-none-any.whl (12 kB)
Collecting posthog (from farm-haystack[colab,inference,metrics])
  Downloading posthog-3.5.0-py2.py3-none-any.whl (41 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 41.3/41.3 kB 5.1 MB/s eta 0:00:00
Collecting prompthub-py==4.0.0 (from farm-haystack[colab,inference,metrics])
  Downloading prompthub_py-4.0.0-py3-none-any.whl (6.9 kB)
Collecting pydantic<2 (from farm-haystack[colab,inference,metrics])
  Downloading pydantic-1.10.1

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf-cu12 24.4.1 requires pyarrow<15.0.0a0,>=14.0.1, but you have pyarrow 15.0.2 which is incompatible.


In [None]:
import logging

# Root logger at WARNING, with the "haystack" logger raised to INFO.
logging.basicConfig(format="%(levelname)s - %(name)s -  %(message)s", level=logging.WARNING)
logging.getLogger("haystack").setLevel(logging.INFO)

In [None]:
from haystack.nodes import DensePassageRetriever
from haystack.utils import fetch_archive_from_http
from haystack.document_stores import InMemoryDocumentStore

### Evaluation: faithfulness and relevancy

In [None]:
from llama_index.core.evaluation import BatchEvalRunner, FaithfulnessEvaluator, RelevancyEvaluator
from llama_index.core import ServiceContext

# Let's pick 10 queries to do evaluation
batch_eval_queries = queries

service_context = ServiceContext.from_defaults(llm=llm , embed_model=embed_model)
faithfulness = FaithfulnessEvaluator(service_context=service_context)
relevancy = RelevancyEvaluator(service_context=service_context)

# Initiate BatchEvalRunner to compute FaithFulness and Relevancy Evaluation.
runner = BatchEvalRunner(
    {"faithfulness": faithfulness, "relevancy": relevancy},
    workers=8,
)

# Compute evaluation
eval_results = await runner.aevaluate_queries(
    query_engine, queries=batch_eval_queries
)

  service_context = ServiceContext.from_defaults(llm=llm , embed_model=embed_model)
Llama.generate: prefix-match hit

llama_print_timings:        load time =  203682.69 ms
llama_print_timings:      sample time =      13.19 ms /    22 runs   (    0.60 ms per token,  1667.68 tokens per second)
llama_print_timings: prompt eval time =  431695.43 ms /  1107 tokens (  389.97 ms per token,     2.56 tokens per second)
llama_print_timings:        eval time =   13815.15 ms /    21 runs   (  657.86 ms per token,     1.52 tokens per second)
llama_print_timings:       total time =  445542.56 ms /  1128 tokens
Llama.generate: prefix-match hit

llama_print_timings:        load time =  203682.69 ms
llama_print_timings:      sample time =      53.21 ms /    93 runs   (    0.57 ms per token,  1747.69 tokens per second)
llama_print_timings: prompt eval time =  407188.84 ms /  1050 tokens (  387.80 ms per token,     2.58 tokens per second)
llama_print_timings:        eval time =   60855.28 ms /    92 runs

In [None]:
# Share of evaluated queries whose result is marked as passing, per evaluator
# (presumably `passing` is a boolean, so the sum counts the passes - confirm).
faithfulness_score = sum(result.passing for result in eval_results['faithfulness']) / len(eval_results['faithfulness'])
relevancy_score = sum(result.passing for result in eval_results['relevancy']) / len(eval_results['relevancy'])
print(faithfulness_score, relevancy_score)

0.5 0.5
