## Run on T4 GPU

In [None]:
!pip install llama-index
!pip install llama-index-readers-file

In [None]:
!pip install llama-index-embeddings-huggingface
!pip install llama-index-llms-google-genai

In [19]:
import nest_asyncio

nest_asyncio.apply()

In [2]:
import os
from google.colab import userdata

os.environ['GOOGLE_API_KEY'] = userdata.get("GOOGLE_API_KEY")

In [3]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter

In [4]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.google_genai import GoogleGenAI

In [5]:
model_name = "thenlper/gte-large"
embed_model = HuggingFaceEmbedding(model_name=model_name)

Access to the secret `HF_TOKEN` has not been granted on this notebook.
You will not be requested again.
Please restart the session if you want to be prompted again.


modules.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/619 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/670M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

In [6]:
llm = GoogleGenAI(model="gemini-2.5-flash",max_tokens=4096,temperature=0.1)

In [7]:
from llama_index.core import Settings

Settings.embed_model = embed_model
Settings.llm = llm

In [8]:
documents = SimpleDirectoryReader("./data").load_data()

In [9]:
node_parser = SentenceSplitter(chunk_size=1000)
nodes = node_parser.get_nodes_from_documents(documents)

In [11]:
vector_index = VectorStoreIndex(nodes)
retriever = vector_index.as_retriever(similarity_top_k=3)

In [12]:
from llama_index.core.evaluation import RetrieverEvaluator
from llama_index.core.evaluation import generate_question_context_pairs

In [13]:
retriever_evaluator = RetrieverEvaluator.from_metric_names(
    ["mrr", "hit_rate"], retriever=retriever
)

In [14]:
qa_dataset = generate_question_context_pairs(
    nodes,
    llm=llm,
    num_questions_per_chunk=2
)

100%|██████████| 12/12 [01:05<00:00,  5.49s/it]


In [15]:
dict(qa_dataset).keys()

dict_keys(['queries', 'corpus', 'relevant_docs', 'mode'])

In [16]:
qa_dataset.queries

{'9fa3b6cb-559a-4042-b5f0-3ab60f33473c': 'Here are two questions for your upcoming quiz/examination:',
 '470e7ea4-c06f-4b80-a109-3cff71bb5e51': 'Describe the primary motivation for developing LuxLlama, detailing the base model it was fine-tuned from and the specific Parameter-Efficient Fine-Tuning (PEFT) technique employed, along with its optimization kernel.',
 '8ac5537f-7c4b-4a3d-8a45-f693cfe16abe': 'Here are two questions for your upcoming quiz/examination:',
 '76dc95b4-dcbf-40dc-a9bb-a6782c095d12': 'The text highlights that building general multilingual models is often insufficient for low-resource languages like Luxembourgish. According to the document, what specific details do these general models frequently miss, and what two contributions did the LuxLlama project aim to provide to fill this gap?',
 '01dc0b4b-4f7d-47d4-ba7d-14376cd1ccab': 'Here are two questions for your upcoming quiz/examination:',
 '225cad79-eee4-47e8-b041-4f61db760227': '---',
 '46fabcc9-65ea-42c2-bb5d-4b1026

In [17]:
qa_dataset.relevant_docs

{'9fa3b6cb-559a-4042-b5f0-3ab60f33473c': ['be3d248c-34eb-41be-8ccb-2dabc9fa54f0'],
 '470e7ea4-c06f-4b80-a109-3cff71bb5e51': ['be3d248c-34eb-41be-8ccb-2dabc9fa54f0'],
 '8ac5537f-7c4b-4a3d-8a45-f693cfe16abe': ['65e3f7ae-9b32-4b4c-b665-474c79a64144'],
 '76dc95b4-dcbf-40dc-a9bb-a6782c095d12': ['65e3f7ae-9b32-4b4c-b665-474c79a64144'],
 '01dc0b4b-4f7d-47d4-ba7d-14376cd1ccab': ['8680cc36-325e-462f-9abd-c0b8c5f86f77'],
 '225cad79-eee4-47e8-b041-4f61db760227': ['8680cc36-325e-462f-9abd-c0b8c5f86f77'],
 '46fabcc9-65ea-42c2-bb5d-4b10263202cb': ['4b4905a0-1de1-4990-9f15-6bd2539524e9'],
 '7a4f3cdf-b224-4099-abf4-be885232e3f5': ['4b4905a0-1de1-4990-9f15-6bd2539524e9'],
 'f144f5c2-3be6-4bcb-88b1-1255666cc14b': ['fe002f0d-4782-4920-84b3-9b698270ddf1'],
 '7d172610-807f-44be-be25-f8d43b2efa8f': ['fe002f0d-4782-4920-84b3-9b698270ddf1'],
 '50ac4941-b590-4bc9-9748-2d4fc2dc323e': ['499d67cb-7e08-4631-8048-9eb3125e750a'],
 '387bf3aa-bf4a-498b-8349-d22444682f08': ['499d67cb-7e08-4631-8048-9eb3125e750a'],
 'd3

In [None]:
qa_dataset.corpus

In [21]:
nest_asyncio.apply()
eval_results = await retriever_evaluator.aevaluate_dataset(qa_dataset)

In [22]:
def display_results(name, eval_results):
    import pandas as pd

    metric_dicts = []
    for eval_result in eval_results:
        metric_dict = eval_result.metric_vals_dict
        metric_dicts.append(metric_dict)

    full_df = pd.DataFrame(metric_dicts)

    hit_rate = full_df["hit_rate"].mean()
    mrr = full_df["mrr"].mean()

    metric_df = pd.DataFrame(
        {"Retriever Name": [name], "Hit Rate": [hit_rate], "MRR": [mrr]}
    )

    return metric_df

In [23]:
display_results(f"{model_name} Retriever", eval_results)

Unnamed: 0,Retriever Name,Hit Rate,MRR
0,thenlper/gte-large Retriever,0.666667,0.569444
