# RAG Framework using Hugging Face libraries and dataset

In [1]:
!uv pip install sentence-transformers faiss-cpu autoawq==0.2.9 --quiet

In [2]:
%%writefile rag-pipeline-hf-lib-dataset.py

#!uv pip install sentence-transformers faiss-cpu autoawq==0.2.9 --quiet

import pandas as pd
import numpy as np
from tqdm import tqdm
from scipy.special import expit

import faiss
from sentence_transformers import SentenceTransformer
from sentence_transformers import CrossEncoder
from datasets import load_dataset
from transformers import pipeline


def q_retrival_with_corss_encoder(query, model, index, cross_encoder_model, top_k=10):
    """Retrieve candidate passages for ``query`` and filter them with a cross-encoder.

    The query is embedded with ``model``, the ``top_k`` nearest vectors are
    looked up in ``index``, the matching passages are read from the
    module-level ``unpack_list`` and handed to ``cross_encoder_reranker``.

    Args:
        query: Question text to search for.
        model: Object exposing ``encode(list_of_str)``; its output is passed
            straight to ``index.search``.
        index: Object exposing ``search(embeddings, k)`` that returns a
            ``(distances, indexes)`` pair (a FAISS index in this script).
        cross_encoder_model: Model forwarded to ``cross_encoder_reranker``.
        top_k: Number of nearest neighbours requested from ``index``.

    Returns:
        The newline-joined passages produced by ``cross_encoder_reranker``,
        or an empty string when the index returned no usable hit.
    """
    q_embedding = model.encode([query])
    _, indexes = index.search(q_embedding, top_k)

    # FAISS pads the result with -1 when fewer than top_k neighbours exist.
    # A negative label must be dropped, otherwise Python's negative indexing
    # would silently pick the last passage of unpack_list.
    retrived_document = [unpack_list[i] for i in indexes[0] if i >= 0]
    if not retrived_document:
        return ""

    return cross_encoder_reranker(query, retrived_document, cross_encoder_model)


def cross_encoder_reranker(query, retrived_document, cross_encoder_model):
    """Score passages against ``query``, drop weak ones and order the rest by score.

    Args:
        query: Question text.
        retrived_document: Sequence of candidate passage strings.
        cross_encoder_model: Object exposing ``predict(list_of_pairs)`` that
            returns one numeric score per ``(query, passage)`` pair.

    Returns:
        The surviving passages joined with newlines, highest score first
        (ties keep their incoming order). Empty string when there are no
        candidates or none passes the cut-off.
    """
    # Nothing to score: avoid calling the model with an empty batch.
    if len(retrived_document) == 0:
        return ""

    data_prep = [(query, retrived) for retrived in retrived_document]
    output_socre = cross_encoder_model.predict(data_prep)

    # NOTE(review): the original code called this value "softmax", but it is
    # sigmoid(x) / (1 + sigmoid(x)), which is bounded by (0, 0.5). The formula
    # and the 0.1 cut-off are kept unchanged here -- confirm they are intended.
    sigmoid = expit(output_socre)
    relevance = sigmoid / (1 + sigmoid)

    # The cut-off is applied to the score rounded to two decimals, as before.
    kept = [
        (document, score)
        for document, score in zip(retrived_document, relevance)
        if round(score, 2) > 0.1
    ]

    # Actually rerank: most relevant passage first. list.sort is stable, so
    # equally scored passages keep their retrieval order.
    kept.sort(key=lambda pair: pair[1], reverse=True)

    return "\n".join(document for document, _ in kept)

## 3. Preparing the prompt for each query and the context fetched by the retriever.

def prepare_prompt(question, context):
    """Build the generation prompt for one question and its retrieved context.

    Args:
        question: Question text inserted into the prompt.
        context: Retrieved passages inserted into the prompt.

    Returns:
        The prompt string with ``question`` and ``context`` interpolated.
    """
    # The f-string already interpolates both values. The former trailing
    # .format(question, context) re-parsed the finished text, so any "{" or
    # "}" inside the question or context raised an error or was rewritten.
    prompt_template =  f"""You are a medical professional with knowledge of medical termilogy and field.

    for a given question

    {question}

    with the context provided below:

    {context}

    now based on this give you answer below

    """

    return prompt_template


def inference_in_batches(df, batch_size=16):
    """Generate an answer for every prompt in ``df`` and store it in a new column.

    A ``text-generation`` pipeline for ``mistralai/Mistral-7B-Instruct-v0.3``
    is created on each call, then the ``prompt`` column is fed to it in
    consecutive slices of ``batch_size`` rows.

    Args:
        df: DataFrame with a ``prompt`` column. It is modified in place.
        batch_size: Number of prompts per slice; also forwarded to the
            pipeline as its ``batch_size``.

    Returns:
        The same ``df`` with an added ``ai_final_answer`` column holding the
        stripped generated text for each row.
    """
    text_generator = pipeline('text-generation', model='mistralai/Mistral-7B-Instruct-v0.3')

    results = []
    for i in tqdm(range(0, len(df), batch_size)):
        # Slice width must equal the loop step. It was hard-coded to 16, so
        # any other batch_size skipped or duplicated rows and left `results`
        # with a length different from len(df).
        prompts_batch = df['prompt'].iloc[i : i + batch_size].tolist()
        otuput_batch = text_generator(prompts_batch, batch_size=batch_size,
                                      max_new_tokens=1000, return_full_text=False,
                                      num_return_sequences=1)
        # Each element is indexed as a list of generations; one sequence is
        # requested per prompt, so the first entry is taken.
        results.extend([o[0]["generated_text"].strip() for o in otuput_batch])

    df['ai_final_answer'] = results

    return df


# --- Data: labelled PubMedQA split pulled from the Hugging Face Hub ---
dataset = load_dataset("pubmed_qa", "pqa_labeled")
df = pd.DataFrame(dataset['train'])

# --- Models: cross-encoder for scoring pairs, bi-encoder for embeddings ---
cross_encoder_model = CrossEncoder("cross-encoder/ms-marco-TinyBERT-L2-v2")
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# --- Corpus: one list of passages per row, then a single flat list ---
# unpack_list is read by q_retrival_with_corss_encoder to map hits to text.
list_of_list = [record['contexts'] for record in df["context"]]
unpack_list = [passage for passages in list_of_list for passage in passages]

# --- Index: embed every passage and store the vectors in a flat L2 index ---
context_embeddings = embedding_model.encode(unpack_list, show_progress_bar=True)

embedding_dim = context_embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(np.array(context_embeddings))

# --- Retrieval: fetch and filter the passages for each question ---
df['retrived_documents'] = [
    q_retrival_with_corss_encoder(question, embedding_model, index, cross_encoder_model)
    for question in df["question"]
]

# --- Prompts: combine each question with its retrieved passages ---
df['prompt'] = [
    prepare_prompt(question, documents)
    for question, documents in zip(df['question'], df['retrived_documents'])
]

# --- Persist the questions, retrieved passages and prompts ---
df.to_csv("fina_output.csv", index=False)

Writing rag-pipeline-hf-lib-dataset.py


In [3]:
!python rag-pipeline-hf-lib-dataset.py

2025-10-26 06:03:48.218071: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1761458628.458687      72 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1761458628.523415      72 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
README.md: 5.19kB [00:00, 23.5MB/s]
pqa_labeled/train-00000-of-00001.parquet: 100%|█| 1.08M/1.08M [00:00<00:00, 1.47
Generating train split: 100%|█████| 1000/1000 [00:00<00:00, 60117.88 examples/s]
config.json: 100%|█████████████████████████████| 787/787 [00:00<00:00, 6.08MB/s]
model.safetensors: 100%|███████████████████| 17.6M/17.6M [00:00<00:00, 47.1MB/s]
tokenizer_config.json: 1.33kB [00:00, 8.07MB/s]
vocab.txt: 232kB [00