In [1]:
import ollama

import faiss
import pickle
import os
from sentence_transformers import SentenceTransformer
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM

import torch



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from sentence_transformers import SentenceTransformer
import faiss
import pickle
import torch

def load_retriever(
    index_path: str,
    chunks_path: str,
    model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
    max_seq_length: int = 512,
):
    """Load everything the retriever needs: embedding model, FAISS index and chunks.

    Args:
        index_path: Path to a FAISS index file, read with ``faiss.read_index``.
        chunks_path: Path to a pickle file containing the stored chunks.
            NOTE(review): presumably the chunk order matches the index row
            order — confirm against the code that built the index.
        model_name: SentenceTransformer model identifier to load. Defaults to
            the MiniLM model this notebook was written for.
        max_seq_length: Value assigned to ``model.max_seq_length``.

    Returns:
        A tuple ``(model, index, chunks_list, device)`` where ``device`` is the
        string ``"cuda"`` when ``torch.cuda.is_available()`` is true, else ``"cpu"``.
    """
    # Prefer the GPU when one is visible to torch.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Load the sentence-embedding model directly onto the chosen device.
    model = SentenceTransformer(model_name, device=device)

    # Upper bound on the sequence length the model will accept.
    model.max_seq_length = max_seq_length
    # NOTE(review): the original comment says this is kept for Urdu when a
    # custom tokenizer is used; whether the attribute has any effect on the
    # loaded model's tokenizer is not verifiable from this notebook.
    model.tokenizer.do_lower_case = False

    # Load FAISS index
    index = faiss.read_index(index_path)

    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only point chunks_path at files you created yourself or fully trust.
    with open(chunks_path, "rb") as f:
        chunks_list = pickle.load(f)

    return model, index, chunks_list, device


In [3]:
# Build the retriever state (embedding model, FAISS index, stored chunks, device)
# from the index file and the pickled chunk file given below.
(model, index, chunks_list, device) = load_retriever(
    "../../vector_db/paragraphs/5884_paras/5884_paras_index.faiss",
    "../../data_storage/Paragraph_chunks/5884_paragraphs/5884_parachunks.pkl",
)

In [4]:
# Display the device the loaded embedding model reports it is on.
model.device

device(type='cuda', index=0)

In [5]:
def retrieve_documents(query, k=3):
    """Return the stored chunks nearest to ``query`` according to the FAISS index.

    Uses the notebook-level ``model``, ``index`` and ``chunks_list`` objects.

    Args:
        query: Text to embed and search for.
        k: Number of neighbours requested from the index.

    Returns:
        A list of at most ``k`` chunks, in the order the index returned them.
        Fewer than ``k`` chunks are returned when the index has fewer matches.
    """
    # Encode the query as a one-item batch, which is the shape index.search expects.
    query_embedding = model.encode([query])

    # Search the FAISS index; only the neighbour ids are needed, not the distances.
    _, indices = index.search(query_embedding, k)

    # FAISS fills unused result slots with -1. Without the filter, Python's
    # negative indexing would silently return the *last* chunk as if it were a hit.
    return [chunks_list[i] for i in indices[0] if i >= 0]

In [6]:
def generate_using_llama3(context, query):
    """Ask the local ``llama3:8b`` model (via ollama) to answer ``query`` from ``context``.

    Args:
        context: Retrieved text inserted under "Retrieved Context:" in the prompt.
        query: Question inserted under "Query:" in the prompt.

    Returns:
        The model's response with surrounding whitespace stripped, or the
        literal string "Error generating answer." if anything raises while
        generating or reading the response (the error is printed).
    """
    prompt_text = f"""You are a helpful assistant designed to generate precise and informative answers based strictly on the given context.

Query:
{query}

Retrieved Context:
{context}

Instruction:
Answer the query using only the information present in the retrieved context. If the answer is not directly stated, make the best possible inference from the available context. Do not say "no information available", "cannot answer", or provide disclaimers. Only return a clear and direct answer — no introductions, no explanations, and no repetition of the query."""

    # Start from the fallback so there is a single exit point; it is only
    # replaced when generation and response parsing both succeed.
    answer = "Error generating answer."
    try:
        result = ollama.generate(prompt=prompt_text, model='llama3:8b')
        answer = result['response'].strip()
    except Exception as err:
        print("Error during generation:", err)
    return answer


In [7]:
def rag_pipeline(query: str, k=3) -> str:
    """Answer ``query`` by retrieving ``k`` chunks and generating from them.

    Args:
        query: The user question.
        k: Number of chunks to retrieve for the prompt context.

    Returns:
        The string returned by ``generate_using_llama3``.
    """
    retrieved_chunks = retrieve_documents(query, k=k)
    print("retrieved_chunks: ", retrieved_chunks)

    # Join the chunks into one text block, the same way the batch loop in this
    # notebook builds its context, instead of passing the raw list.
    context = "\n".join(retrieved_chunks)

    # generate_using_llama3 is declared as (context, query). The arguments are
    # passed by keyword so the question and the context cannot be swapped.
    answer = generate_using_llama3(context=context, query=query)
    return answer

In [9]:
import pandas as pd



# Read just the four columns the evaluation uses, and expose the
# 'actual_retrieved_sentences' column under the shorter name 'context'.
df = (
    pd.read_csv(
        '../../../Dataset_code_csvs/hotpotQA/hotpotQA_dataset_versions/5884paras_598queries/English/598_QnAs.csv',
        usecols=['level', 'question', 'answer', 'actual_retrieved_sentences'],
    )
    .rename(columns={'actual_retrieved_sentences': 'context'})
)

# Preview the first rows
print(df.head())

  level                                           question          answer  \
0  easy  The 2017–18 Serie B (known as the Serie B ConT...            1929   
1  easy  What specialism did Alec Naylor Dakin, fellow ...    cryptologist   
2  easy  Which actor from Jurassic Park assisted David ...         BD Wong   
3  easy  What character, voiced by Dan Castellaneta, st...  Grampa Simpson   
4  easy  Who was a part of S#arp, Lee Ji-hye or Curtis ...      Lee Ji-hye   

                                             context  
0   A total of 22 teams are contesting the league...  
1  Alec Naylor Dakin (3 April 1912 – 14 June 2003...  
2   It was directed by Jerry Zaks, with B. D. Won...  
3  "Loan-a Lisa" is the second episode of "The Si...  
4  Lee Ji-hye (born January 11, 1980) is a South ...  


In [11]:
import os
import time
from datetime import timedelta

# Run settings
BATCH_SIZE = 100  # number of records between checkpoint writes to the CSV
TOP_K = 3         # chunks retrieved per question

# Result columns filled in by the loop below
df['retrieved_context'] = ""
df['final_answer'] = ""
df['retriever_time'] = 0.0
df['generator_time'] = 0.0
df['total_time'] = 0.0

# Relative path to output directory
output_dir = "../../results/pipeline results/5884paras_598qna"
os.makedirs(output_dir, exist_ok=True)
output_csv = os.path.join(output_dir, "simple_rag_qna_results_GPU_version.csv")

# Timing and checkpoint bookkeeping
total_records = len(df)
total_start_time = time.time()
batch_start_time = total_start_time
batch_start_pos = 0   # position of the first row not yet written to disk
first_batch = True    # the first write replaces any file left by an earlier run
processed_count = 0

print(f"Starting processing of {total_records} records at {time.strftime('%Y-%m-%d %H:%M:%S')}")
print("="*80)

# `pos` is the 0-based row position (used for slicing and record numbers);
# `i` is the index label (used with df.at), so a non-default index still works.
for pos, (i, row) in enumerate(df.iterrows()):
    record_number = pos + 1
    record_start_time = time.time()
    query = row['question']

    # Print current record being processed
    print(f"\nProcessing record {record_number}...")

    # Retrieve documents
    retriever_start = time.time()
    retrieved_chunks = retrieve_documents(query, k=TOP_K)
    retriever_time = time.time() - retriever_start
    context_text = "\n".join(retrieved_chunks)

    # Generate answer. The signature is generate_using_llama3(context, query);
    # keywords keep the question and the retrieved text in the right slots.
    generator_start = time.time()
    final_answer = generate_using_llama3(context=context_text, query=query)
    generator_time = time.time() - generator_start

    # Update dataframe
    df.at[i, 'retrieved_context'] = context_text
    df.at[i, 'final_answer'] = final_answer
    df.at[i, 'retriever_time'] = retriever_time
    df.at[i, 'generator_time'] = generator_time
    df.at[i, 'total_time'] = time.time() - record_start_time

    # Print record processing time
    print(f"Completed record {record_number} in {df.at[i, 'total_time']:.2f}s "
          f"(Retriever: {retriever_time:.2f}s, Generator: {generator_time:.2f}s)")

    # Save progress every BATCH_SIZE records and once more after the last record
    if record_number % BATCH_SIZE == 0 or record_number == total_records:
        batch_duration = time.time() - batch_start_time
        processed_count = record_number - batch_start_pos  # handles the partial last batch

        print("\n" + "="*60)
        print(f"BATCH SUMMARY: Records {batch_start_pos + 1}-{record_number}")
        print(f"Batch processing time: {timedelta(seconds=batch_duration)}")
        print(f"Average time per record: {batch_duration/processed_count:.2f}s")
        print(f"Current timestamp: {time.strftime('%Y-%m-%d %H:%M:%S')}")

        # Write exactly the rows processed since the previous checkpoint, so a
        # short final batch does not re-write rows that were already saved.
        df.iloc[batch_start_pos:record_number].to_csv(
            output_csv,
            mode='w' if first_batch else 'a',
            header=first_batch,
            index=False,
            encoding="utf-8-sig"
        )
        print(f"Saved batch to: {os.path.abspath(output_csv)}")
        print("="*60 + "\n")

        first_batch = False
        batch_start_pos = record_number
        batch_start_time = time.time()

# Final statistics
total_duration = time.time() - total_start_time
# Guard the average so an empty dataframe does not raise ZeroDivisionError.
average_per_record = total_duration / total_records if total_records else 0.0
print("\n" + "="*80)
print(f"PROCESSING COMPLETED: {total_records} records")
print(f"Total processing time: {timedelta(seconds=total_duration)}")
print(f"Average time per record: {average_per_record:.2f}s")
print(f"Total retriever time: {df['retriever_time'].sum():.2f}s")
print(f"Total generator time: {df['generator_time'].sum():.2f}s")
print("="*80)

Starting processing of 598 records at 2025-05-21 16:16:04

Processing record 1...
Completed record 1 in 0.83s (Retriever: 0.04s, Generator: 0.78s)

Processing record 2...
Completed record 2 in 0.33s (Retriever: 0.01s, Generator: 0.32s)

Processing record 3...
Completed record 3 in 0.29s (Retriever: 0.01s, Generator: 0.28s)

Processing record 4...
Completed record 4 in 0.37s (Retriever: 0.01s, Generator: 0.36s)

Processing record 5...
Completed record 5 in 0.36s (Retriever: 0.01s, Generator: 0.35s)

Processing record 6...
Completed record 6 in 0.29s (Retriever: 0.02s, Generator: 0.27s)

Processing record 7...
Completed record 7 in 0.37s (Retriever: 0.01s, Generator: 0.36s)

Processing record 8...
Completed record 8 in 0.63s (Retriever: 0.01s, Generator: 0.62s)

Processing record 9...
Completed record 9 in 0.69s (Retriever: 0.01s, Generator: 0.68s)

Processing record 10...
Completed record 10 in 1.14s (Retriever: 0.01s, Generator: 1.12s)

Processing record 11...
Completed record 11 in 0.