In [16]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


#Installs

In [2]:
!pip install llama-index==0.10.18 llama-index-llms-groq==0.1.3 groq==0.4.2 llama-index-embeddings-huggingface==0.2.0
!pip install datasets==2.16.1 PyPDF2==3.0.1 langchain_experimental python-dotenv==1.0.1 pyyaml==6.0.1 coloredlogs==15.0.1 mdc==1.2.1 pytest==8.1.2
!pip install -r drive/MyDrive/RAG-llamaindex/requirements.txt
!pip install huggingface_hub
!pip install llama-index-llms-huggingface
!pip uninstall -y bitsandbytes accelerate transformers
!pip install bitsandbytes accelerate transformers


Collecting llama-index==0.10.18
  Downloading llama_index-0.10.18-py3-none-any.whl (5.6 kB)
Collecting llama-index-llms-groq==0.1.3
  Downloading llama_index_llms_groq-0.1.3-py3-none-any.whl (2.7 kB)
Collecting groq==0.4.2
  Downloading groq-0.4.2-py3-none-any.whl (65 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.7/65.7 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting llama-index-embeddings-huggingface==0.2.0
  Downloading llama_index_embeddings_huggingface-0.2.0-py3-none-any.whl (7.1 kB)
Collecting llama-index-agent-openai<0.2.0,>=0.1.4 (from llama-index==0.10.18)
  Downloading llama_index_agent_openai-0.1.7-py3-none-any.whl (12 kB)
Collecting llama-index-cli<0.2.0,>=0.1.2 (from llama-index==0.10.18)
  Downloading llama_index_cli-0.1.12-py3-none-any.whl (26 kB)
Collecting llama-index-core<0.11.0,>=0.10.18 (from llama-index==0.10.18)
  Downloading llama_index_core-0.10.53.post1-py3-none-any.whl (15.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

#Imports


In [2]:
# For Loading the LLM
import json
import torch
from transformers import (AutoTokenizer,
                          AutoModelForCausalLM,
                          BitsAndBytesConfig,
                          pipeline)

from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    ServiceContext,
    load_index_from_storage
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter

import warnings
warnings.filterwarnings('ignore')

# For Data Ingestion
from typing import Literal, Any, List
import logging
import PyPDF2
import sys
sys.path.append('drive/MyDrive/RAG-llamaindex')
import os
from langchain_experimental.text_splitter import SemanticChunker
from math import ceil

# For Embeddings
from transformers import AutoModel

# For the LLM Model
from llama_index.llms.huggingface import HuggingFaceLLM
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


#Loading the LLM

In [3]:
import json
import torch
from transformers import (AutoTokenizer,
                          AutoModelForCausalLM,
                          BitsAndBytesConfig,
                          pipeline)

from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    ServiceContext,
    load_index_from_storage
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter

import warnings
warnings.filterwarnings('ignore')

In [4]:
# Getting the Hugging Face token
config_data = json.load(open("drive/MyDrive/RAG-llamaindex/config.json"))
HF_TOKEN = config_data["HF_TOKEN"]

# Defining the pre-trained model we will use, which is Llama-3-8B
model_name = "meta-llama/Meta-Llama-3-8B"

# Defining the quantization configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Defing the Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name ,
                                          token=HF_TOKEN)
tokenizer.pad_token = tokenizer.eos_token

# Initialising the LLM
model = AutoModelForCausalLM.from_pretrained(
    model_name ,
    device_map="auto",
    quantization_config=bnb_config,
    token=HF_TOKEN
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

#Data Ingestion

In [17]:
from typing import Literal, Any, List
import logging
import PyPDF2
import sys
sys.path.append('drive/MyDrive/RAG-llamaindex')
import os
from langchain_experimental.text_splitter import SemanticChunker
from math import ceil

In [6]:
reader = SimpleDirectoryReader(input_dir="drive/MyDrive/RAG-llamaindex/Data")
documents = reader.load_data(num_workers=4)

# Filter out the data where the file_name is not "Harry Potter The Complete Collection.pdf"
documents = [doc for doc in documents if doc.metadata["file_name"] == "Harry Potter The Complete Collection.pdf"]

# Creating a function for chunking using the LLama Index SentenceSplitter
def get_chunks(
    documents: list[Any],
    chunk_size: int = 512,
    chunk_overlap: float = 0.0
) -> list[str]:
    overlap = int(chunk_size * chunk_overlap)
    text_splitter = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=overlap)
    chunks= text_splitter.get_nodes_from_documents(documents, show_progress=True)

    return chunks

# Creating a function for manual chunking
def get_chunks_manual(documents: list[Any], chunk_size: int = 512, chunk_overlap: float = 0.0) -> List[str]:
    """
    Takes in a `file_path`, retrieves the document, breaks it down into chunks of size
    `chunk_size` and overlap `chunk_overlap`, and returns the chunks.
    """
    chunks = []

    text = ""
    for doc in documents:
        text += doc.text
    # replace every \t with " "
    text = text.replace("\t", " ")

    step_size = ceil(chunk_size * (1 - chunk_overlap))
    for i in range(0, len(text), step_size):
        chunks.append(text[i:i + chunk_size])

    return chunks

In [7]:
# Using the Sentence Splitter

# Creating chunks of 512 characters with no overlap
chunks_512_0 = get_chunks(documents)
print("Created", len(chunks_512_0), "chunks of size 512 and 0% overlap with the SentenceSplitter")
print("The 100th chunk is:", chunks_512_0[100])

# Creating chunks of 512 with 50% overlap
chunks_512_50 = get_chunks(documents, chunk_size=512, chunk_overlap=0.5)
print("Created", len(chunks_512_50), "chunks of size 512 and 0% overlap with the SentenceSplitter")
print("The 100th chunk is:", chunks_512_50[100])

Parsing nodes:   0%|          | 0/3623 [00:00<?, ?it/s]

Created 6882 chunks of size 512 and 0% overlap with the SentenceSplitter
The 100th chunk is: Node ID: 6f915fbc-29db-474e-9b30-6cceb653ecbd
Text: about     you,    probably        nothing a       good
beating wouldn’t        have    cured   —       and     as for     all
this    about   your    parents,        well,   they    were
weirdos,        no      denying it,     and     the world’s better
off     without them    in      my      opinion —       asked   for
all     they    got, getting mixed   up      with    these   wizarding
types   —       just    what    I       expected,       always knew
they’d  come    to      a       sticky  end     —” But     at
that...


Parsing nodes:   0%|          | 0/3623 [00:00<?, ?it/s]

Created 8170 chunks of size 512 and 0% overlap with the SentenceSplitter
The 100th chunk is: Node ID: 36f0b8ed-9aeb-4d9a-b15e-23185e45e2ac
Text: He        lay     and     watched his     birthday        tick
nearer, wondering       if      the     Dursleys        would
remember        at      all,    wondering       where   the     letter
writer  was     now. Five    minutes to      go.     Harry   heard
something       creak   outside.        He      hoped   the roof
wasn’t  going   to      fall    in,     although        he      might
be      warmer  if      it      did.    Four minutes to      go.
Maybe   the     house   in      Privet  Drive   would   be      so
full    of...


In [8]:
# Using the Manual Chunking

# Creating chunks of 512 with no overlap
manual_chunks_512_0 = get_chunks_manual(documents, 512, 0.0)
print("Created", len(manual_chunks_512_0), "chunks of size 512 and 0% overlap with the Manual Splitter")
print("The 100th chunk is:", manual_chunks_512_0[100])

# Creating chunks of 512 with 50% overlap
manual_chunks_512_50 = get_chunks_manual(documents, 512, 0.5)
print("Created", len(manual_chunks_512_50), "chunks of size 512 and 50% overlap with the Manual Splitter")
print("The 100th chunk is:", manual_chunks_512_50[100])

Created 12269 chunks of size 512 and 0% overlap with the Manual Splitter
The 100th chunk is: en the bill, snorted in disgust, and flipped over the
postcard.
“Marge’s ill,” he informed Aunt Petunia. “Ate a funny whelk . . .”
“Dad!” said Dudley suddenly. “Dad, Harry’s got something!”
Harry was on the point of unfolding his letter, which was written on the
same heavy parchment as the envelope, when it was jerked sharply out of his
hand by Uncle Vernon.
“That’s 
mine
!” said Harry, trying to snatch it back.
“Who’d be writing to you?” sneered Uncle Vernon, shaking the letter open
with one hand and glanc
Created 24537 chunks of size 512 and 50% overlap with the Manual Splitter
The 100th chunk is: and took out the silver Put-Outer. He clicked it once, and twelve balls
of light sped back to their street lamps so that Privet Drive glowed suddenly
orange and he could make out a tabby cat slinking around the corner at the
other end of the street. He could just see the bundle of blankets on the st

#Creating the Embedding Model

In [9]:
from transformers import AutoModel

In [10]:
class HuggingFaceEmbedding:
    def __init__(self, model_name: str):
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

    def get_text_embedding(self, text: str) -> torch.Tensor:
        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(self.device)
        with torch.no_grad():
            outputs = self.model(**inputs)
        return outputs.last_hidden_state.mean(dim=1).squeeze()

    def get_embeddings(self, texts: list[str], batch_size: int = 16) -> list[torch.Tensor]:
        embeddings = []
        for i in range(0, len(texts), batch_size):
            batch_texts = texts[i:i + batch_size]
            inputs = self.tokenizer(batch_texts, return_tensors="pt", truncation=True, padding=True, max_length=512).to(self.device)
            with torch.no_grad():
                outputs = self.model(**inputs)
            batch_embeddings = outputs.last_hidden_state.mean(dim=1)
            embeddings.extend(batch_embeddings.cpu())
        return embeddings

embed_model =  HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")


#Defining the LLM Model

In [11]:
from llama_index.llms.huggingface import HuggingFaceLLM
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [12]:
llm= HuggingFaceLLM(model=model, tokenizer=tokenizer)

# Find the embedding of each text in Manual_Chunks of size 512 and 0% overlap and put in list
manual_512_0_embed = embed_model.get_embeddings(manual_chunks_512_0)
manual_512_0_embed_np = [embedding.numpy() for embedding in manual_512_0_embed]
print(len(manual_512_0_embed_np))

# Find the embedding of each text in Manual_Chunks of size 512 and 50% overlap and put in list
manual_512_50_embed = embed_model.get_embeddings(manual_chunks_512_50)
manual_512_50_embed_np = [embedding.numpy() for embedding in manual_512_50_embed]
print(len(manual_512_50_embed_np))



12269
24537


In [13]:
# Creating the first Retriever: Top-k Embeddings Retriever

def top_k_embed_retriever(query: str, k: int, embed_model, manual_embed_np, texts):
    query_embed = embed_model.get_text_embedding(query)
    query_embed_np = query_embed.cpu().numpy().reshape(1, -1)

    # Calculate cosine similarity with all the other embeddings
    similarities = cosine_similarity(query_embed_np, manual_embed_np).flatten()

    # Find the top k indices
    top_k_indices = np.argpartition(similarities, -k)[-k:]
    top_k_indices = top_k_indices[np.argsort(similarities[top_k_indices])[::-1]]

    # Retrieve the top k texts
    top_k_texts = [texts[i] for i in top_k_indices]

    return top_k_texts
"""
# Example usage
query = "Hogwarts"
k = 3
top_k_texts = top_k_embed_retriever(query, k, embed_model, np.stack(manual_embed_np), manual_chunks)
print(top_k_texts[0])
print("___________")
print(top_k_texts[1])
print("___________")
print(top_k_texts[2])"""

'\n# Example usage\nquery = "Hogwarts"\nk = 3\ntop_k_texts = top_k_embed_retriever(query, k, embed_model, np.stack(manual_embed_np), manual_chunks)\nprint(top_k_texts[0])\nprint("___________")\nprint(top_k_texts[1])\nprint("___________")\nprint(top_k_texts[2])'

In [14]:
vectorizer_512_0 = TfidfVectorizer()
vectorizer_512_50 = TfidfVectorizer()

tfidf_matrix_512_0 = vectorizer_512_0.fit_transform(manual_chunks_512_0)
tfidf_matrix_512_50 = vectorizer_512_50.fit_transform(manual_chunks_512_50)

In [15]:
# Creating the second Retriever: Top-k TF-IDF Retriever
def top_k_tfidf_retriever(query: str, k: int, tfidf_matrix, texts, vectorizer):
    query_vector = vectorizer.transform([query])
    similarities = cosine_similarity(query_vector, tfidf_matrix).flatten()
    top_k_indices = np.argpartition(similarities, -k)[-k:]
    top_k_indices = top_k_indices[np.argsort(similarities[top_k_indices])[::-1]]
    top_k_texts = [texts[i] for i in top_k_indices]
    return top_k_texts

"""# Example usage
query = "Hogwarts"
k = 3
top_k_texts = top_k_tfidf_retriever(query, k, tfidf_matrix_512_0, manual_chunks, vectorizer_512_0)
print(top_k_texts[0])
print("___________")
print(top_k_texts[1])
print("___________")
print(top_k_texts[2])"""

'# Example usage\nquery = "Hogwarts"\nk = 3\ntop_k_texts = top_k_tfidf_retriever(query, k, tfidf_matrix_512_0, manual_chunks, vectorizer_512_0)\nprint(top_k_texts[0])\nprint("___________")\nprint(top_k_texts[1])\nprint("___________")\nprint(top_k_texts[2])'

#Creating the Pipeline

In [None]:
import pandas as pd

In [20]:
class HuggingFaceLLM:
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer

    def generate_answer(self, prompt: str) -> str:
        inputs = self.tokenizer(prompt, return_tensors="pt", padding=True)
        outputs = self.model.generate(**inputs)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

llm = HuggingFaceLLM(model=model, tokenizer=tokenizer)

prompt = "what's 2+2?. Answer should be only one number."
llm_answer = llm.generate_answer(prompt)
print(llm_answer)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


what's 2+2?. Answer should be only one number. 1. 2. 3. 4. 5. 6. 7. 8. 9. 10. 11. 12. 13. 14. 15. 16. 17. 18. 19. 20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30. 31. 32. 33. 34. 35. 36. 37. 38. 39. 40. 41. 42. 43. 44. 45. 46. 47. 48. 49. 50. 51. 52. 53. 54. 55. 56. 57. 58. 59. 60. 61. 62. 63. 64. 65. 66. 67. 68. 69. 70. 71. 72. 73. 74. 75. 76. 77. 78. 79. 80. 81. 82. 83. 84. 85. 86. 87. 88. 89. 90. 91. 92. 93. 94. 95. 96. 97. 98. 99. 100.


In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Getting the Hugging Face token
config_data = json.load(open("drive/MyDrive/RAG-llamaindex/config.json"))
HF_TOKEN = config_data["HF_TOKEN"]

# Defining the pre-trained model we will use, which is Llama-3-8B
model_name = "meta-llama/Meta-Llama-3-8B"

# Defining the quantization configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Defing the Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name ,
                                          token=HF_TOKEN)
tokenizer.pad_token = tokenizer.eos_token

# Initialising the LLM
model = AutoModelForCausalLM.from_pretrained(
    model_name ,
    device_map="auto",
    quantization_config=bnb_config,
    token=HF_TOKEN
)

# Function to generate a response
def generate_response(question):
    inputs = tokenizer(question, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(inputs['input_ids'], max_length=100, num_return_sequences=1)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response




Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Question: What is the capital of France?
Answer: What is the capital of France? Paris, of course. And what is the capital of the US? Washington, D.C., of course. But what is the capital of the world? That’s a question that many people have been trying to answer for centuries. And while there is no one definitive answer, there are some contenders that have emerged over the years.
What is the capital of the world?
There is no definitive answer to this question, as the capital of the world is subjective.


In [14]:
def generate_response(question):
    inputs = tokenizer(question, return_tensors="pt", padding=True)
    input_ids = inputs['input_ids']
    attention_mask = inputs['attention_mask']

    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=1000,
            num_return_sequences=1,
            no_repeat_ngram_size=2,  # To reduce repetition
            pad_token_id=tokenizer.eos_token_id  # Set pad token id
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.strip()

In [15]:
question = "What is 2+2? Answer: 4. \nWhat is the capital of France? Answer: Paris. \n What's 5 times 7? Answer:"
answer = generate_response(question)
print(f"Question: {question}")
print(f"Answer: {answer}")

Question: What is 2+2? Answer: 4. 
What is the capital of France? Answer: Paris. 
 What's 5 times 7? Answer:
Answer: What is 2+2? Answer: 4. 
What is the capital of France? Answer: Paris. 
 What's 5 times 7? Answer: Thirty-five.


In [None]:
def rag_pipeline(documents: list[Any], df: pd.DataFrame, chunk_sz: int = 512, chunk_ol: float = 0.0, retriever: str = "embed", k: int = 3, embd_mod=None):
    '''
    This function takes a dataframe `df` that contains the QA's, `chunk_sz` and overlap `chunk_ol`,
     a retriver `retiever` between the top_k_embed_retriever and the top_k_tfidf_retriever,
    the parameter `k` for the retrievers and an embeddings model.
    '''

    # Creating the chunks
    chunks = get_chunks_manual(documents, chunk_sz, chunk_ol)

    # Manual Embeddings for TF-IDF
    if retriever == 'embed':
      # Loading the embedding model and embed the chunks
      manual_embed = embed_model.get_embeddings(chunks)
      manual_embed_np = [embedding.numpy() for embedding in manual_embed]
    else:
      # Calculating TF-IDF
      vectorizer = TfidfVectorizer()
      tfidf_matrix = vectorizer.fit_transform(chunks)

    results = []
    for index, row in df.iterrows():
      query = row["Question"]

      if retriever == "embed":
        top_k_texts = top_k_embed_retriever(query, k, embed_model, np.stack(manual_embed_np), chunks)
      else:
        top_k_texts = top_k_tfidf_retriever(query, k, tfidf_matrix, chunks, vectorizer)

      retrieved_context = "\n\n".join(top_k_texts)

      instruction1 = "Q: What is the spell to open doors? \n Context: she shouted Alohomora and the door opened\n Answer: Alohomora.\n"
      instruction2 = "Q: What is the name of Harry's mom? \n Context: he saw the picture of his parents: James and Lily\n Answer: Lily.\n"

      prompt = f"{instruction1}{instruction2}Q: {query}\nContext: {retrieved_context}\nAnswer:"
      llm_answer = generate_response(prompt)

      result = {
          "Question": query,
          "True_Answer": row['Answer'],
           "Retrieved_Context": retrieved_context,
           "LLM_Answer": llm_answer
      }
      results.append(result)

    return pd.DataFrame(results)

In [None]:

# def rag_pipeline(documents: list[Any], df: pd.DataFrame, chunk_sz: int = 512, chunk_ol: float = 0.0, retriever: str = "embed", k: int = 3, embd_mod=None):
#     '''
#     This function takes a dataframe `df` that contains the QA's, `chunk_sz` and overlap `chunk_ol`,
#      a retriver `retiever` between the top_k_embed_retriever and the top_k_tfidf_retriever,
#     the parameter `k` for the retrievers and an embeddings model.
#     '''

#     # Creating the chunks
#     chunks = get_chunks_manual(documents, chunk_sz, chunk_ol)

#     # Manual Embeddings for TF-IDF
#     if retriever == 'embed':
#       # Loading the embedding model and embed the chunks
#       manual_embed = embed_model.get_embeddings(chunks)
#       manual_embed_np = [embedding.numpy() for embedding in manual_embed]
#     else:
#       # Calculating TF-IDF
#       vectorizer = TfidfVectorizer()
#       tfidf_matrix = vectorizer.fit_transform(chunks)

#     results = []
#     for index, row in df.iterrows():
#       query = row["Question"]

#       if retriever == "embed":
#         top_k_texts = top_k_embed_retriever(query, k, embed_model, np.stack(manual_embed_np), chunks)
#       else:
#         top_k_texts = top_k_tfidf_retriever(query, k, tfidf_matrix, chunks, vectorizer)

#       retrieved_context = " ".join(top_k_texts)
#       prompt = f"Q: {query}\nContext: {retrieved_context}\nAnswer:"
#       llm_answer = llm.generate_answer(prompt)

#       result = {
#           "Question": query,
#           "True_Answer": row['Answer'],
#            "Retrieved_Context": retrieved_context,
#            "LLM_Answer": llm_answer
#       }
#       results.append(result)

#     return pd.DataFrame(results)


In [None]:
documents = [doc for doc in documents if doc.metadata["file_name"] == "Harry Potter The Complete Collection.pdf"]
import csv
harrypotter = documents[0]

class HuggingFaceLLM:
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer

    def generate_answer(self, prompt: str) -> str:
        inputs = self.tokenizer(prompt, return_tensors="pt", padding=True)
        outputs = self.model.generate(**inputs)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

llm = HuggingFaceLLM(model=model, tokenizer=tokenizer)
embed_model =  HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
df = pd.read_csv('drive/MyDrive/RAG-llamaindex/harry-potter-data.csv')

"""result_df = rag_pipeline(harrypotter, df, chunk_sz=512, chunk_ol=0.0, retriever="embed", k=3, embd_mod=embed_model)
result_df.to_csv('retrieved_answers.csv', index=False)
print(result_df.head())"""

with open('drive/MyDrive/RAG-llamaindex/results.csv', 'w', newline='') as csvfile:
    fieldnames = ['Question', 'Context', 'Answer']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    # Process each question and write the result immediately
    for q in df['Question']:
        result = rag_pipeline(harrypotter, q, chunk_size=512, chunk_overlap=0.0, retriever="embed", k=3)
        writer.writerow(result)

print("Results saved successfully.")