In [None]:
!pip install langchain langchain-community langchain-core langchain-text-splitters
!pip install langchain-huggingface
!pip install sentence-transformers
!pip install faiss-cpu
!pip install pypdf
!pip install transformers accelerate
!pip install torch


Collecting langchain-core
  Downloading langchain_core-0.3.79-py3-none-any.whl.metadata (3.2 kB)
Downloading langchain_core-0.3.79-py3-none-any.whl (449 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m449.8/449.8 kB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langchain-core
  Attempting uninstall: langchain-core
    Found existing installation: langchain-core 1.0.5
    Uninstalling langchain-core-1.0.5:
      Successfully uninstalled langchain-core-1.0.5
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-huggingface 1.0.1 requires langchain-core<2.0.0,>=1.0.3, but you have langchain-core 0.3.79 which is incompatible.[0m[31m
[0mSuccessfully installed langchain-core-0.3.79
Collecting langchain-core<2.0.0,>=1.0.3 (from langchain-huggingface)
  Using cached langchain_core-1.0.5-py3-none-any.whl.m

In [None]:
from pathlib import Path
from typing import Union, List

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document


# Embedding model used when the caller does not pass `embed_model_name`.
DEFAULT_EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# Defaults for the `chunk_size` / `chunk_overlap` arguments of the text splitter.
DEFAULT_CHUNK_SIZE = 1000
DEFAULT_CHUNK_OVERLAP = 200


class EmbeddingPipeline:
    """Build a FAISS vectorstore from one PDF file or a directory of PDFs and save it.

    Typical use is ``EmbeddingPipeline(pdf_path).run()``, which loads the PDFs,
    splits them into chunks, embeds the chunks, writes the index to ``save_dir``
    and returns a retriever over it.
    """

    def __init__(
        self,
        pdf_path: Union[str, Path],
        save_dir: Union[str, Path] = "vectorstore",
        embed_model_name: str = DEFAULT_EMBED_MODEL,
        chunk_size: int = DEFAULT_CHUNK_SIZE,
        chunk_overlap: int = DEFAULT_CHUNK_OVERLAP,
        k_retriever: int = 6
    ):
        """Store the configuration and make sure ``save_dir`` exists.

        Args:
            pdf_path: A single PDF file, or a directory that is searched
                (non-recursively) for ``*.pdf`` files.
            save_dir: Directory the FAISS index is written to. It is created
                here, including missing parents.
            embed_model_name: Model name passed to ``HuggingFaceEmbeddings``.
            chunk_size: ``chunk_size`` passed to the text splitter.
            chunk_overlap: ``chunk_overlap`` passed to the text splitter.
            k_retriever: Value of ``k`` in the retriever's ``search_kwargs``.
        """
        self.pdf_path = Path(pdf_path)
        self.save_dir = Path(save_dir)
        self.save_dir.mkdir(parents=True, exist_ok=True)

        self.embed_model_name = embed_model_name
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.k_retriever = k_retriever

        # Filled in by load_and_split() / create_vectorstore().
        self.chunks: List[Document] = []
        self.retriever = None
        self.vectorstore = None

    def _pdf_paths(self) -> List[Path]:
        """Return the PDF files designated by ``pdf_path`` (possibly an empty list)."""
        if self.pdf_path.is_file():
            return [self.pdf_path]
        # Sorted so documents are always loaded in the same order.
        return sorted(self.pdf_path.glob("*.pdf"))

    def load_and_split(self):
        """Load every PDF and split the pages into chunks stored in ``self.chunks``.

        Raises:
            FileNotFoundError: If ``pdf_path`` is neither an existing file nor a
                directory containing at least one ``*.pdf`` file.
        """
        paths = self._pdf_paths()
        if not paths:
            # Fail here with a clear message instead of later with an obscure
            # error when an empty chunk list reaches the vectorstore.
            raise FileNotFoundError(
                f"No PDF found at '{self.pdf_path}': expected a PDF file or a "
                "directory containing *.pdf files."
            )

        docs = []
        for pdf in paths:
            docs.extend(PyPDFLoader(str(pdf)).load())

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.chunk_size,
            chunk_overlap=self.chunk_overlap,
        )
        self.chunks = text_splitter.split_documents(docs)

    def create_vectorstore(self):
        """Embed ``self.chunks``, save the FAISS index to ``save_dir`` and build the retriever.

        Raises:
            ValueError: If there are no chunks to embed, either because
                ``load_and_split()`` was not called or because it produced nothing.
        """
        if not self.chunks:
            raise ValueError(
                "No chunks to embed: call load_and_split() first and make sure "
                "the PDFs contain extractable text."
            )

        embeddings = HuggingFaceEmbeddings(model_name=self.embed_model_name)
        self.vectorstore = FAISS.from_documents(self.chunks, embeddings)

        # Persist the index so it can be reloaded without re-embedding.
        self.vectorstore.save_local(str(self.save_dir))

        self.retriever = self.vectorstore.as_retriever(
            search_kwargs={"k": self.k_retriever})

    def run(self):
        """Run the whole pipeline and return the retriever over the new index."""
        self.load_and_split()
        self.create_vectorstore()

        print(f"\n*** Vectorstore saved to: {self.save_dir} ***")
        return self.retriever

In [None]:
from pathlib import Path
from typing import Union
import torch

from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, Runnable

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_huggingface import HuggingFacePipeline


# Embedding model used to embed queries. NOTE(review): this should be the same
# model the stored index was built with - confirm against the indexing step.
DEFAULT_EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# Causal language model used to generate the answers.
DEFAULT_GEN_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"


class RAGPipeline:
    """
    Retrieval-augmented question answering over a saved FAISS vectorstore.

    This version assumes embeddings are ALREADY created & stored in FAISS.
    Constructing an instance loads the vectorstore and the language model and
    builds the chain; `query()` then answers one question at a time.
    """

    def __init__(
        self,
        vectorstore_path: Union[str, Path],
        embed_model_name: str = DEFAULT_EMBED_MODEL,
        gen_model_name: str = DEFAULT_GEN_MODEL,
        k_retriever: int = 6,
    ):
        """Load the vectorstore and the LLM, then build the RAG chain.

        Args:
            vectorstore_path: Directory containing `index.faiss` and `index.pkl`.
            embed_model_name: Model name passed to `HuggingFaceEmbeddings`.
            gen_model_name: Model name passed to the tokenizer / model loaders.
            k_retriever: Value of `k` in the retriever's `search_kwargs`.

        Raises:
            FileNotFoundError: If the index files are missing.
        """
        self.vectorstore_path = Path(vectorstore_path)
        self.embed_model_name = embed_model_name
        self.gen_model_name = gen_model_name
        self.k_retriever = k_retriever

        self.retriever = None
        self.rag_chain: Runnable = None

        self._load_vectorstore()
        self._setup_llm_and_chain()

    def _load_vectorstore(self):
        """Loads the FAISS vectorstore built earlier and creates the retriever.

        Raises:
            FileNotFoundError: If `index.faiss` or `index.pkl` is missing from
                `vectorstore_path`.
        """

        print(
            f"--- Loading FAISS Vectorstore from {self.vectorstore_path} ---")

        # Check up front so a missing index is reported clearly and before the
        # embedding model is loaded.
        missing = [
            name for name in ("index.faiss", "index.pkl")
            if not (self.vectorstore_path / name).is_file()
        ]
        if missing:
            raise FileNotFoundError(
                f"Vectorstore files not found in '{self.vectorstore_path}': "
                f"{', '.join(missing)}. Build the vectorstore first."
            )

        embeddings = HuggingFaceEmbeddings(model_name=self.embed_model_name)

        # SECURITY: index.pkl is unpickled here, which can execute arbitrary
        # code. Only load vectorstores you created yourself.
        vectorstore = FAISS.load_local(
            folder_path=str(self.vectorstore_path),
            embeddings=embeddings,
            allow_dangerous_deserialization=True
        )

        self.retriever = vectorstore.as_retriever(
            search_kwargs={"k": self.k_retriever}
        )

        print("Vectorstore loaded successfully.")

    def _setup_llm_and_chain(self):
        """Initializes the LLM and builds the RAG chain."""

        print(f"--- Loading LLM model: {self.gen_model_name} ---")

        # NOTE(review): trust_remote_code=True allows the model repository to
        # run its own Python code while loading - confirm it is needed for the
        # chosen model.

        # 1. Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            self.gen_model_name, trust_remote_code=True
        )

        # 2. Load model; device_map="auto" lets accelerate decide where the
        #    weights are placed.
        #    NOTE(review): float32 needs 4 bytes per parameter; consider a
        #    half-precision dtype on GPU. Recent transformers releases also
        #    warn that `torch_dtype` is deprecated in favour of `dtype`.
        model = AutoModelForCausalLM.from_pretrained(
            self.gen_model_name,
            torch_dtype=torch.float32,
            trust_remote_code=True,
            device_map="auto",
        )

        # 3. Create generation pipeline
        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=300,
            temperature=0.2,
            top_p=0.95,
            do_sample=True,
            # Return only the newly generated text. Without this the whole
            # prompt (instructions + retrieved context) is echoed back in
            # front of the answer.
            return_full_text=False,
            # Same value generate() falls back to; setting it explicitly
            # avoids the "Setting `pad_token_id`..." message on every call.
            pad_token_id=tokenizer.eos_token_id,
        )

        llm = HuggingFacePipeline(pipeline=pipe)

        # RAG prompt
        template = """You are a helpful assistant. Answer the question using ONLY the provided context.
If the context does not contain the answer, say "I don't know".

Context:
{context}

Question: {question}
Answer:"""

        prompt = PromptTemplate.from_template(template)

        def format_context(docs):
            """Join the retrieved documents' text, separated by blank lines."""
            return "\n\n".join(doc.page_content for doc in docs)

        # The incoming question is sent to the retriever (to build the context)
        # and passed through unchanged as the `question` prompt variable.
        self.rag_chain = (
            {
                "context": self.retriever | format_context,
                "question": RunnablePassthrough(),
            }
            | prompt
            | llm
            | StrOutputParser()
        )

        print("RAG chain is ready.")

    def query(self, question: str) -> str:
        """Answer `question` from the indexed documents.

        Returns:
            The text generated by the model (the prompt is not included).

        Raises:
            RuntimeError: If the chain has not been built.
        """
        if self.rag_chain is None:
            raise RuntimeError("RAG chain not initialized.")
        return self.rag_chain.invoke(question)

In [None]:
from pathlib import Path

In [None]:
!pip install gTTs



In [None]:
import re
from gtts import gTTS


def _extract_answer_text(raw_answer: str, question: str = "") -> str:
    """Return only the answer part of a model output that may echo its prompt.

    If the output contains the prompt's closing ``Question: <question>\\nAnswer:``
    lines, everything after them is returned. Otherwise the text after the last
    ``Answer:`` marker is returned, or the whole text when there is no marker.
    """
    prompt_tail = f"Question: {question}\nAnswer:"
    if question and prompt_tail in raw_answer:
        completion = raw_answer.split(prompt_tail, 1)[1]
    else:
        # rpartition yields the full string as its last item when the marker is absent.
        completion = raw_answer.rpartition("Answer:")[2]
    return completion.strip()


def initialize_rag_pipeline(vectorstore_dir="./vectorstore", audio_path="answer.mp3"):
    """
    Helper to initialize and run a basic RAGPipeline query loop.

    Reads questions from standard input until the user types 'exit' or 'quit'
    (or sends EOF / Ctrl-C). Each answer is printed and also saved as speech.

    Args:
        vectorstore_dir: Directory of the saved vectorstore passed to RAGPipeline.
        audio_path: File the spoken answer is written to; it is overwritten
            on every question.
    """
    rag = RAGPipeline(vectorstore_dir)

    print("\nType a question to query the documents, or type 'exit' to quit.\n")
    while True:
        try:
            question = input("Your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nExiting RAG pipeline.")
            break

        if question.lower() in ("exit", "quit"):
            print("Goodbye!")
            break

        if not question:
            continue

        try:
            answer = rag.query(question)
        except Exception as e:
            # Best effort: report the failure and keep the loop alive.
            print(f"Error during query: {e}")
            continue

        print(f"\nAnswer:\n{answer}\n")

        # gTTS needs a plain, non-empty string (not a list of regex matches).
        spoken_text = _extract_answer_text(answer, question)
        if not spoken_text:
            continue

        try:
            gTTS(text=spoken_text, lang='en').save(audio_path)
        except Exception as e:
            # Speech is optional; the printed answer above is still valid.
            print(f"Error during text-to-speech: {e}")

In [None]:
# Location of the saved vectorstore, relative to the working directory.
vectorstore_dir = "./vectorstore"
vectorstore_path = Path(vectorstore_dir)

# Both index files must be present before the query loop is started.
pkl_index, faiss_index = (
    vectorstore_path / filename for filename in ("index.pkl", "index.faiss")
)

if not (pkl_index.exists() and faiss_index.exists()):
    print("Index files not found. Please build the vectorstore first.")
else:
    print("Index files found. Running RAG pipeline...")
    initialize_rag_pipeline()

Index files found. Running RAG pipeline...
--- Loading FAISS Vectorstore from vectorstore ---


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Vectorstore loaded successfully.
--- Loading LLM model: mistralai/Mistral-7B-Instruct-v0.3 ---


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.55G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Device set to use cuda:0


RAG chain is ready.

Type a question to query the documents, or type 'exit' to quit.

Your question: who is the Don?


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Answer:
You are a helpful assistant. Answer the question using ONLY the provided context.
If the context does not contain the answer, say "I don't know".

Context:
To this day it’s jolting to see Brando as Don Corleone — 
the receded hairline, the gray pencil moustache, jowls 
hanging off a twisted mouth, and a voice cracked from 
years of command. Brando makes the character extraor-
dinarily complex largely through his physical expressive-
ness. He walks as if his shoulder blades were pinned be-
hind him (which emphasizes an old man’s paunch in 
front). But the sensibility beneath the authority is aston-
ishingly agile: the Don can suddenly break into mimicry, 
or turn his daughter in a waltz with a slight protective 
bent that catches sentiment in movement. Brando puts 
so much substance into his relatively few scenes, blowing 
hot and cold with equal eclat, that he enables Coppola to 
draw parallels between his sons and himself through nu-
ances at once fleeting and concrete. 
 
Ja