### Install Dependencies

In [24]:
!pip install -U langchain chromadb sentence-transformers pypdf
!pip install -U transformers accelerate bitsandbytes


Collecting sentence-transformers
  Downloading sentence_transformers-5.0.0-py3-none-any.whl.metadata (16 kB)
Downloading sentence_transformers-5.0.0-py3-none-any.whl (470 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m470.2/470.2 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence-transformers
  Attempting uninstall: sentence-transformers
    Found existing installation: sentence-transformers 4.1.0
    Uninstalling sentence-transformers-4.1.0:
      Successfully uninstalled sentence-transformers-4.1.0
Successfully installed sentence-transformers-5.0.0




KeyboardInterrupt: 

### Load and Split Documents

In [1]:
import os
from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document

documents = []
data_folder = "/content/drive/MyDrive/Docs"  # 👈 your docs folder

# Load files and attach metadata
for filename in os.listdir(data_folder):
    filepath = os.path.join(data_folder, filename)
    file_ext = os.path.splitext(filename)[1].lower()
    if file_ext not in [".txt", ".pdf"]:
        continue

    metadata = {
        "source": filepath,
        "filename": filename,
        "filetype": file_ext
    }

    loader = TextLoader(filepath) if file_ext == ".txt" else PyPDFLoader(filepath)
    raw_docs = loader.load()

    for doc in raw_docs:
        doc.metadata.update(metadata)
        documents.append(doc)

print(f"✅ Loaded {len(documents)} base documents")

# Split into 300-character chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
chunked_docs = splitter.split_documents(documents)
print(f"✅ Split into {len(chunked_docs)} chunks")


✅ Loaded 5 base documents
✅ Split into 38 chunks


In [3]:
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma

embedding_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

vectordb = Chroma.from_documents(
    documents=chunked_docs,
    embedding=embedding_model,
    persist_directory="chroma_db"
)

vectordb.persist()
print("✅ Stored in ChromaDB")


  embedding_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


✅ Stored in ChromaDB


  vectordb.persist()


In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline

model_id = "microsoft/phi-2"

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config
)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    temperature=0.2,
    repetition_penalty=1.1
)

print("✅ LLM loaded and ready")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


✅ LLM loaded and ready


In [10]:
# Reload Chroma
retriever = vectordb.as_retriever()

# 🔍 Ask a question
question = "Why did Albert Einstein receive the Nobel Prize, and was it for his theory of relativity?"

# Retrieve chunks
retrieved_docs = retriever.invoke(question)
retrieved_context = "\n\n".join([doc.page_content for doc in retrieved_docs])

# Generate prompt
prompt = f"""
You are a helpful assistant.

Context:
{retrieved_context}

Question:
{question}

Instruction:
Answer the question using only the context above. Be simple, clear, and factual. Avoid adding extra information.
Answer:
"""

# Generate answer
response = pipe(prompt)[0]["generated_text"]
print("🤖 Answer:\n", response)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


🤖 Answer:
 
You are a helpful assistant.

Context:
1921 – Albert Einstein (Germany)
Contrary to common belief, Einstein did not win the Nobel for relativity, but for his explanation of the photoelectric effect — a critical validation of quantum mechanics.

1921 – Albert Einstein (Germany)
Contrary to common belief, Einstein did not win the Nobel for relativity, but for his explanation of the photoelectric effect — a critical validation of quantum mechanics.

1927 – Arthur H. Compton (USA)
Honored for the Compton Effect, which confirmed the particle nature of electromagnetic radiation and solidified duality theory.

1921 – Albert Einstein (Germany)
Contrary to common belief, Einstein did not win the Nobel for relativity, but for his explanation of the photoelectric effect — a critical validation of quantum mechanics.

1927 – Arthur H. Compton (USA)
Honored for the Compton Effect, which confirmed the particle nature of electromagnetic radiation and solidified duality theory.

1921 – Albe