In [None]:
!pip install -q langchain langchain-community langchain-huggingface faiss-cpu sentence-transformers transformers accelerate pypdf


In [None]:
from google.colab import files

# Prompt for the PDF. The result maps each uploaded filename to its raw bytes.
uploaded = files.upload()

# Colab does not overwrite an existing file: a repeated upload is stored under a
# de-duplicated name such as "ai_notes (4).pdf", while the loading cell still
# opens "ai_notes.pdf" and would silently reuse the stale copy. Writing the
# bytes back under the returned filename keeps that path current.
# NOTE(review): assumes the mapping is keyed by the original filename — confirm
# against the Colab docs; if it is keyed by the saved name this is a no-op rewrite.
for uploaded_name, uploaded_bytes in uploaded.items():
    with open(uploaded_name, "wb") as pdf_file:
        pdf_file.write(uploaded_bytes)


Saving ai_notes.pdf to ai_notes (4).pdf


In [None]:
# ==============================
# FREE RAG IMPLEMENTATION
# ==============================

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
# HuggingFacePipeline is taken from langchain_huggingface (installed in the first
# cell); the `langchain.llms` import path is deprecated and produced a
# deprecation warning when the pipeline wrapper was instantiated.
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain.chains import RetrievalQA

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import torch

# ----------------------------
# 1️⃣ Load PDF
# ----------------------------
# Read the uploaded notes from the working directory and report how many
# entries the loader produced.
pdf_path = "ai_notes.pdf"
loader = PyPDFLoader(pdf_path)
documents = loader.load()
page_count = len(documents)
print(f"✅ Loaded {page_count} pages")

# ----------------------------
# 2️⃣ Text Chunking
# ----------------------------
# Splitter settings, named so they can be tuned in one place.
CHUNK_SIZE = 500      # passed as the splitter's chunk_size
CHUNK_OVERLAP = 100   # passed as the splitter's chunk_overlap

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

texts = text_splitter.split_documents(documents)
print(f"✅ Total Chunks: {len(texts)}")

# Fail early with a clear message rather than letting the vector-store build
# fail later on an empty chunk list.
if not texts:
    raise ValueError(
        "No text chunks were produced from the PDF; it may contain no "
        "extractable text (e.g. scanned images)."
    )

# ----------------------------
# 3️⃣ Create Embeddings
# ----------------------------
# Sentence-transformers model used to embed the chunks.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embedding = HuggingFaceEmbeddings(model_name=embedding_model_name)

# ----------------------------
# 4️⃣ FAISS Vector Store
# ----------------------------
# Index every chunk, then expose the index as a retriever configured with k=3.
vectorstore = FAISS.from_documents(texts, embedding)
retriever_options = {"k": 3}
retriever = vectorstore.as_retriever(search_kwargs=retriever_options)

# ----------------------------
# 5️⃣ Load FREE Text Generation Model
# ----------------------------
model_name = "google/flan-t5-base"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Greedy decoding is requested explicitly with do_sample=False. The previous
# `temperature=0` only applies when sampling and was reported by transformers
# as an invalid generation flag that may be ignored, so it is dropped here.
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=256,
    do_sample=False,
)

# Wrap the transformers pipeline so LangChain chains can call it as an LLM.
llm = HuggingFacePipeline(pipeline=pipe)

# ----------------------------
# 6️⃣ Create RAG Chain
# ----------------------------
# Combine the local LLM and the FAISS retriever into one question-answering chain.
chain_components = {"llm": llm, "retriever": retriever}
qa_chain = RetrievalQA.from_chain_type(**chain_components)

# ----------------------------
# 7️⃣ Ask Question Function
# ----------------------------
def ask_question(query: str) -> str:
    """Answer a question using the notebook's retrieval QA chain.

    Args:
        query: The natural-language question to ask.

    Returns:
        The value stored under the ``"result"`` key of the chain's response.

    Raises:
        ValueError: If ``query`` is not a string or contains only whitespace.
    """
    # Reject blank input up front so the retrieval and generation steps are
    # not run for a query that carries no content.
    if not isinstance(query, str) or not query.strip():
        raise ValueError("query must be a non-empty string")

    response = qa_chain.invoke({"query": query})
    return response["result"]


✅ Loaded 200 pages
✅ Total Chunks: 1400


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Device set to use cpu
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
  llm = HuggingFacePipeline(pipeline=pipe)


In [None]:
# Run a few sample questions through the RAG chain and print each answer.
questions = [
    "What is Artificial Intelligence?",
    "Explain supervised learning.",
    "What are applications of AI?",
]

for number, question in enumerate(questions, start=1):
    # Every label after the first is preceded by a blank line.
    separator = "" if number == 1 else "\n"
    print(f"{separator}Q{number}:", ask_question(question))


Q1: the branch of computer science that aims to create machines that can perform tasks requiring human intelligence

Q2: I don't know.

Q3: healthcare, finance, robotics, autonomous vehicles, recommendation systems, and more
