In [50]:
# Fully upgrade all core libraries to their latest, compatible versions
!pip install -U -q transformers sentence-transformers faiss-cpu PyPDF2 accelerate

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.9/40.9 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m67.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [51]:
# 📁 Upload PDF
# Ask the user for a file through Colab's upload helper. The recorded output shows
# the chosen PDF being saved under its own name in the working directory.
# NOTE(review): `uploaded` is not read by any later visible cell (the PDF path is
# hard-coded where it is opened), and this import only resolves inside Google Colab.
from google.colab import files
uploaded = files.upload()

Saving GenAI_QA_Project_Interview_Questions.pdf to GenAI_QA_Project_Interview_Questions.pdf


In [57]:
# 📄 Read PDF
from PyPDF2 import PdfReader
def read_pdf(path):
  """Return the text of every page of the PDF at `path` as a single string.

  Page texts are joined with a newline so that the last word of one page is
  not fused with the first word of the next one.

  Args:
    path: Location of the PDF; passed unchanged to `PdfReader`.

  Returns:
    str: The extracted page texts, one after another, newline-separated.
      An empty string when the document has no pages.
  """
  # `or ""` keeps the join safe should a page yield no text object at all.
  page_texts = (page.extract_text() or "" for page in PdfReader(path).pages)
  return "\n".join(page_texts)
# Name of the PDF in the working directory (matches the file saved by the upload cell).
pdf_path = 'GenAI_QA_Project_Interview_Questions.pdf'
pdf_text = read_pdf(pdf_path)

In [53]:
# ✂️ Chunk the text
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
# Chunking parameters; lengths are counted with `len`, i.e. in characters.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 80

recursive_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],  # Default separators, coarsest to finest
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    is_separator_regex=False,
    length_function=len,
)

# Split the full PDF text into the list of chunks that gets embedded and indexed.
chunks = recursive_splitter.split_text(pdf_text)

In [54]:
# 🧠 Embeddings using MiniLM
from sentence_transformers import SentenceTransformer
import numpy as np
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Fail early with a clear message instead of a shape error further down.
if not chunks:
  raise ValueError("No text chunks to embed; check that the PDF contains extractable text.")

# Request NumPy output directly: calling `.numpy()` on tensors breaks when the
# model runs on a GPU, and a per-row conversion is unnecessary. The result is a
# single 2-D float32 array (one row per chunk), the dtype the FAISS index expects.
embeddings = np.asarray(embedder.encode(chunks, convert_to_numpy=True), dtype=np.float32)

In [55]:
# 📦 Store in FAISS index
import faiss
# Size the flat L2 index to the embedding width, then load every chunk vector into it.
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(embeddings)

In [62]:
# 🔍 Retrieve top k chunks
def retrive_chunks(query,k=3):
  """Return the text of the chunks closest to `query`, joined by single spaces.

  Args:
    query: Question or search string to embed with the module-level `embedder`.
    k: Maximum number of nearest chunks to look up in the module-level `index`.

  Returns:
    str: The matching entries of `chunks`, in the order the index returned
      them, separated by a space. Fewer than `k` chunks are returned when the
      index holds fewer than `k` vectors.
  """
  query_embedding = embedder.encode([query])
  _distances, neighbor_ids = index.search(query_embedding, k)
  # FAISS pads missing neighbours with -1; skipping them prevents `chunks[-1]`
  # from silently repeating the last chunk.
  return " ".join(chunks[i] for i in neighbor_ids[0] if i >= 0)

In [None]:
# 💬 Load the Flan-T5 model for answering
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Single source of truth for the checkpoint so tokenizer and model cannot drift apart.
MODEL_NAME = "google/flan-t5-base"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

In [70]:
# Ask the question
from transformers import pipeline
rag_pipe = pipeline("text2text-generation",model = model, tokenizer = tokenizer)

# Question
question = "What is the difference between GPT and T5 models?"

# Token budget for the whole prompt (512 tokens for T5).
MAX_INPUT_TOKENS = 512
PROMPT_TEMPLATE = "Use the following context to answer the question:\n\nContext: {context}\n\nQuestion: {question}"

# Retrieve chunks, then trim ONLY the context so that instructions + context +
# question fit the budget. The question sits at the end of the prompt, so any
# overflow has to be taken out of the context rather than off the tail.
context = retrive_chunks(question)
scaffold_tokens = len(tokenizer.encode(PROMPT_TEMPLATE.format(context="", question=question)))
context_budget = max(MAX_INPUT_TOKENS - scaffold_tokens, 0)
context_ids = tokenizer.encode(
    context,
    add_special_tokens=False,
    truncation=True,
    max_length=MAX_INPUT_TOKENS,
)[:context_budget]
truncated_context = tokenizer.decode(context_ids, skip_special_tokens=True)

# The prompt must embed the truncated context, not the raw retrieval result.
prompt = PROMPT_TEMPLATE.format(context=truncated_context, question=question)
response = rag_pipe(prompt)[0]["generated_text"]
print("📌 Answer:\n", response.strip())

Device set to use cpu


📌 Answer:
 GPT is a decoder-only model optimized for generation, while T5 is an encoder-decoder model suited for both understanding and generating text.
