<a href="https://colab.research.google.com/github/Aamina0/Project-1/blob/main/GIKI_RAG_Chatbot_FlanT5_FULL_(1).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 📘 GIKI Prospectus Q&A Chatbot (Fast RAG with Flan‑T5)
This Colab notebook sets up a **Retrieval‑Augmented Generation (RAG)** chatbot using:
- **intfloat/multilingual-e5-base** embeddings for retrieval
- **google/flan-t5-large** for fast answer generation
- **FAISS** for vector search
- **Streamlit** UI + **Cloudflare Tunnel** for a public URL (no ngrok limits)

**Steps**
1) Install deps  2) Mount Drive  3) Create project files  4) Launch app


In [1]:
# ✅ 1) Install dependencies
# Installs the UI (streamlit), the model stack (transformers, accelerate,
# sentence-transformers), vector search (faiss-cpu), the document readers
# (pypdf, python-docx) and cloudflared in one quiet (-q) pip call.
# NOTE(review): a later cell force-reinstalls transformers==4.41.2 and
# sentence-transformers==2.6.1, so the transformers==4.42.4 pin below is not
# the version that ends up installed — consider pinning once, here.
# NOTE(review): the cloudflared binary is also installed from a .deb further
# down; the pip package listed here may be redundant — confirm.
!pip -q install streamlit==1.36.0 transformers==4.42.4 accelerate sentence-transformers faiss-cpu \
               pypdf python-docx cloudflared -U


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m60.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.3/9.3 MB[0m [31m58.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m374.9/374.9 kB[0m [31m25.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.5/310.5 kB[0m [31m22.3 MB/s[0m eta [36m0:00

In [2]:
# ✅ 2) Mount Google Drive and set project working directory
from google.colab import drive
drive.mount('/content/drive')

PROJECT_DIR = '/content/drive/MyDrive/Project5'
import os
os.makedirs(PROJECT_DIR, exist_ok=True)
%cd "$PROJECT_DIR"
print('Working directory:', PROJECT_DIR)


Mounted at /content/drive
/content/drive/MyDrive/Project1
Working directory: /content/drive/MyDrive/Project1


In [3]:
# ✅ 3) Create config.py
%%writefile config.py
"""Settings read by the Streamlit app: model ids, retrieval, chunking and UI."""

# --- Models (Hugging Face model ids) ---
EMBEDDING_MODEL_ID: str = "intfloat/multilingual-e5-base"
LLM_MODEL_ID: str = "google/flan-t5-large"

# --- Retrieval ---
# Number of nearest chunks requested from the vector index per question.
TOP_K: int = 4
# Cosine similarity; increase for stricter filtering.
SIMILARITY_THRESHOLD: float = 0.25

# --- Chunking (sizes are counted in whitespace-separated words) ---
CHUNK_TOKENS: int = 300
CHUNK_OVERLAP: int = 50

# --- UI ---
APP_TITLE: str = "📘 GIKI Prospectus Q&A (Fast: Flan‑T5)"
DEFAULT_LANG: str = "English"


Overwriting config.py


In [4]:
# ✅ 4) Create llm_t5.py (fast seq2seq wrapper)
%%writefile llm_t5.py
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

class FlanT5Wrapper:
    """Text-in/text-out wrapper around a Hugging Face seq2seq model.

    The tokenizer and model for ``model_id`` are loaded once in the
    constructor; :meth:`generate` turns a system and a user message into a
    single prompt and returns the decoded model output.
    """

    def __init__(self, model_id: str):
        """Load the tokenizer and model weights for ``model_id``.

        Weights are loaded in float16 when CUDA is available and float32
        otherwise; ``device_map="auto"`` leaves device placement to the
        library.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(
            model_id,
            device_map="auto",
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        )

    def generate(self, system: str, user: str, max_new_tokens=300, temperature=0.0):
        """Generate a reply for ``system`` + ``user`` and return it as text.

        Args:
            system: Instruction text placed first in the prompt.
            user: User message, appended after a blank line.
            max_new_tokens: Upper bound on the number of generated tokens.
            temperature: Sampling temperature. ``None`` or a value <= 0
                selects deterministic greedy decoding; a positive value
                enables sampling with that temperature.

        Returns:
            The first generated sequence, decoded without special tokens.
        """
        prompt = f"{system}\n\n{user}"
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

        sampling = temperature is not None and temperature > 0.0
        # Only pass options that apply to the chosen decoding mode:
        # `temperature` is a sampling-only setting and `early_stopping` is a
        # beam-search-only flag, so neither is sent for single-beam greedy
        # decoding (the library warns about such ignored flags).
        gen_kwargs = {
            "max_new_tokens": max_new_tokens,
            "num_beams": 1,
            "do_sample": sampling,
        }
        if sampling:
            gen_kwargs["temperature"] = temperature

        outputs = self.model.generate(**inputs, **gen_kwargs)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)


Overwriting llm_t5.py


In [5]:
# ✅ 5) Create embeddings.py (E5 + cosine normalisation)
%%writefile embeddings.py
import numpy as np
from sentence_transformers import SentenceTransformer

def _l2_normalize(mat: np.ndarray) -> np.ndarray:
    norms = np.linalg.norm(mat, axis=1, keepdims=True)
    norms = np.maximum(norms, 1e-12)
    return mat / norms

class E5Embeddings:
    """Embedding helper for E5-family retrieval models.

    The E5 model cards state that every input must start with a role prefix:
    ``"query: "`` for search queries and ``"passage: "`` for the texts being
    indexed. The two public methods add the matching prefix, encode the
    texts and return L2-normalised float32 vectors, so an inner product
    between a query vector and a passage vector is their cosine similarity.
    """

    QUERY_PREFIX = "query: "
    PASSAGE_PREFIX = "passage: "

    def __init__(self, model_id: str):
        """Load the sentence-transformers model identified by ``model_id``."""
        self.model = SentenceTransformer(model_id)

    def _encode(self, texts, prefix):
        """Prefix, encode and L2-normalise ``texts``; returns a float32 array."""
        if isinstance(texts, str):
            # Treat a bare string as one text, not as a sequence of characters.
            texts = [texts]
        prefixed = [f"{prefix}{text}" for text in texts]
        vecs = self.model.encode(prefixed, convert_to_numpy=True, normalize_embeddings=False)
        return _l2_normalize(vecs).astype('float32')

    def encode_passages(self, passages):
        """Embed document chunks (``"passage: "`` prefix), one row per passage."""
        return self._encode(passages, self.PASSAGE_PREFIX)

    def encode_queries(self, queries):
        """Embed user questions (``"query: "`` prefix), one row per query."""
        return self._encode(queries, self.QUERY_PREFIX)


Overwriting embeddings.py


In [6]:
# ✅ 6) Create ingest.py (PDF/DOCX readers + chunker)
%%writefile ingest.py
import os
from pypdf import PdfReader
import docx

def read_pdf(file_path):
    """Extract the text of a PDF, one record per page that has text.

    Returns a list of dicts with the keys ``"page"`` (1-based page number),
    ``"text"`` (stripped page text) and ``"file"`` (basename of
    ``file_path``). Pages that yield no text are left out.
    """
    extracted = []
    pdf_pages = PdfReader(file_path).pages
    for number, pdf_page in enumerate(pdf_pages, start=1):
        try:
            content = pdf_page.extract_text() or ""
        except Exception:
            # Best effort: a page that cannot be extracted counts as empty.
            content = ""
        content = content.strip()
        if not content:
            continue
        extracted.append({
            "page": number,
            "text": content,
            "file": os.path.basename(file_path)
        })
    return extracted

def read_docx(file_path):
    """Read a DOCX file into at most one record covering the whole document.

    Non-blank paragraphs are joined with newlines. The result is a list that
    is either empty (no text found) or holds a single dict with ``"page"``
    fixed to 1, the joined ``"text"`` and the ``"file"`` basename.
    """
    document = docx.Document(file_path)
    non_blank = [para.text for para in document.paragraphs if para.text and para.text.strip()]
    body = "\n".join(non_blank)
    if not body.strip():
        return []
    return [{
        "page": 1,
        "text": body,
        "file": os.path.basename(file_path)
    }]

def chunk_records(records, chunk_size=300, overlap=50):
    """Split each record's text into overlapping windows of words.

    Args:
        records: Iterable of dicts with the keys ``"text"``, ``"file"`` and
            ``"page"``.
        chunk_size: Maximum number of whitespace-separated words per chunk.
        overlap: Number of words shared by two consecutive chunks of the
            same record.

    Returns:
        A list of dicts with the keys ``"chunk"`` (the joined words),
        ``"file"`` and ``"page"`` (both copied from the source record).
        Records whose text contains no words produce no chunks.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1, or ``overlap`` is
            negative or not smaller than ``chunk_size``. Such values would
            keep the window from ever advancing (an endless loop).
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")
    if not 0 <= overlap < chunk_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, got {overlap}"
        )

    stride = chunk_size - overlap  # how far the window moves each step
    chunks = []
    for rec in records:
        words = rec["text"].split()
        for start in range(0, len(words), stride):
            window = words[start:start + chunk_size]
            chunks.append({
                "chunk": " ".join(window),
                "file": rec["file"],
                "page": rec["page"],
            })
            # Stop once a window reaches the last word; a further step would
            # only repeat the tail that is already covered.
            if start + chunk_size >= len(words):
                break
    return chunks


Overwriting ingest.py


In [7]:
# ✅ 7) Create indexer.py (FAISS cosine-sim with inner product)
%%writefile indexer.py
import faiss
import numpy as np

def build_faiss(embeddings: np.ndarray):
    """Create a flat inner-product FAISS index and add ``embeddings`` to it.

    The index dimension is taken from the second axis of ``embeddings``;
    the vectors are added as float32.
    """
    vectors = embeddings.astype('float32')
    # inner product; use with normalized vectors
    index = faiss.IndexFlatIP(vectors.shape[1])
    index.add(vectors)
    return index

def search(index, query_vec: np.ndarray, k=4):
    """Run ``index.search`` with ``query_vec`` cast to float32.

    Returns the ``(scores, idxs)`` pair produced by the index for the ``k``
    requested neighbours.
    """
    query = query_vec.astype('float32')
    top_scores, top_ids = index.search(query, k)
    return top_scores, top_ids


Overwriting indexer.py


In [8]:
# ✅ 8) Create rag.py (prompt builder + guard)
%%writefile rag.py
def build_user_prompt(question, chunks, lang="English"):
    """Assemble the user prompt: instructions, retrieved context, question.

    Each entry of ``chunks`` is a dict with the keys ``"file"``, ``"page"``
    and ``"chunk"``; it is rendered as a ``[Source: ...]`` header line
    followed by the chunk text, and entries are separated by a blank line.
    ``lang`` names the language the answer is requested in.
    """
    context = "\n\n".join(
        f"[Source: {c['file']} (page {c['page']})]\n{c['chunk']}" for c in chunks
    )
    instructions = (
        f"Answer the question in {lang} using only the context below. "
        f"Cite sources as (file, page)."
    )
    return f"{instructions}\n\nContext:\n{context}\n\nQuestion: {question}"

def guard_answer(scores, raw_answer, lang="English", threshold=0.25):
    """Return ``raw_answer`` only when the best retrieval score is high enough.

    ``scores`` is the 2-D score matrix of a search; only ``scores[0][0]`` is
    inspected, and an empty matrix counts as a score of 0.0. When that score
    is below ``threshold`` a fixed English apology is returned instead of
    ``raw_answer``. ``lang`` is accepted but not used by the current logic.
    """
    # scores are cosine similarities because we use normalized vectors + IP
    has_hit = len(scores) and len(scores[0])
    if has_hit:
        top_score = float(scores[0][0])
    else:
        top_score = 0.0

    fallback = (
        "Sorry, I couldn't find a reliable answer in the uploaded documents. "
        "Please provide more specific material or ask a different question."
    )
    return fallback if top_score < threshold else raw_answer


Overwriting rag.py


In [9]:
# Re-pin the Hugging Face stack: replaces the transformers==4.42.4 requested in
# the first install cell with 4.41.2 and fixes sentence-transformers at 2.6.1.
# NOTE(review): --force-reinstall also re-downloads dependencies (the captured
# log shows numpy being collected again); a runtime restart may be needed
# before the new versions are picked up — confirm.
!pip install transformers==4.41.2 sentence-transformers==2.6.1 --force-reinstall


Collecting transformers==4.41.2
  Downloading transformers-4.41.2-py3-none-any.whl.metadata (43 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.8/43.8 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentence-transformers==2.6.1
  Downloading sentence_transformers-2.6.1-py3-none-any.whl.metadata (11 kB)
Collecting filelock (from transformers==4.41.2)
  Downloading filelock-3.19.1-py3-none-any.whl.metadata (2.1 kB)
Collecting huggingface-hub<1.0,>=0.23.0 (from transformers==4.41.2)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting numpy>=1.17 (from transformers==4.41.2)
  Downloading numpy-2.3.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.1/62.1 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting p

In [13]:
# ✅ 9) Create app.py (Streamlit UI)
%%writefile app.py
import os, tempfile
import streamlit as st
from config import APP_TITLE, DEFAULT_LANG, TOP_K, CHUNK_TOKENS, CHUNK_OVERLAP, EMBEDDING_MODEL_ID, LLM_MODEL_ID
from ingest import read_pdf, read_docx, chunk_records
from embeddings import E5Embeddings
from indexer import build_faiss, search
from llm_t5 import FlanT5Wrapper
from rag import build_user_prompt, guard_answer

st.set_page_config(page_title=APP_TITLE, page_icon="📘", layout="wide")
st.title(APP_TITLE)


# --- Model loaders ---
# st.cache_resource keeps one instance of each model per server process, so a
# new browser session or a click on "Reset App" reuses the loaded weights
# instead of instantiating the models again.
@st.cache_resource(show_spinner=False)
def _load_embedder(model_id: str) -> E5Embeddings:
    """Build (or fetch from the cache) the embedding model for ``model_id``."""
    return E5Embeddings(model_id)


@st.cache_resource(show_spinner=False)
def _load_llm(model_id: str) -> FlanT5Wrapper:
    """Build (or fetch from the cache) the generator model for ``model_id``."""
    return FlanT5Wrapper(model_id)


# --- Session State Initialization ---
if "embedder" not in st.session_state:
    st.session_state.embedder = _load_embedder(EMBEDDING_MODEL_ID)

# The generator is loaded lazily (see ensure_llm_loaded) so the page renders
# before the larger model is needed.
if "llm" not in st.session_state:
    st.session_state.llm = None


def ensure_llm_loaded():
    """Populate ``st.session_state.llm`` on first use, showing a spinner."""
    if st.session_state.llm is None:
        with st.spinner("Loading Flan‑T5 model (fast)…"):
            st.session_state.llm = _load_llm(LLM_MODEL_ID)


# Vector index, chunk metadata and uploaded files start out empty.
for _key, _default in (("index", None), ("meta", None), ("uploaded_files", [])):
    if _key not in st.session_state:
        st.session_state[_key] = _default

# --- Sidebar ---
# Answer language selector; DEFAULT_LANG picks the preselected entry.
lang = st.sidebar.radio("Answer Language", ["English", "Urdu"], index=0 if DEFAULT_LANG=="English" else 1)
st.sidebar.write("\n")
if st.sidebar.button("Reset App"):
    # Drop everything stored for this session (index, metadata, model
    # handles) and start the script again from the top.
    for k in list(st.session_state.keys()):
        del st.session_state[k]
    # st.rerun() is the supported replacement for the deprecated
    # st.experimental_rerun().
    st.rerun()

# --- Upload ---
files = st.file_uploader("Upload up to 5 documents (PDF/DOCX)", type=["pdf","docx"], accept_multiple_files=True)
if files:
    st.session_state.uploaded_files = files

# --- Build Index ---
if st.button("Build Index"):
    if not st.session_state.uploaded_files:
        st.warning("Please upload at least one document first.")
    else:
        if len(st.session_state.uploaded_files) > 5:
            st.warning("Only the first 5 documents will be indexed.")

        all_records = []
        for f in st.session_state.uploaded_files[:5]:
            suffix = os.path.splitext(f.name)[1].lower()
            reader = read_pdf if suffix == ".pdf" else read_docx

            # The readers take a file path, so spill the upload to a temp
            # file. getvalue() returns the whole upload regardless of the
            # stream position left behind by an earlier read.
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                tmp.write(f.getvalue())
                tmp_path = tmp.name
            try:
                file_records = reader(tmp_path)
            except Exception as exc:
                # UI boundary: report the unreadable file and keep going.
                st.error(f"Could not read {f.name}: {exc}")
                continue
            finally:
                # Remove the temp file even when the reader fails.
                os.remove(tmp_path)

            # The readers label records with the basename of the path they
            # were given, which here is the random temp-file name. Restore
            # the uploaded file's name so citations are meaningful.
            for rec in file_records:
                rec["file"] = f.name
            all_records.extend(file_records)

        chunks = chunk_records(all_records, CHUNK_TOKENS, CHUNK_OVERLAP)
        if not chunks:
            # Nothing to embed (e.g. scanned PDFs without a text layer).
            st.error("No extractable text was found in the uploaded documents.")
        else:
            passages = [c["chunk"] for c in chunks]
            emb = st.session_state.embedder.encode_passages(passages)
            index = build_faiss(emb)

            st.session_state.index = index
            st.session_state.meta = chunks

            st.success(f"✅ Index built with {len(chunks)} chunks")

# --- Question ---
question = st.text_input("Ask a question about the documents:")

# --- Get Answer ---
if st.button("Get Answer"):
    if st.session_state.index is None or st.session_state.meta is None:
        st.warning("Please upload and build the index first.")
    elif not question.strip():
        st.warning("Please enter a question.")
    else:
        # Use the threshold from config.py so that tuning it there actually
        # changes the filtering (guard_answer otherwise uses its own default).
        from config import SIMILARITY_THRESHOLD

        q_vec = st.session_state.embedder.encode_queries([question])
        scores, idxs = search(st.session_state.index, q_vec, k=TOP_K)
        # The index reports missing neighbours as negative ids; skip those.
        picked = [st.session_state.meta[int(i)] for i in idxs[0] if int(i) >= 0]

        # Check retrieval quality before touching the LLM. guard_answer
        # returns its second argument unchanged when the best score passes,
        # so getting None back means "good enough, go on and generate".
        refusal = guard_answer(scores, None, lang=lang, threshold=SIMILARITY_THRESHOLD)
        if refusal is not None:
            final_answer = refusal
        else:
            ensure_llm_loaded()
            user_prompt = build_user_prompt(question, picked, lang=lang)
            system_msg = (
                "You are a helpful assistant that answers using ONLY the provided context. "
                "If the answer is not present, say so clearly. Always include (file, page) citations."
            )

            with st.spinner("Generating answer…"):
                raw_answer = st.session_state.llm.generate(
                    system=system_msg,
                    user=user_prompt,
                    max_new_tokens=300
                )
            final_answer = guard_answer(
                scores, raw_answer, lang=lang, threshold=SIMILARITY_THRESHOLD
            )

        st.subheader("Answer")
        st.write(final_answer)

        with st.expander("Show retrieved chunks"):
            for j, c in enumerate(picked, start=1):
                st.markdown(f"**{j}. {c['file']} (page {c['page']})**\n\n{c['chunk']}")


Overwriting app.py


In [14]:
# Download the cloudflared Debian package (amd64) from the project's latest
# GitHub release and install it with dpkg; -q keeps wget quiet.
# NOTE(review): "latest" is not a pinned version, so the installed build can
# change between runs — pin a release tag if reproducibility matters.
!wget -q https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb
!dpkg -i cloudflared-linux-amd64.deb


(Reading database ... (Reading database ... 5%(Reading database ... 10%(Reading database ... 15%(Reading database ... 20%(Reading database ... 25%(Reading database ... 30%(Reading database ... 35%(Reading database ... 40%(Reading database ... 45%(Reading database ... 50%(Reading database ... 55%(Reading database ... 60%(Reading database ... 65%(Reading database ... 70%(Reading database ... 75%(Reading database ... 80%(Reading database ... 85%(Reading database ... 90%(Reading database ... 95%(Reading database ... 100%(Reading database ... 126375 files and directories currently installed.)
Preparing to unpack cloudflared-linux-amd64.deb ...
Unpacking cloudflared (2025.8.1) over (2025.8.1) ...
Setting up cloudflared (2025.8.1) ...
Processing triggers for man-db (2.10.2-1) ...


In [15]:
# ✅ Run Streamlit with Cloudflare Tunnel after installing binary
import subprocess, re

# Kill previous processes
!pkill -f streamlit || true
!pkill -f cloudflared || true

port = 8501

# Start Streamlit in the background
streamlit_proc = subprocess.Popen(
    ["streamlit", "run", "app.py", "--server.port", str(port), "--server.headless", "true"],
    stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
)

# Start Cloudflare Tunnel
cf_proc = subprocess.Popen(
    ["cloudflared", "tunnel", "--url", f"http://localhost:{port}"],
    stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
)

# Extract public URL in real-time
print("⏳ Waiting for Cloudflare public URL...")
public_url = None
while True:
    line = cf_proc.stdout.readline()
    if not line:
        break
    print(line.strip())  # Show logs for debugging
    match = re.search(r"https://[a-zA-Z0-9-]+\.trycloudflare\.com", line)
    if match:
        public_url = match.group(0)
        break

print("\n✅ Your Streamlit app is live at:", public_url)
print("Keep this cell running while you use the app.")


^C
^C
⏳ Waiting for Cloudflare public URL...
2025-08-26T18:55:49Z INF Thank you for trying Cloudflare Tunnel. Doing so, without a Cloudflare account, is a quick way to experiment and try it out. However, be aware that these account-less Tunnels have no uptime guarantee, are subject to the Cloudflare Online Services Terms of Use (https://www.cloudflare.com/website-terms/), and Cloudflare reserves the right to investigate your use of Tunnels for violations of such terms. If you intend to use Tunnels in production you should use a pre-created named tunnel by following: https://developers.cloudflare.com/cloudflare-one/connections/connect-apps
2025-08-26T18:55:49Z INF Requesting new quick Tunnel on trycloudflare.com...
2025-08-26T18:55:54Z INF +--------------------------------------------------------------------------------------------+
2025-08-26T18:55:54Z INF |  Your quick Tunnel has been created! Visit it at (it may take some time to be reachable):  |
2025-08-26T18:55:54Z INF |  https://