<a href="https://colab.research.google.com/github/Sujatha2108/Chat-With-Your-PDF/blob/main/pdf_q%26a.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install flask pyngrok transformers sentence-transformers langchain faiss-cpu PyPDF2 langchain-community

Collecting pyngrok
  Downloading pyngrok-7.4.1-py3-none-any.whl.metadata (8.1 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting langchain-community
  Downloading langchain_community-0.4-py3-none-any.whl.metadata (3.0 kB)
INFO: pip is looking at multiple versions of langchain-community to determine which version is compatible with other requirements. This could take a while.
  Downloading langchain_community-0.3.31-py3-none-any.whl.metadata (3.0 kB)
Collecting requests (from transformers)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain-community)
  Downloading marshmallow-3.26

In [2]:
!mkdir -p templates
!mkdir -p static
!mkdir -p uploads

In [3]:
%%writefile app.py
from flask import Flask, render_template, request
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
from PyPDF2 import PdfReader

UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

app = Flask(__name__)

# ------------------- Load models -------------------
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
hf_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer, max_length=512)
llm = HuggingFacePipeline(pipeline=hf_pipeline)

# Global memory
vector_store = None
pdf_chunks = None

def process_pdf(file_path):
    reader = PdfReader(file_path)
    text = ""
    for page in reader.pages:
        page_text = page.extract_text()
        if page_text:
            text += page_text + "\n"
    return text

@app.route("/", methods=["GET", "POST"])
def home():
    global vector_store, pdf_chunks
    answer = ""
    uploaded_file = None

    if request.method == "POST":
        # Upload PDF
        if "pdf_file" in request.files:
            file = request.files["pdf_file"]
            if file.filename != "":
                uploaded_file = os.path.join(UPLOAD_FOLDER, file.filename)
                file.save(uploaded_file)

                text = process_pdf(uploaded_file)
                splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=100)
                pdf_chunks = splitter.split_text(text)

                vector_store = FAISS.from_texts(pdf_chunks, embeddings)

        # Ask Question
        if vector_store and "user_question" in request.form:
            question = request.form["user_question"]
            docs = vector_store.similarity_search(question, k=5)
            chain = load_qa_chain(llm, chain_type="refine")
            answer = chain.run(input_documents=docs, question=question)

    return render_template("index.html", answer=answer, uploaded_file=uploaded_file)

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8000, debug=False)


Writing app.py


In [4]:
%%writefile templates/index.html
<!DOCTYPE html>
<html>
<head>
    <title>📄 PDF Q&A Chatbot</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
<div class="container">
    <h1>📄 PDF Q&A Chatbot</h1>

    <div class="card">
        <h2>Upload PDF Document</h2>
        <form method="post" enctype="multipart/form-data">
            <input type="file" name="pdf_file" accept=".pdf" required>
            <button type="submit">Upload & Process 🚀</button>
        </form>
        {% if uploaded_file %}
        <p>✅ Uploaded: {{ uploaded_file.split('/')[-1] }}</p>
        {% endif %}
    </div>

    <div class="card">
        <h2>Ask a Question</h2>
        <form method="post">
            <input type="text" name="user_question" placeholder="Enter your question..." required>
            <button type="submit">Ask 🤖</button>
        </form>
        {% if answer %}
        <div class="result">
            <h2>Answer:</h2>
            <p>{{ answer }}</p>
        </div>
        {% endif %}
    </div>
</div>
</body>
</html>


Writing templates/index.html


In [5]:
%%writefile static/style.css
body {
    font-family: 'Segoe UI', sans-serif;
    background: linear-gradient(135deg, #141E30, #243B55);
    color: white;
    display: flex;
    justify-content: center;
    align-items: center;
    height: 100vh;
    margin: 0;
}
.container {
    text-align: center;
    width: 60%;
}
h1, h2 {
    margin-bottom: 15px;
}
.card {
    background: rgba(255, 255, 255, 0.1);
    padding: 20px;
    border-radius: 12px;
    box-shadow: 0 0 15px rgba(0,0,0,0.4);
    margin-bottom: 20px;
}
input, button {
    width: 90%;
    margin: 10px 0;
    padding: 10px;
    border-radius: 8px;
    border: none;
}
button {
    background: #FFD700;
    cursor: pointer;
    font-weight: bold;
}
button:hover {
    background: #FFA500;
}
.result {
    margin-top: 20px;
    padding: 15px;
    background: rgba(255,255,255,0.1);
    border-radius: 10px;
}


Writing static/style.css


In [6]:
# 🔴 Kill old Flask/ngrok processes
!pkill -f flask || echo "No flask running"
!pkill -f ngrok || echo "No ngrok running"


^C
^C


In [7]:
# 🔎 List processes using port 8000
!lsof -i :8000


In [8]:
!kill -9 672


/bin/bash: line 1: kill: (672) - No such process


In [9]:
# Run Flask in background
!nohup python app.py > flask.log 2>&1 &

In [10]:
# Start ngrok tunnel
from pyngrok import ngrok, conf
conf.get_default().auth_token = "NGROK_TOKEN"
public_url = ngrok.connect(8000)
print("🌍 Public URL:", public_url)

# Optional: show Flask logs
!sleep 3 && tail -n 20 flask.log

🌍 Public URL: NgrokTunnel: "https://c03c757f5681.ngrok-free.app" -> "http://localhost:8000"


In [None]:
!tail -f flask.log


2025-10-25 14:13:00.289168: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1761401580.309092    1528 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1761401580.315052    1528 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1761401580.330481    1528 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1761401580.330509    1528 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1761401580.330511    1528 computation_placer.cc:177] computation placer alr



Device set to use cuda:0
  llm = HuggingFacePipeline(pipeline=hf_pipeline)
 * Serving Flask app 'app'
 * Debug mode: off
 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:8000
 * Running on http://172.28.0.12:8000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [25/Oct/2025 14:15:54] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [25/Oct/2025 14:15:54] "GET /static/style.css HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [25/Oct/2025 14:15:56] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [25/Oct/2025 14:18:55] "POST / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [25/Oct/2025 14:18:56] "[36mGET /static/style.css HTTP/1.1[0m" 304 -
stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.