In [10]:
!pip install -q streamlit PyPDF2 faiss-cpu sentence-transformers openai pyngrok

In [2]:
from getpass import getpass
import os

# Read the key without echoing it. Strip surrounding whitespace because a
# pasted key often carries a trailing newline/space that breaks authentication.
key = getpass("Enter your OpenAI API key (sk-...): ").strip()
if not key:
    # Fail here rather than exporting an empty key that only surfaces later
    # as an authentication error inside the Streamlit app.
    raise ValueError("No OpenAI API key entered.")

# Child processes started from this kernel (the Streamlit server) inherit this.
os.environ["OPENAI_API_KEY"] = key
print("API key set in environment (not displayed).")

Enter your OpenAI API key (sk-...): ··········
API key set in environment (not displayed).


In [11]:
%%writefile utils.py
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

# Load the embedding model once, at module import time.
# The same instance is shared by process_pdf() and embed_query() below, so
# document chunks and queries are encoded by the same model.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

def process_pdf(file, chunk_size=2000):
    """Extract text from a PDF, split it into chunks, and build a FAISS index.

    Args:
        file: Whatever ``PdfReader`` accepts (the app passes an uploaded file).
        chunk_size: Maximum number of characters per chunk; must be positive.

    Returns:
        A ``(chunks, index)`` tuple: the list of text chunks and a
        ``faiss.IndexFlatL2`` holding one embedding per chunk, added in the
        same order as ``chunks``.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or if no text could be
            extracted from the PDF (for example an image-only document).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    reader = PdfReader(file)
    # extract_text() can yield None/"" for a page, hence the `or ""`.
    # Pages are joined with a newline so the last word of one page is not
    # fused with the first word of the next.
    text = "\n".join(page.extract_text() or "" for page in reader.pages)
    if not text.strip():
        # Without this guard an empty chunk list reaches the encoder and the
        # code below fails with an opaque IndexError on embeddings.shape[1].
        raise ValueError("No extractable text found in the PDF")

    chunks = [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]

    embeddings = embedder.encode(chunks, convert_to_numpy=True, show_progress_bar=True)
    # The index is fed float32 vectors, matching what embed_query() produces.
    embeddings = np.array(embeddings).astype('float32')

    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return chunks, index

def embed_query(query):
    """Encode a search query with the shared embedder and return it as float32.

    The query is encoded as a batch of one (it is wrapped in a single-element
    list), and the encoder output is converted to a float32 NumPy array.
    """
    query_batch = [query]
    encoded = embedder.encode(query_batch, convert_to_numpy=True)
    as_array = np.array(encoded)
    return as_array.astype('float32')

Overwriting utils.py


In [12]:
%%writefile app.py
import streamlit as st
import os
from openai import OpenAI
from utils import process_pdf, embed_query

# Initialize OpenAI client.
# Stop with a visible message when the key is missing instead of constructing
# a client with an empty key that only fails once the first question is sent.
api_key = os.environ.get("OPENAI_API_KEY", "").strip()
if not api_key:
    st.error("OPENAI_API_KEY is not set. Set it in the environment before starting the app.")
    st.stop()
client = OpenAI(api_key=api_key)

# Seed session state on the first run only; values already present are kept
# so chat history and the processed PDF persist across Streamlit reruns.
for state_key, initial_value in (("messages", []), ("chunks", []), ("index", None)):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value

# App title
st.title("📄 Chat with Your PDF")

# File uploader
uploaded_file = st.file_uploader("Upload PDF", type="pdf")

# Process the PDF when one is uploaded, and again whenever a *different* file
# is uploaded. Checking only `index is None` would ignore every later upload.
if uploaded_file is not None:
    # Name + size is enough to notice that the user swapped the document.
    upload_id = (uploaded_file.name, uploaded_file.size)
    needs_processing = (
        st.session_state.index is None
        or st.session_state.get("upload_id") != upload_id
    )
    if needs_processing:
        try:
            with st.spinner("Processing PDF... Please wait."):
                chunks, index = process_pdf(uploaded_file)
        except Exception as exc:  # UI boundary: report the failure instead of a raw traceback
            # Drop any previously indexed document so the chat below cannot
            # keep answering from a PDF that is no longer the uploaded one.
            st.session_state.chunks = []
            st.session_state.index = None
            st.error(f"Could not process the PDF: {exc}")
        else:
            st.session_state.chunks = chunks
            st.session_state.index = index
            st.session_state.upload_id = upload_id
            # Earlier questions and answers referred to the previous document.
            st.session_state.messages = []
            st.success(f"✅ PDF processed! {len(st.session_state.chunks)} chunks created.")

# Chat interface
# Compare against None explicitly instead of relying on the truthiness of the
# index object.
if st.session_state.index is not None:
    user_input = st.text_input("Ask a question about your PDF:")
    if st.button("Send") and user_input.strip():
        chunks = st.session_state.chunks
        # Never request more neighbours than there are chunks: FAISS pads the
        # result with -1 labels, and chunks[-1] would silently repeat the last
        # chunk in the context.
        k = min(3, len(chunks))
        if k == 0:
            st.warning("The uploaded PDF has no searchable text.")
        else:
            query_vector = embed_query(user_input)
            _, neighbor_ids = st.session_state.index.search(query_vector, k)
            context = "\n".join(
                chunks[i] for i in neighbor_ids[0] if 0 <= i < len(chunks)
            )

            prompt = (
                f"Answer the question based on the context:\n\n{context}\n\n"
                f"Question: {user_input}"
            )

            response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.0
            )

            # message.content may be None; fall back to an empty string so
            # .strip() cannot raise.
            answer = (response.choices[0].message.content or "").strip()
            st.session_state.messages.append(("You", user_input))
            st.session_state.messages.append(("Bot", answer))

    # Display chat history (oldest first); anything not from "You" is the bot.
    for role, message in st.session_state.messages:
        speaker = "You" if role == "You" else "Bot"
        st.markdown(f"**{speaker}:** {message}")

Overwriting app.py


In [13]:
import os
from getpass import getpass

from pyngrok import ngrok

# Never hardcode the ngrok authtoken in the notebook: anyone who can read the
# file can use it. Read it from the environment, or prompt without echoing.
# NOTE: a token was previously committed in this cell; treat it as leaked and
# rotate it in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN", "").strip()
if not ngrok_token:
    ngrok_token = getpass("Enter your ngrok authtoken: ").strip()
if not ngrok_token:
    raise ValueError("No ngrok authtoken provided.")
ngrok.set_auth_token(ngrok_token)

In [14]:
# Launch the Streamlit server detached from this cell: `nohup` plus the
# trailing `&` keep it running after the cell returns, and both stdout and
# stderr are redirected into the log file for later inspection.
streamlit_log = "/content/streamlit.log"
launch_command = f"nohup streamlit run app.py --server.port 8501 > {streamlit_log} 2>&1 &"
get_ipython().system_raw(launch_command)
print(f"Streamlit started (logs -> {streamlit_log})")

Streamlit started (logs -> /content/streamlit.log)


In [15]:
# Expose the local Streamlit port (8501) through an ngrok tunnel and show
# where to reach it. `public_url` holds the tunnel object returned by pyngrok.
from pyngrok import ngrok

public_url = ngrok.connect(8501, bind_tls=True)
print(f"Open this URL in your browser: {public_url}")

Open this URL in your browser: NgrokTunnel: "https://2d139104d3cd.ngrok-free.app" -> "http://localhost:8501"
