In [1]:
pip install streamlit openai PyPDF2 python-docx tiktoken


Collecting streamlit
  Downloading streamlit-1.49.1-py3-none-any.whl.metadata (9.5 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting python-docx
  Downloading python_docx-1.2.0-py3-none-any.whl.metadata (2.0 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.49.1-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m69.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_docx-1.2.0-py3-none-any.whl (252 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m21.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━

In [3]:
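# text_summarizer_streamlit.py
# Popla's AI Text Summarizer (no PIN required)
# Ready-to-run Streamlit app that accepts pasted text or an uploaded file (PDF/TXT/DOCX).
# Uses the OpenAI Chat Completions API. Set OPENAI_API_KEY in your environment.
# Launch it with `streamlit run text_summarizer_streamlit.py`; executing this code
# directly (for example inside a notebook cell) does not start the app.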

import os
import io
import textwrap
from typing import List

import openai
import streamlit as st
from PyPDF2 import PdfReader

# python-docx is optional: record whether it could be imported so the DOCX
# reader and the UI can degrade gracefully when it is missing.
try:
    from docx import Document as DocxDocument
except Exception:
    DOCX_AVAILABLE = False
else:
    DOCX_AVAILABLE = True

# ---------------------- Configuration ----------------------
# Read the key once at import time. A blank or whitespace-only value is treated
# as "not set" (None) so the `openai.api_key is None` checks further down report
# a missing key instead of letting requests go out with an empty one.
OPENAI_API_KEY = (os.getenv("OPENAI_API_KEY") or "").strip() or None
openai.api_key = OPENAI_API_KEY

DEFAULT_MODEL = "gpt-4o-mini"  # change if needed
# NOTE: despite the name, this value is passed to split_text_into_chunks() as
# `max_chunk_chars`, i.e. it is a character budget, not a token count.
MAX_CHUNK_TOKENS = 2000

# ---------------------- Helpers ----------------------
def read_pdf(file_bytes: bytes) -> str:
    """Extract the text of a PDF supplied as raw bytes.

    Args:
        file_bytes: Full contents of the PDF file.

    Returns:
        The text of every page joined by blank lines. A page whose extraction
        fails (or yields nothing) contributes an empty string; if the document
        cannot be processed at all, the result is "".
    """
    def _page_text(page) -> str:
        # Best effort per page: one unreadable page must not lose the others.
        try:
            return page.extract_text() or ""
        except Exception:
            return ""

    try:
        pdf = PdfReader(io.BytesIO(file_bytes))
        return "\n\n".join(_page_text(page) for page in pdf.pages)
    except Exception:
        return ""

def read_txt(file_bytes: bytes) -> str:
    """Decode an uploaded plain-text file.

    The bytes are decoded strictly as UTF-8 first. If that fails, the data is
    decoded as Latin-1, which maps every byte to a character, so non-UTF-8
    files keep their accented characters instead of having them dropped.

    Args:
        file_bytes: Raw contents of the text file.

    Returns:
        The decoded text.
    """
    try:
        # Strict decoding on purpose: with errors='ignore' this call could never
        # fail, which made the Latin-1 fallback below unreachable.
        return file_bytes.decode('utf-8')
    except UnicodeDecodeError:
        return file_bytes.decode('latin-1')

def read_docx(file_bytes: bytes) -> str:
    """Extract paragraph text from a DOCX file supplied as raw bytes.

    Args:
        file_bytes: Full contents of the .docx file.

    Returns:
        The text of each paragraph in ``doc.paragraphs`` joined by blank lines.
        Returns "" when python-docx is not installed or when the bytes cannot
        be parsed as a DOCX document.
    """
    if not DOCX_AVAILABLE:
        return ""
    try:
        with io.BytesIO(file_bytes) as f:
            doc = DocxDocument(f)
            return "\n\n".join(p.text for p in doc.paragraphs)
    except Exception:
        # Same contract as read_pdf(): an unreadable upload yields "" so the
        # caller can report "no text" rather than the app raising mid-render.
        return ""

def split_text_into_chunks(text: str, max_chunk_chars: int = 3500) -> List[str]:
    """Split text into chunks of at most ``max_chunk_chars`` characters.

    The text is cut at sentence boundaries, i.e. after '.', '!', '?' or the
    Devanagari danda when followed by whitespace, and sentences are packed
    greedily into chunks separated by single spaces. Periods inside tokens
    such as "3.14" are left alone, and no punctuation is added or removed.
    A sentence longer than the limit is broken at word boundaries, and a single
    word longer than the limit is sliced, so every chunk respects the limit.

    Args:
        text: Text to split; surrounding whitespace is ignored.
        max_chunk_chars: Maximum chunk length in characters. Values below 1
            are treated as 1.

    Returns:
        The chunks in document order, or [] when ``text`` is empty/whitespace.
    """
    import re  # function-scope import: `re` is not part of the file's import block

    text = text.strip()
    if not text:
        return []
    limit = max(1, max_chunk_chars)

    # Step 1: produce pieces that each fit within the limit on their own.
    pieces: List[str] = []
    for sentence in re.split(r'(?<=[.!?\u0964])\s+', text):
        sentence = sentence.strip()
        if not sentence:
            continue
        if len(sentence) <= limit:
            pieces.append(sentence)
            continue
        # Oversized sentence: fall back to words, then to raw slices.
        for word in sentence.split():
            if len(word) <= limit:
                pieces.append(word)
            else:
                pieces.extend(word[i:i + limit] for i in range(0, len(word), limit))

    # Step 2: pack pieces greedily; `current_len` counts the joining spaces too.
    chunks: List[str] = []
    current: List[str] = []
    current_len = 0
    for piece in pieces:
        extra = len(piece) + (1 if current else 0)
        if current and current_len + extra > limit:
            chunks.append(' '.join(current))
            current, current_len = [piece], len(piece)
        else:
            current.append(piece)
            current_len += extra
    if current:
        chunks.append(' '.join(current))
    return chunks

def _simple_extractive_summary(text: str, max_sentences: int = 5) -> str:
    sentences = [s.strip() for s in text.replace('\n', ' ').split('.') if s.strip()]
    sentences = sorted(sentences, key=lambda s: len(s), reverse=True)
    top = sentences[:max_sentences]
    return '. '.join(top).strip() + ('.' if top else '')

def _chat_complete(client, model: str, system_prompt: str, user_prompt: str, max_tokens: int) -> str:
    """Run one system+user chat exchange and return the stripped reply text.

    Args:
        client: An ``openai.OpenAI`` instance when the installed SDK provides
            that class (openai>=1.0), otherwise None to use the legacy
            module-level ``openai.ChatCompletion`` interface.
        model: Model name to request.
        system_prompt: Content of the system message.
        user_prompt: Content of the user message.
        max_tokens: Completion token cap forwarded to the API.

    Raises:
        ValueError: If the reply carries no text content.
        Exception: Whatever the SDK raises for the request itself.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    if client is not None:
        resp = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=0.2,
            max_tokens=max_tokens,
        )
        content = resp.choices[0].message.content
    else:
        resp = openai.ChatCompletion.create(
            model=model,
            messages=messages,
            temperature=0.2,
            max_tokens=max_tokens,
        )
        content = resp['choices'][0]['message']['content']
    if content is None:
        raise ValueError("Model returned no text content.")
    return content.strip()


def summarize_with_openai(chunks: List[str], prompt_instructions: str, model: str = DEFAULT_MODEL) -> str:
    """Summarise each chunk with the chat model and merge the partial results.

    Each chunk is summarised independently using ``prompt_instructions``. With
    more than one chunk, a second request merges the partial summaries and
    repeats ``prompt_instructions`` so the merged text keeps the requested
    language, length and style.

    Failures are best-effort: a chunk whose request fails falls back to
    ``_simple_extractive_summary``; if the merge request fails, the partial
    summaries are returned joined by blank lines. Each failure is logged as a
    warning so the degradation is visible.

    Args:
        chunks: Text chunks in document order.
        prompt_instructions: Instruction text prepended to every request.
        model: Chat model name.

    Returns:
        The final summary, or "" when ``chunks`` is empty.

    Raises:
        RuntimeError: If ``openai.api_key`` is None.
    """
    import logging  # function-scope import: not part of the file's import block

    if openai.api_key is None:
        raise RuntimeError("OPENAI_API_KEY not set. Set environment variable and restart the app.")
    if not chunks:
        return ""

    log = logging.getLogger(__name__)
    # openai>=1.0 removed openai.ChatCompletion in favour of client objects;
    # pick the interface that the installed SDK actually offers.
    client = openai.OpenAI(api_key=openai.api_key) if hasattr(openai, "OpenAI") else None

    system_prompt = "You are a helpful assistant that produces clear, concise summaries."
    summaries = []
    for index, chunk in enumerate(chunks, start=1):
        user_prompt = f"{prompt_instructions}\n\nText to summarize:\n" + chunk
        try:
            chunk_summary = _chat_complete(client, model, system_prompt, user_prompt, max_tokens=800)
        except Exception as exc:
            log.warning("Chunk %d/%d: model call failed (%s); using extractive fallback.",
                        index, len(chunks), exc)
            chunk_summary = _simple_extractive_summary(chunk)
        summaries.append(chunk_summary)

    if len(summaries) == 1:
        return summaries[0]

    combined_input = "\n\n".join(f"PART {i}: {s}" for i, s in enumerate(summaries, start=1))
    final_prompt = (
        "Combine the following partial summaries into one coherent, concise summary. "
        f"Keep it in the requested style: {prompt_instructions}\n\n{combined_input}"
    )
    try:
        return _chat_complete(
            client,
            model,
            "You are a helpful assistant that merges partial summaries into a single concise summary.",
            final_prompt,
            max_tokens=900,
        )
    except Exception as exc:
        log.warning("Merging partial summaries failed (%s); returning them unmerged.", exc)
        return "\n\n".join(summaries)

# ---------------------- Streamlit UI ----------------------
def _extract_uploaded_text(filename: str, file_bytes: bytes) -> str:
    """Return the text of an uploaded file, selected by its extension.

    The extension check is case-insensitive and dispatches to read_pdf,
    read_txt or read_docx; any other extension yields "".
    """
    lowered = filename.lower()
    if lowered.endswith('.pdf'):
        return read_pdf(file_bytes)
    if lowered.endswith('.txt'):
        return read_txt(file_bytes)
    if lowered.endswith('.docx'):
        return read_docx(file_bytes)
    return ""


def _build_prompt_instructions(lang: str, length_option: str, style_option: str) -> str:
    """Turn the selected language, length and style into the instruction text for the model."""
    style_sentences = {
        'Bullet points': "Make the summary bullet points, each on its own line.",
        "Explain like I'm 5": "Explain the main ideas simply, as if to a 5-year-old.",
        'Technical': "Use technical language and include key terminology and metrics where appropriate.",
    }
    # Any other style (i.e. "Concise") gets the default sentence.
    style_sentence = style_sentences.get(style_option, "Keep it concise and to the point.")
    return f"Produce a {length_option.lower()} summary in {lang}. {style_sentence}"


def main():
    """Render the Streamlit page: collect input, run the summariser, show the result.

    The left column holds the input widgets (pasted text or file upload,
    language, length, style, model name, Summarize button); the right column
    shows static settings information. When Summarize is pressed the text is
    chunked, summarised via summarize_with_openai() and displayed together
    with a download button and a preview of the original text.
    """
    st.set_page_config(page_title="AI Text Summarizer — Popla", layout="centered")
    st.title("Popla's AI Text Summarizer")
    st.caption("Paste text or upload a file (PDF/TXT/DOCX). No pins — just your OpenAI key.")

    with st.expander("How this works", expanded=False):
        st.markdown(
            "- Paste text directly into the box, or upload a file (PDF/TXT/DOCX).\n"
            "- Choose summary length and style.\n"
            "- Set your OPENAI_API_KEY as an environment variable.\n"
            "- Long text will be chunked, summarized, and merged."
        )

    col1, col2 = st.columns([3, 1])

    with col1:
        input_mode = st.radio("Input mode", options=["Paste / Type text", "Upload file"], index=0)
        user_text = ""
        uploaded_file = None
        if input_mode == "Paste / Type text":
            user_text = st.text_area("Enter text to summarize", height=300)
        else:
            uploaded_file = st.file_uploader("Upload a PDF / TXT / DOCX file", type=["pdf", "txt", "docx"])

        lang = st.selectbox("Summary language", options=["English", "Hindi", "Kannada", "Tamil", "Marathi", "Bengali"], index=0)
        length_option = st.selectbox("Summary length", options=["Very short (1-2 lines)", "Short (bullet points)", "Medium (1 paragraph)", "Long (detailed)"], index=1)
        style_option = st.selectbox("Style", options=["Concise", "Bullet points", "Explain like I'm 5", "Technical"], index=0)
        model_choice = st.text_input("Model (change if you have different model name)", value=DEFAULT_MODEL)
        summarize_button = st.button("Summarize")

    with col2:
        st.markdown("### Settings")
        st.write("No PIN — just your OpenAI API key in environment variables.")
        st.write(f"Max chunk size: ~{MAX_CHUNK_TOKENS} chars")
        if not DOCX_AVAILABLE:
            st.info("python-docx not installed — DOCX uploads will be ignored. Install: pip install python-docx")

    file_was_uploaded = input_mode == "Upload file" and uploaded_file is not None
    if file_was_uploaded:
        # getvalue() returns the whole buffer regardless of the current read
        # position, so the result does not depend on earlier reads of the file.
        file_bytes = uploaded_file.getvalue()
        try:
            user_text = _extract_uploaded_text(uploaded_file.name, file_bytes)
        except Exception as e:
            # Show a readable message instead of letting a parser error abort the page.
            st.error(f"Could not read {uploaded_file.name}: {e}")
            user_text = ""

    if summarize_button:
        if not user_text or user_text.strip() == "":
            if file_was_uploaded:
                st.error("No text could be extracted from the uploaded file. Try a different file or paste the text instead.")
            else:
                st.error("Please paste text or upload a file with some text before summarizing.")
            st.stop()

        if openai.api_key is None:
            st.error("OPENAI_API_KEY not found. Set it in your environment (export OPENAI_API_KEY=\"sk-...\") and restart the app.")
            st.stop()

        # A blank model box would only produce a failing request; use the default instead.
        model_name = model_choice.strip() or DEFAULT_MODEL

        with st.spinner("Preparing summary — chunking text and calling the model..."):
            chunks = split_text_into_chunks(user_text, max_chunk_chars=MAX_CHUNK_TOKENS)
            prompt_instructions = _build_prompt_instructions(lang, length_option, style_option)
            try:
                final_summary = summarize_with_openai(chunks, prompt_instructions, model=model_name)
            except Exception as e:
                st.error(f"Failed to summarize: {e}")
                st.stop()

        st.success("Summary ready")
        st.markdown("---")
        st.subheader("Summary")
        st.write(final_summary)

        with st.expander("Download / copy options", expanded=True):
            st.download_button("Download summary as .txt", data=final_summary, file_name="summary.txt")
            st.code(final_summary)

        with st.expander("Original (first 4000 chars)"):
            # textwrap.shorten collapses whitespace and truncates on a word boundary.
            st.write(textwrap.shorten(user_text, width=4000, placeholder='...'))

    st.markdown("---")
    st.caption("Built with ❤️ by Popla. For very heavy docs, consider increasing chunk size or using a higher-capacity model.")

# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == '__main__':
    main()

2025-09-18 16:38:26.357 
  command:

    streamlit run /usr/local/lib/python3.12/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-09-18 16:38:26.377 Session state does not function when running a script without `streamlit run`
