# Install Dependencies

In [None]:
!pip install transformers streamlit pyngrok pdfplumber python-docx -q


# Build Models and Helper Functions

In [2]:

from transformers import pipeline
import streamlit as st


# Summarization pipeline backed by the facebook/bart-large-cnn checkpoint.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)

# Question-answering pipeline backed by the
# distilbert-base-cased-distilled-squad checkpoint.
qa_pipeline = pipeline(
    task="question-answering",
    model="distilbert-base-cased-distilled-squad",
)

def summarize_text(text, max_len=130, min_len=30):
    """Generate a summary of ``text`` with the module-level ``summarizer``.

    Args:
        text: The text to summarize.
        max_len: Passed to the pipeline as ``max_length``.
        min_len: Passed to the pipeline as ``min_length``.

    Returns:
        The ``summary_text`` of the first pipeline result, or an empty
        string when ``text`` is empty or contains only whitespace.
    """
    # Nothing to summarize: do not call the model on blank input.
    if not text or not text.strip():
        return ""

    summary = summarizer(
        text,
        max_length=max_len,
        min_length=min_len,
        do_sample=False,
        # Truncate over-long input to the model's limit instead of failing.
        truncation=True,
    )
    return summary[0]['summary_text']

def answer_question(context, question):
    """Return the answer the QA pipeline extracts from ``context`` for ``question``."""
    prediction = qa_pipeline(context=context, question=question)
    best_answer = prediction['answer']
    return best_answer

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m56.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m99.9 MB/s[0m eta [36m0:00:00[0m
[?25h

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


config.json:   0%|          | 0.00/473 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Device set to use cuda:0


# Deployment

In [25]:
%%writefile app.py
import streamlit as st
from transformers import pipeline
import pdfplumber
import torch

# Use the first CUDA device when one is available, otherwise run on CPU (-1).
device = 0 if torch.cuda.is_available() else -1


@st.cache_resource
def _load_pipelines(device_index):
    """Build the summarization and question-answering pipelines.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Caching the pipelines as a resource means the models are
    created once and reused on later reruns instead of being rebuilt.

    Args:
        device_index: Device argument forwarded to ``pipeline`` (0 for the
            first GPU, -1 for CPU).

    Returns:
        A ``(summarizer, qa_pipeline)`` tuple.
    """
    summarization = pipeline(
        "summarization",
        model="facebook/bart-large-cnn",
        device=device_index,
    )
    question_answering = pipeline(
        "question-answering",
        model="distilbert-base-cased-distilled-squad",
        device=device_index,
    )
    return summarization, question_answering


summarizer, qa_pipeline = _load_pipelines(device)

def read_pdf(file):
    """Extract the text of every page of a PDF.

    Args:
        file: A path or file-like object accepted by ``pdfplumber.open``.

    Returns:
        The text of all pages joined by newlines. Pages for which
        ``extract_text()`` returns nothing are skipped, so a PDF without
        extractable text yields an empty string.
    """
    with pdfplumber.open(file) as pdf:
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    # Separate pages with a newline so the last word of one page is not
    # fused with the first word of the next.
    return "\n".join(page_text for page_text in page_texts if page_text)

def read_txt(file):
    """Read an uploaded text file and return its contents as a string.

    Args:
        file: A binary file-like object whose ``read()`` returns bytes.

    Returns:
        The decoded text. A leading UTF-8 byte-order mark is removed, and
        bytes that are not valid UTF-8 are replaced with U+FFFD instead of
        raising ``UnicodeDecodeError``.
    """
    raw_bytes = file.read()
    # "utf-8-sig" decodes plain UTF-8 as well and strips a BOM if present.
    return raw_bytes.decode("utf-8-sig", errors="replace")

def _pack_units(units, limit):
    """Greedily join ``units`` with single spaces into strings of at most ``limit`` characters."""
    packed, current = [], ""
    for unit in units:
        candidate = f"{current} {unit}" if current else unit
        if len(candidate) <= limit:
            current = candidate
        else:
            if current:
                packed.append(current)
            current = unit
    if current:
        packed.append(current)
    return packed


def chunk_text(text, max_chunk_size=1000):
    """Split ``text`` into chunks of at most ``max_chunk_size`` characters.

    The text is split into sentences at ``.``, ``!`` or ``?`` followed by
    whitespace (spaces or newlines); the punctuation stays attached to its
    sentence. Sentences are then packed greedily into chunks, joined by a
    single space. A sentence longer than ``max_chunk_size`` is broken at word
    boundaries, and a single word longer than the limit is sliced.

    Args:
        text: The text to split.
        max_chunk_size: Maximum length of each chunk, in characters.

    Returns:
        A list of non-empty chunks; an empty list when ``text`` is empty or
        contains only whitespace.

    Raises:
        ValueError: If ``max_chunk_size`` is smaller than 1.
    """
    import re  # local import: the surrounding file does not import ``re``

    if max_chunk_size < 1:
        raise ValueError("max_chunk_size must be a positive integer")

    # The look-behind keeps the terminating punctuation with its sentence.
    sentences = [s for s in re.split(r"(?<=[.!?])\s+", text.strip()) if s]

    units = []
    for sentence in sentences:
        if len(sentence) <= max_chunk_size:
            units.append(sentence)
            continue
        # Over-long sentence: fall back to word-level pieces, hard-slicing any
        # single word that is itself longer than the limit.
        words = []
        for word in sentence.split():
            words.extend(
                word[start:start + max_chunk_size]
                for start in range(0, len(word), max_chunk_size)
            )
        units.extend(_pack_units(words, max_chunk_size))

    return _pack_units(units, max_chunk_size)

st.title("📘 AI Text Summarizer & Q&A App")
st.markdown("Upload a PDF or TXT file, or paste text directly, then summarize or ask questions about it!")

uploaded_file = st.file_uploader("Upload a file (.pdf or .txt)", type=["pdf", "txt"])
text_input = st.text_area("Or paste your text here:", height=200)

# An uploaded file takes precedence over any pasted text.
if uploaded_file is not None:
    # Compare the extension case-insensitively so names such as "REPORT.PDF"
    # are not silently ignored.
    uploaded_name = uploaded_file.name.lower()
    try:
        if uploaded_name.endswith(".pdf"):
            text_input = read_pdf(uploaded_file)
        elif uploaded_name.endswith(".txt"):
            text_input = read_txt(uploaded_file)
    except Exception as e:
        # Report unreadable files in the UI instead of crashing the page;
        # text_input keeps the pasted text in that case.
        st.error(f"Could not read the uploaded file: {e}")

if st.button("Summarize Text"):
    # Whitespace-only input counts as "no text".
    if text_input and text_input.strip():
        # Drop blank chunks so the model is never called on empty input.
        chunks = [c for c in chunk_text(text_input, max_chunk_size=1000) if c.strip()]
        st.write(f"🔹 Document split into {len(chunks)} chunks.")

        try:
            summaries = []
            for i, chunk in enumerate(chunks, start=1):
                st.write(f"Summarizing chunk {i}/{len(chunks)}...")
                result = summarizer(
                    chunk,
                    max_length=200,
                    min_length=30,
                    do_sample=False,
                    # Truncate over-long chunks to the model's limit instead of failing.
                    truncation=True,
                )
                summaries.append(result[0]['summary_text'])
        except Exception as e:
            # Surface model errors in the UI, as the Q&A section does.
            st.error(f"Error while summarizing: {e}")
        else:
            final_summary = " ".join(summaries)
            st.subheader("📝 Summary")
            st.write(final_summary)
    else:
        st.warning("Please upload or enter text first!")

st.subheader("💬 Ask a Question about the Text")
question = st.text_input("Enter your question here:")

# Run the QA model only when the button is pressed and both the text and the
# question are non-empty; any pipeline error is shown in the UI.
ask_clicked = st.button("Get Answer")
if ask_clicked:
    if not (text_input and question):
        st.warning("Please enter both a question and text.")
    else:
        try:
            qa_result = qa_pipeline(question=question, context=text_input)
            answer = qa_result['answer']
            st.success(f"**Answer:** {answer}")
        except Exception as e:
            st.error(f"Error while answering: {e}")


Overwriting app.py


In [5]:

import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: never hard-code the ngrok authtoken in the notebook. A token that
# has been committed or shared in a notebook should be treated as leaked and
# revoked in the ngrok dashboard.
# The token is taken from the NGROK_AUTHTOKEN environment variable when set;
# otherwise it is requested interactively without being echoed.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_token)



Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [6]:

# Start the Streamlit app in the background and expose it through ngrok.
from pyngrok import ngrok

# NOTE(review): presumably stops ngrok processes left over from an earlier run
# of this cell, so re-running does not accumulate tunnels — confirm.
ngrok.kill()

# The trailing "&" runs Streamlit in the background so the cell does not block.
get_ipython().system_raw('streamlit run app.py --server.port 8501 &')

# Open a tunnel to the same port Streamlit was started on (8501).
# Despite its name, `public_url` holds the object returned by ngrok.connect,
# which is what gets printed below.
public_url = ngrok.connect(8501)
print("Public URL:", public_url)

Public URL: NgrokTunnel: "https://retta-acidy-nongenerically.ngrok-free.dev" -> "http://localhost:8501"
