In [21]:
from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM, pipeline

# Directory holding the locally saved checkpoint and tokenizer files.
# NOTE(review): this is an absolute, machine-specific path — consider making it
# configurable (e.g. relative to a project directory) before sharing the notebook.
model_path = "E:/Models/bart_model_tf"

# Load tokenizer and TensorFlow model
# use_fast=False requests the slow (pure-Python) tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
model = TFAutoModelForSeq2SeqLM.from_pretrained(model_path)

# Load summarization pipeline
# `summarizer` is module-level state that summarize_text() calls in a later cell,
# so this cell must run before any summarization is attempted.
summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)


All model checkpoint layers were used when initializing TFBartForConditionalGeneration.

All the layers of TFBartForConditionalGeneration were initialized from the model checkpoint at E:/Models/bart_model_tf.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBartForConditionalGeneration for predictions without further training.


In [10]:
!pip install PyMuPDF



In [22]:
import fitz
def extract_text_from_pdf(pdf_file):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            upload object that exposes the file's path as a ``.name``
            attribute, such as the value a Gradio ``File`` component hands to
            its callback.

    Returns:
        str: The text of all pages in document order, or ``""`` when no page
        yields any text.
    """
    import os  # local import: the enclosing cell only imports fitz

    # Paths are checked first: pathlib.Path also has a ``.name`` attribute, but
    # it holds only the final path component rather than the full location.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = pdf_file.name

    with fitz.open(pdf_path) as doc:
        # join() avoids rebuilding the accumulated string once per page.
        return "".join(page.get_text() for page in doc)

def summarize_text(text, max_chars=1024, max_length=250, min_length=60):
    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

    Args:
        text: The text to summarize.
        max_chars: Only the first ``max_chars`` characters are sent to the
            model (default 1024).
        max_length: Upper bound on the generated summary length, forwarded to
            the pipeline (default 250).
        min_length: Lower bound on the generated summary length, forwarded to
            the pipeline (default 60).

    Returns:
        str: The generated summary, or a short notice when ``text`` is empty
        or contains only whitespace (for example a PDF without a text layer).
    """
    # Nothing to summarize: skip the model call instead of feeding it an
    # empty prompt.
    if not text or not text.strip():
        return "No text found to summarize."

    clipped = text[:max_chars]  # slicing is a no-op when text is already short

    # A character cap does not bound the token count (some scripts tokenize to
    # several tokens per character), so also ask the tokenizer to truncate to
    # the model's own input limit.
    result = summarizer(
        clipped,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        truncation=True,
    )
    return result[0]["summary_text"]

def summarize_input(input_text=None, pdf_file=None):
    """Summarize an uploaded PDF or, failing that, the supplied text.

    Args:
        input_text: Raw text to summarize; used only when no PDF is given.
        pdf_file: Uploaded PDF object; takes precedence over ``input_text``.

    Returns:
        str: The summary, or a prompt asking for input when neither argument
        is provided.
    """
    # Guard: nothing usable was supplied.
    if not pdf_file and not input_text:
        return "Please provide either a PDF or text input."

    # The PDF wins when both inputs are present.
    source_text = extract_text_from_pdf(pdf_file) if pdf_file else input_text
    return summarize_text(source_text)

In [23]:
import gradio as gr

def summarize_handler(text, file):
    """Click callback: forward the textbox and file-upload values to summarize_input.

    Args:
        text: Current contents of the text input.
        file: Current value of the file input (may be empty).

    Returns:
        Whatever ``summarize_input`` returns for these inputs.
    """
    summary = summarize_input(text, file)
    return summary

# Build the UI: components created inside the `with` blocks are attached to `demo`.
with gr.Blocks() as demo:
    gr.Markdown("## Smart Document Summarizer (Local Version)")

    # Both inputs are optional and sit in one row; summarize_input() uses the
    # PDF when both are supplied.
    with gr.Row():
        text_input = gr.Textbox(lines=10, label="Enter Text (Optional)")
        file_input = gr.File(label="Upload PDF (Optional)", file_types=[".pdf"])

    summarize_btn = gr.Button("Summarize")
    output = gr.Textbox(label="Summary")

    # On click, pass (text_input, file_input) to summarize_handler and show its
    # return value in the `output` textbox.
    summarize_btn.click(summarize_handler, inputs=[text_input, file_input], outputs=output)

# Start the local web server; pass share=True to launch() for a public link.
demo.launch()


* Running on local URL:  http://127.0.0.1:7867
* To create a public link, set `share=True` in `launch()`.


