In [1]:
!pip install transformers gradio




In [2]:
import gradio as gr
from transformers import pipeline
import re

# Initialize the summarization pipeline
summarizer = pipeline("summarization")

# Function to split text into chunks
def chunk_text(text, max_chunk_size=512):
    sentences = re.split(r'(?<=[.!?]) +', text)  # Split by sentence endings
    chunks = []
    current_chunk = ""

    for sentence in sentences:
        if len(current_chunk.split()) + len(sentence.split()) <= max_chunk_size:
            current_chunk += sentence + " "
        else:
            chunks.append(current_chunk.strip())
            current_chunk = sentence + " "
    if current_chunk:
        chunks.append(current_chunk.strip())

    return chunks

# Function to summarize text
def summarize(text, file):
    # Handle file upload
    if file is not None:
        try:
            # Check if the uploaded file is a .txt file based on file extension
            if not file.name.endswith(".txt"):
                return "Only .txt files are supported. Please upload a valid text file.", 0, 0, None

            # Open and read the content of the uploaded file
            with open(file.name, 'r', encoding='utf-8') as f:
                text = f.read()  # Read the text from the file
        except Exception as e:
            return f"Error reading file: {str(e)}", 0, 0, None

    # Check if text is provided
    if not text.strip():
        return "Please provide some text or upload a document.", 0, 0, None

    # Count words in the input text
    word_count = len(text.split())

    # Calculate target summary length (40% of the original word count)
    target_summary_length = max(1, int(word_count * 0.4))

    # Split the text into smaller chunks
    chunks = chunk_text(text, max_chunk_size=512)

    # Summarize each chunk separately
    summarized_chunks = [summarizer(chunk, max_length=200, min_length=100, do_sample=False)[0]['summary_text'] for chunk in chunks]

    # Join all the summarized chunks into a single summary
    summary = ' '.join(summarized_chunks)

    # Count the words in the summarized text
    summary_word_count = len(summary.split())

    # If the summary is too short, include more chunks
    while summary_word_count < target_summary_length and chunks:
        next_chunk = chunks.pop(0)  # Get the next chunk
        next_summary = summarizer(next_chunk, max_length=200, min_length=100, do_sample=False)[0]['summary_text']
        summary += ' ' + next_summary
        summary_word_count = len(summary.split())

    # Save the summary to a text file
    summary_file_path = "summary.txt"
    with open(summary_file_path, "w") as f:
        f.write(summary.strip())

    # Return summary, original word count, summary word count, and file path
    return summary.strip(), word_count, summary_word_count, summary_file_path

# Create Gradio interface
iface = gr.Interface(
    fn=summarize,
    inputs=[
        gr.Textbox(label="Enter Text", lines=10, placeholder="Type or paste your text here..."),
        gr.File(label="Or Upload a Text Document")
    ],
    outputs=[
        gr.Textbox(label="Summarized Text"),
        gr.Number(label="Original Words"),
        gr.Number(label="Summary Words"),
        gr.File(label="Download Summary")
    ],
    title="Text Summarizer",
    description="Enter text to get its summary or upload a text document.",
    allow_flagging="never"
)

# Launch the interface
iface.launch(share=True)


No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/1.80k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]



* Running on local URL:  http://127.0.0.1:7860

Could not create share link. Missing file: C:\Users\HP\AppData\Local\Programs\Python\Python312\Lib\site-packages\gradio\frpc_windows_amd64_v0.3. 

Please check your internet connection. This can happen if your antivirus software blocks the download of this file. You can install manually by following these steps: 

1. Download this file: https://cdn-media.huggingface.co/frpc-gradio-0.3/frpc_windows_amd64.exe
2. Rename the downloaded file to: frpc_windows_amd64_v0.3
3. Move the file to this location: C:\Users\HP\AppData\Local\Programs\Python\Python312\Lib\site-packages\gradio


