<a href="https://colab.research.google.com/github/AbdusSamad-syed/Document-summarizer/blob/main/ILDS_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers PyMuPDF sentencepiece gradio --quiet

import os
import fitz  # PyMuPDF
from transformers import pipeline
from typing import List
import gradio as gr

# STEP 1: Load Document
def load_document(file_path: str) -> str:
    """Read the full text of a PDF or plain-text document.

    The format is chosen from the file extension, compared
    case-insensitively so that names such as ``REPORT.PDF`` or
    ``notes.TXT`` are accepted.

    Args:
        file_path: Path to a ``.pdf`` or ``.txt`` file.

    Returns:
        The document text. For PDFs this is the text of every page,
        concatenated in page order.

    Raises:
        ValueError: If the extension is neither ``.pdf`` nor ``.txt``.
    """
    # Only the comparison is lower-cased; the original path is what gets opened.
    lowered = file_path.lower()

    if lowered.endswith(".pdf"):
        with fitz.open(file_path) as doc:
            # The join runs while the document is still open and avoids
            # growing a string with repeated "+=".
            return "".join(page.get_text() for page in doc)

    if lowered.endswith(".txt"):
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()

    raise ValueError("Unsupported file format. Only PDF and TXT are allowed.")

# STEP 2: Split text into sentence-based chunks
def chunk_text(text: str, max_chunk_size: int = 1000) -> List[str]:
    """Split text into chunks of whole sentences.

    Sentences are delimited by the two-character sequence ``". "``.
    Consecutive sentences are packed into one chunk, separated by a single
    space, for as long as the chunk stays within ``max_chunk_size``
    characters.

    Args:
        text: The text to split.
        max_chunk_size: Maximum chunk length in characters. A single
            sentence longer than this is not split; it becomes a chunk of
            its own and may therefore exceed the limit.

    Returns:
        The chunks in document order. Empty or whitespace-only input gives
        an empty list, and no empty chunks are ever returned.
    """
    pieces = text.split(". ")

    # split() consumed the period of every piece except the last one, so the
    # period is restored only there. Re-adding it to the last piece too would
    # duplicate the final period of text that already ends with ".".
    sentences = [piece.strip() + "." for piece in pieces[:-1] if piece.strip()]
    tail = pieces[-1].strip()
    if tail:
        sentences.append(tail)

    chunks: List[str] = []
    current = ""
    for sentence in sentences:
        # "+ 1" accounts for the space that would join the two parts.
        if current and len(current) + 1 + len(sentence) > max_chunk_size:
            chunks.append(current)
            current = sentence
        elif current:
            current = f"{current} {sentence}"
        else:
            # First sentence of a chunk is always accepted, so an oversized
            # sentence never forces an empty chunk to be emitted.
            current = sentence

    if current:
        chunks.append(current)
    return chunks

# STEP 3: Summarize chunks
# Shared summarization pipeline, created on first use. Building it is
# expensive, so it is kept for later calls instead of being rebuilt per call.
_SUMMARIZER = None


def _get_summarizer():
    """Return the shared summarization pipeline, creating it if needed."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline("summarization", model="facebook/bart-large-cnn")
    return _SUMMARIZER


def summarize_chunks(chunks: List[str]) -> str:
    """Summarize each chunk and join the summaries with newlines.

    Chunks shorter than 50 characters are skipped. If no chunk is long
    enough, an empty string is returned without loading the model.

    Args:
        chunks: Text chunks to summarize, in document order.

    Returns:
        One summary per usable chunk, newline-separated, with surrounding
        whitespace stripped. Empty when nothing was summarized.
    """
    usable = [chunk for chunk in chunks if len(chunk) >= 50]
    if not usable:
        return ""

    summarizer = _get_summarizer()
    summaries = [
        summarizer(
            chunk,
            max_length=130,
            min_length=30,
            do_sample=False,
            # A chunk can exceed the model's input limit (e.g. one very long
            # sentence); truncate it rather than fail the whole document.
            truncation=True,
        )[0]['summary_text']
        for chunk in usable
    ]
    return "\n".join(summaries).strip()

# STEP 4: Full summary pipeline
def summarize_file(file_obj) -> str:
    """Summarize an uploaded document and return text for display.

    This is the callback used by the UI, so it never raises: any failure is
    returned as an error message string instead.

    Args:
        file_obj: The uploaded file. May be ``None`` (nothing uploaded), a
            path given as ``str`` or ``os.PathLike``, or an object whose
            ``name`` attribute holds the path on disk.

    Returns:
        The summary, a notice when the summary is empty, or an error
        message.
    """
    if file_obj is None:
        return " Error: No file was uploaded."

    try:
        if isinstance(file_obj, (str, os.PathLike)):
            # Path-like objects are checked first: on a pathlib.Path, ".name"
            # is only the final path component, not the full path.
            file_path = os.fspath(file_obj)
        else:
            file_path = file_obj.name

        raw_text = load_document(file_path)
        chunks = chunk_text(raw_text)
        summary = summarize_chunks(chunks)
        return summary if summary else "Summary is empty or document too short."
    except Exception as e:
        # Top-level UI boundary: report the problem to the user, do not crash.
        return f" Error: {str(e)}"

# STEP 5: Gradio Interface
# Web UI: a single file-upload input wired to summarize_file, whose returned
# string is shown as plain text.
iface = gr.Interface(
    fn=summarize_file,
    # Upload picker is limited to the two formats load_document understands.
    inputs=gr.File(file_types=[".pdf", ".txt"]),
    outputs="text",
    title="📄 AI Model- Intelligent Legal Document Summarizer",
    description="Upload a PDF or TXT file to get a concise summary using open-source models."
)

# share=True requests a public link for the app.
# NOTE(review): anyone with that link can presumably reach the app and upload
# documents — confirm this is acceptable before using it with confidential files.
iface.launch(share=True)
