<a href="https://colab.research.google.com/github/Shauryasawant/Simple-Document-Summarization/blob/main/Worko_AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install gradio mistralai PyPDF2

Collecting mistralai
  Downloading mistralai-1.9.11-py3-none-any.whl.metadata (39 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting eval-type-backport>=0.2.0 (from mistralai)
  Downloading eval_type_backport-0.2.2-py3-none-any.whl.metadata (2.2 kB)
Collecting invoke<3.0.0,>=2.2.0 (from mistralai)
  Downloading invoke-2.2.0-py3-none-any.whl.metadata (3.3 kB)
Downloading mistralai-1.9.11-py3-none-any.whl (442 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m442.8/442.8 kB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading eval_type_backport-0.2.2-py3-none-any.whl (5.8 kB)
Downloading invoke-2.2.0-py3-none-any.whl (160 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m160.3/160.3 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m


In [2]:
import gradio as gr
from mistralai import Mistral
import PyPDF2
import io
from google.colab import userdata

# Get API key from Colab secrets (stored under the secret name 'api_key').
# If the lookup fails for any ordinary reason, fall back to None so that
# summarize_text can report the missing key in the UI. `except Exception`
# (rather than a bare `except:`) lets KeyboardInterrupt/SystemExit propagate.
try:
    API_KEY = userdata.get('api_key')
except Exception:
    API_KEY = None

# Initialize Mistral client
def init_client():
    """Build a Mistral client from the module-level ``API_KEY``.

    Returns:
        The ``Mistral`` client instance, or ``None`` if constructing it raised
        an exception (the error is printed for debugging).
    """
    try:
        mistral_client = Mistral(api_key=API_KEY)
    except Exception as err:
        print(f"Client init error: {err}")
        return None
    return mistral_client

# Extract text from PDF
def get_pdf_text(file):
    """Extract the text of every page of a PDF as a single string.

    Args:
        file: Passed straight to ``PyPDF2.PdfReader``; ``process_input``
            supplies a file object opened in binary mode.

    Returns:
        The text of all pages joined with newlines. A page that yields no
        text contributes an empty string.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # Join with a newline so the last word of one page does not fuse with the
    # first word of the next. `or ""` is a defensive guard in case a page
    # returns None/empty instead of text.
    return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

# Summarize text function
def summarize_text(text, style):
    """Summarize ``text`` with the Mistral chat API in the requested ``style``.

    Args:
        text: The document text to summarize.
        style: "Brief", "Detailed" or "Bullet Points"; any other value falls
            back to the "Brief" instruction.

    Returns:
        The summary returned by the model, or a string starting with
        "Error:" when the API key is missing, the text is empty, the client
        cannot be created, or the API call raises.
    """
    # Guard clauses, checked in order: credentials, input, client.
    if not API_KEY:
        return "Error: API key not found in Colab secrets"
    if not (text and text.strip()):
        return "Error: No text provided"

    mistral = init_client()
    if not mistral:
        return "Error: Failed to initialize Mistral client"

    # Instruction sent ahead of the document, keyed by the UI style choice.
    brief_instruction = "Summarize this text in 2-3 sentences:"
    instruction_by_style = {
        "Brief": brief_instruction,
        "Detailed": "Provide a comprehensive summary of this text with all key points:",
        "Bullet Points": "Summarize this text as bullet points covering main ideas:",
    }
    instruction = instruction_by_style.get(style, brief_instruction)
    user_message = {"role": "user", "content": f"{instruction}\n\n{text}"}

    try:
        completion = mistral.chat.complete(
            model="mistral-small-latest",
            messages=[user_message],
        )
        summary = completion.choices[0].message.content
    except Exception as err:
        # Surface API/network failures to the UI instead of raising.
        return f"Error: {str(err)}"
    return summary

def process_input(file, text_input, style, max_chars=50000):
    """Gather the text to summarize from an upload or the textbox, then summarize it.

    An uploaded file takes precedence over pasted text. Files whose name ends
    in ".pdf" (case-insensitive) are parsed with ``get_pdf_text``; anything
    else is read as UTF-8 text.

    Args:
        file: ``None``, a path string, or an object exposing the path as
            ``.name``.
        text_input: Text pasted by the user; used only when ``file`` is None.
        style: Summary style forwarded to ``summarize_text``.
        max_chars: Maximum number of characters accepted (default 50000).

    Returns:
        The result of ``summarize_text``, or a message string when no input
        was given, the file could not be read, or the text exceeds
        ``max_chars``.
    """
    if file is not None:
        try:
            file_path = file if isinstance(file, str) else file.name

            # Case-insensitive check: "REPORT.PDF" must be parsed as a PDF,
            # not decoded as UTF-8 text.
            if file_path.lower().endswith('.pdf'):
                with open(file_path, 'rb') as f:
                    content = get_pdf_text(f)
            else:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
        except Exception as e:
            return f"Error reading file: {str(e)}"
    elif text_input:
        content = text_input
    else:
        return "Please provide text or upload a file"

    # Reject oversized documents before sending them to the API.
    if len(content) > max_chars:
        return "Error: Text too long. Please use shorter documents."

    return summarize_text(content, style)


# Build the Gradio UI: a two-column layout with the inputs on the left and the
# rendered summary on the right.
with gr.Blocks(title="Doc Summarizer") as app:
    gr.Markdown("# Document Summarizer")
    gr.Markdown("Upload a document or paste text to get a summary")

    with gr.Row():
        # Left column: input controls.
        with gr.Column():
            # File upload restricted to the two extensions process_input handles.
            file_input = gr.File(
                label="Upload File (PDF or TXT)",
                file_types=[".pdf", ".txt"]
            )

            # Alternative to uploading; process_input uses the file when both are given.
            text_input = gr.Textbox(
                label="Or Paste Text Here",
                lines=10,
                placeholder="Enter text to summarize..."
            )

            # Choices must match the keys of the prompts dict in summarize_text.
            style_dropdown = gr.Dropdown(
                choices=["Brief", "Detailed", "Bullet Points"],
                value="Brief",
                label="Summary Style"
            )

            submit_btn = gr.Button("Summarize", variant="primary")

        # Right column: the summary (or error message) rendered as Markdown.
        with gr.Column():
            output = gr.Markdown()


    # Wire the button: inputs are passed positionally to
    # process_input(file, text_input, style) and its return value fills `output`.
    submit_btn.click(
        fn=process_input,
        inputs=[file_input, text_input, style_dropdown],
        outputs=output
    )

# Launch with public link for Colab
app.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://72458594c5faa5f2b1.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


