In [1]:
# 1️⃣ Install dependencies
!pip install -U transformers datasets accelerate

import os
import torch
os.environ["WANDB_DISABLED"] = "true"   # Disable W&B logging

# 2️⃣ Imports
from datasets import load_dataset
from transformers import (
    T5Tokenizer,
    T5ForConditionalGeneration,
    Trainer,
    TrainingArguments,
    DataCollatorForSeq2Seq
)

# 3️⃣ Load dataset
dataset = load_dataset("cnn_dailymail", "3.0.0")

# 4️⃣ Load model & tokenizer
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# 5️⃣ Preprocess function (dynamic padding handled by collator)
def preprocess(examples):
    inputs = ["summarize: " + doc for doc in examples["article"]]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True)

    labels = tokenizer(examples["highlights"], max_length=150, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_datasets = dataset.map(preprocess, batched=True)

# 6️⃣ Data collator for dynamic padding
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

# 7️⃣ Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=5e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=1,   # Small for Colab
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=100,
    save_steps=500
)

# 8️⃣ Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"].shuffle(seed=42).select(range(2000)),
    eval_dataset=tokenized_datasets["validation"].shuffle(seed=42).select(range(500)),
    tokenizer=tokenizer,
    data_collator=data_collator
)

# 9️⃣ Train
trainer.train()

# 🔟 Save fine-tuned model
model.save_pretrained("./summarizer_model")
tokenizer.save_pretrained("./summarizer_model")




Collecting transformers
  Downloading transformers-4.55.0-py3-none-any.whl.metadata (39 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

train-00000-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

train-00001-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

train-00002-of-00003.parquet:   0%|          | 0.00/259M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/34.7M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/30.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/287113 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/13368 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/11490 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Map:   0%|          | 0/287113 [00:00<?, ? examples/s]

Map:   0%|          | 0/13368 [00:00<?, ? examples/s]

Map:   0%|          | 0/11490 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  trainer = Trainer(


Step,Training Loss
100,2.2894
200,2.1133
300,2.182
400,2.0497
500,2.1465


('./summarizer_model/tokenizer_config.json',
 './summarizer_model/special_tokens_map.json',
 './summarizer_model/spiece.model',
 './summarizer_model/added_tokens.json')

In [15]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# 1️⃣ Load saved model & tokenizer
model_path = "./summarizer_model"
model = T5ForConditionalGeneration.from_pretrained(model_path)
tokenizer = T5Tokenizer.from_pretrained(model_path)

# 2️⃣ Set device (GPU if available)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# 3️⃣ Test function
def summarize_text(text):
    inputs = tokenizer("summarize: " + text, return_tensors="pt", max_length=512, truncation=True).to(device)
    outputs = model.generate(**inputs, max_length=100, num_beams=4, early_stopping=True)
    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return summary

# 4️⃣ Example usage
test_text = """
Israeli forces have killed 69000 people, wounded 140,000 and 20,000 people are missing, the food suuplies have been blocked from entering Gaza, 80% of the people of gaza are malnurished, 160 people including 96 children have died of starvation
"""
summary = summarize_text(test_text)
print("\nSummary:", summary)

# 5️⃣ Test with your own input
custom_text = input("\nEnter text to summarize:\n")
print("\nSummary:", summarize_text(custom_text))



Summary: Israeli forces have killed 69000 people, wounded 140,000 and 20,000 people are missing. 80% of the people of gaza are malnurished, 160 people including 96 children have died of starvation.

Enter text to summarize:
i love my son saad, he is 7 years old, he is cute, polite and naughty

Summary: i love my son saad, he is 7 years old, he is cute, polite and naughty.


In [17]:
from fastapi import FastAPI, Form
from fastapi.responses import HTMLResponse
from pydantic import BaseModel
from transformers import T5Tokenizer, T5ForConditionalGeneration
from pyngrok import ngrok
import nest_asyncio
import uvicorn
import os

MODEL_PATH = "./summarizer_model"

# Load model & tokenizer
tokenizer = T5Tokenizer.from_pretrained(MODEL_PATH)
model = T5ForConditionalGeneration.from_pretrained(MODEL_PATH)

app = FastAPI()

class TextInput(BaseModel):
    text: str

# HTML form at root
@app.get("/", response_class=HTMLResponse)
def home():
    return """
    <html>
        <head><title>Summarizer</title></head>
        <body style="font-family:Arial; margin:40px;">
            <h2>Text Summarizer</h2>
            <form action="/summarize_form" method="post">
                <textarea name="text" rows="10" cols="80" placeholder="Enter text to summarize"></textarea><br><br>
                <input type="submit" value="Summarize">
            </form>
        </body>
    </html>
    """

# API endpoint for JSON requests
@app.post("/summarize")
async def summarize_api(input_data: TextInput):
    inputs = tokenizer("summarize: " + input_data.text,
                       return_tensors="pt",
                       max_length=512,
                       truncation=True)
    outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"summary": summary}

# Handle form submission
@app.post("/summarize_form", response_class=HTMLResponse)
async def summarize_form(text: str = Form(...)):
    inputs = tokenizer("summarize: " + text,
                       return_tensors="pt",
                       max_length=512,
                       truncation=True)
    outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return f"""
    <html>
        <body style="font-family:Arial; margin:40px;">
            <h2>Summary</h2>
            <p>{summary}</p>
            <a href="/">Go Back</a>
        </body>
    </html>
    """

if __name__ == "__main__":
    # Start ngrok tunnel
    ngrok.set_auth_token("30Hp3ggHO2VyEkOtL7XXIrnzgBv_2dhqUQVo6XVQzzgpt5McZ")
    public_url = ngrok.connect(8000)
    print(f"Public URL: {public_url}")

    # Start server
    nest_asyncio.apply()
    uvicorn.run(app, host="0.0.0.0", port=8000)


Public URL: NgrokTunnel: "https://4210d390cd1a.ngrok-free.app" -> "http://localhost:8000"


INFO:     Started server process [258]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     39.62.152.250:0 - "GET / HTTP/1.1" 200 OK
INFO:     39.62.152.250:0 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     39.62.152.250:0 - "POST /summarize_form HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [258]


In [18]:
from google.colab import files
import shutil

# Zip the model folder
shutil.make_archive("summarizer_model", 'zip', "./summarizer_model")

# Download the zip file
files.download("summarizer_model.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>