In [1]:
# ==============================
# INSTALL PACKAGES
# ==============================

# Install PyTorch with CUDA
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

# Install other libraries normally
!pip install -q fastapi uvicorn pyngrok nest_asyncio transformers

# ==============================
# IMPORTS
# ==============================
import nest_asyncio
import threading
import time
import torch
from fastapi import FastAPI
from pydantic import BaseModel
from pyngrok import ngrok
import uvicorn
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

nest_asyncio.apply()

# ==============================
# LOAD MODEL
# ==============================
print("Loading model...")

model_name = "google/long-t5-tglobal-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

if torch.cuda.is_available():
    model = model.cuda()
    print("Using GPU")
else:
    print("Using CPU")

print("Model loaded successfully")

# ==============================
# FASTAPI
# ==============================
app = FastAPI()

class TextRequest(BaseModel):
    text: str

@app.get("/")
def home():
    return {"message": "Lecture AI Backend Running"}

@app.post("/summarize")
def summarize(data: TextRequest):
    inputs = tokenizer(
        data.text,
        return_tensors="pt",
        truncation=True,
        max_length=2048
    )

    if torch.cuda.is_available():
        inputs = {k: v.cuda() for k, v in inputs.items()}

    outputs = model.generate(
        **inputs,
        max_length=200,
        min_length=50,
        do_sample=False
    )

    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"summary": summary}

# ==============================
# START SERVER
# ==============================
def run():
    uvicorn.run(app, host="0.0.0.0", port=8000)

thread = threading.Thread(target=run, daemon=True)
thread.start()

time.sleep(5)

# ==============================
# START NGROK
# ==============================
ngrok.set_auth_token("39fZOPymtjZSoPwshFjBfxQZKOx_3i2DTFm5LjUuqEHVXQht8")
public_url = ngrok.connect(8000)

print("\nðŸš€ Backend LIVE at:")
print(public_url)


Loading model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading weights:   0%|          | 0/297 [00:00<?, ?it/s]

INFO:     Started server process [2547]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


Using GPU
Model loaded successfully

ðŸš€ Backend LIVE at:
NgrokTunnel: "https://evolutional-viciously-winnifred.ngrok-free.dev" -> "http://localhost:8000"


In [2]:
import requests

url = "https://evolutional-viciously-winnifred.ngrok-free.dev/summarize"

response = requests.post(
    url,
    json={"text": "Artificial Intelligence is transforming education systems worldwide by enabling personalized learning experiences."}
)

print(response.json())


INFO:     136.118.73.189:0 - "POST /summarize HTTP/1.1" 200 OK
{'summary': 'Artificial Intelligence is transforming education systems worldwide by enabling personalized learning experiences..Artificial Intelligence is transforming education systems worldwide by enabling personalized learning experiences..Artificial Intelligence is transforming education systems worldwide by enabling personalized learning experiences.'}
