In [1]:
!pip install fastapi uvicorn pyngrok pydantic transformers torch

Collecting fastapi
  Downloading fastapi-0.115.5-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.32.1-py3-none-any.whl.metadata (6.6 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.1-py3-none-any.whl.metadata (8.3 kB)
Collecting starlette<0.42.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.41.3-py3-none-any.whl.metadata (6.0 kB)
Downloading fastapi-0.115.5-py3-none-any.whl (94 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 kB[0m [31m221.8 kB/s[0m eta [36m0:00:00[0m
[?25hDownloading uvicorn-0.32.1-py3-none-any.whl (63 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.8/63.8 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.1-py3-none-any.whl (22 kB)
Downloading starlette-0.41.3-py3-none-any.whl (73 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.2/73.2 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: uvicorn, pyngrok, s

In [2]:
# Install library tambahan jika belum ada
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
from pyngrok import ngrok
import torch
import os
import uvicorn
import threading

In [3]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [17]:
# Path ke model di Google Drive
model_dir = "/content/drive/My Drive/mentoring/Kelas-NLP/models/text-gen"

# Initialize the device
device = torch.device("cpu")

# Load the model and tokenizer from the Google Drive directory
model = AutoModelForCausalLM.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)

# Create the pipeline
# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device,
    top_k=50,  # Memfilter kandidat output
    top_p=0.9,  # Sampling dengan probabilitas kumulatif
    temperature=0.8  # Mengontrol keacakan
)


In [18]:
# FastAPI app
app = FastAPI(title="Text Generation Model Serving")

# Request body schema
class TextGenerationRequest(BaseModel):
    input_text: str
    num_return_sequences: int = 1

# Response body schema
class TextGenerationResponse(BaseModel):
    generated_text: str

@app.post("/generate", response_model=TextGenerationResponse)
def generate_text(request: TextGenerationRequest):
    """
    Generate text based on the input prompt.
    """
    try:
        # Generate text
        results = pipe(request.input_text, num_return_sequences=request.num_return_sequences)
        # Return the first generated sequence
        return TextGenerationResponse(generated_text=results[0]["generated_text"])
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Text generation failed: {str(e)}")

@app.get("/")
def read_root():
    return {"message": "Welcome to the Text Generation API!"}

In [6]:
# !ngrok config add-authtoken YOUR_NGROK_AUTHTOKEN

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [7]:
# Function to run FastAPI server
def run_app():
    uvicorn.run(app, host="0.0.0.0", port=8000)

# Start ngrok and FastAPI server
ngrok_tunnel = ngrok.connect(8000)
print(f"Public URL: {ngrok_tunnel.public_url}")

# Run FastAPI in a thread
thread = threading.Thread(target=run_app)
thread.start()

Public URL: https://3e3f-34-23-185-71.ngrok-free.app


In [25]:
import requests

NGROK_URL = "https://3e3f-34-23-185-71.ngrok-free.app"

url = f"{NGROK_URL}/generate"

data = {
    "input_text": "Hello, how are you?",
    "num_return_sequences": 2
}

response = requests.post(url, json=data)

print(response.json())

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


INFO:     34.23.185.71:0 - "POST /generate HTTP/1.1" 200 OK
{'generated_text': 'Hello, how are you?\n    """\n    return to_sql(\n        tbl,\n        if_exists=\'append\',\n        index_col=index_col,\n        chunksize=chunksize,\n        dtype={\n            \'names\': [\n                (name,'}
