In [1]:
!pip install fastapi uvicorn pyngrok pydantic torch transformers

Collecting fastapi
  Downloading fastapi-0.115.7-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Collecting starlette<0.46.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.45.2-py3-none-any.whl.metadata (6.3 kB)
Downloading fastapi-0.115.7-py3-none-any.whl (94 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.8/94.8 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading uvicorn-0.34.0-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Downloading starlette-0.45.2-py3-none-any.whl (71 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.5/71.5 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: uvicorn, pyngrok, sta

In [12]:
import torch
from torch.nn.functional import softmax
from transformers import AutoTokenizer
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List
from pyngrok import ngrok
import uvicorn
import threading
import os

# Load model and tokenizer
device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
class TransformerClassifier(torch.nn.Module):
    def __init__(self, num_labels, hidden_dim=768, nhead=8, num_layers=6, max_length=128, dropout=0.1):
        super(TransformerClassifier, self).__init__()
        self.embedding = torch.nn.Embedding(30522, hidden_dim)  # Using BERT vocab size
        self.positional_encoding = torch.nn.Parameter(torch.zeros(max_length, hidden_dim))
        encoder_layer = torch.nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=nhead, dropout=dropout)
        self.transformer = torch.nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.classifier = torch.nn.Linear(hidden_dim, num_labels)
        self.dropout = torch.nn.Dropout(dropout)

    def forward(self, input_ids, attention_mask):
        embeddings = self.embedding(input_ids) + self.positional_encoding[:input_ids.size(1), :]
        embeddings = self.dropout(embeddings)
        transformer_out = self.transformer(embeddings.permute(1, 0, 2), src_key_padding_mask=(attention_mask == 0))
        pooled_output = transformer_out.mean(dim=0)  # Pooling
        logits = self.classifier(pooled_output)
        return logits

In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
import os

# Path to the model in Google Drive
drive_path = '/content/drive/My Drive/mentoring/Kelas-NLP/models/intent_transformer_model.pth'

# Ensure model file exists
if not os.path.exists(drive_path):
  raise FileNotFoundError(f"Model file not found at {drive_path}")

In [8]:
# Load trained model and tokenizer
num_labels = 3  # Adjust according to your dataset
model = TransformerClassifier(num_labels=num_labels)
model.load_state_dict(torch.load(drive_path, map_location=device))
model.to(device)
model.eval()

tokenizer = AutoTokenizer.from_pretrained("cahya/bert-base-indonesian-522M")

  model.load_state_dict(torch.load(drive_path, map_location=device))
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/62.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/468 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/230k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [9]:
# Label map
label_map = {
    0: "greeting",
    1: "sekarang_jam_berapa",
    2: "siapa_anda",  # Update with actual labels
}

# FastAPI app setup
app = FastAPI(title="Intent Classification API", version="1.0")

class TextRequest(BaseModel):
    text: str

class BatchRequest(BaseModel):
    texts: List[str]

class PredictionResponse(BaseModel):
    intent: str
    confidence: float

In [10]:
@app.post("/predict", response_model=PredictionResponse)
async def predict(request: TextRequest):
    text = request.text
    if not text.strip():
        raise HTTPException(status_code=400, detail="Text cannot be empty.")

    # Tokenize input text
    tokens = tokenizer(
        text, padding="max_length", truncation=True, max_length=128, return_tensors="pt"
    )
    input_ids = tokens["input_ids"].to(device)
    attention_mask = tokens["attention_mask"].to(device)

    # Make prediction
    with torch.no_grad():
        logits = model(input_ids, attention_mask)
        probabilities = softmax(logits, dim=1)
        confidence, predicted_label = torch.max(probabilities, dim=1)

    # Return prediction
    intent = label_map[predicted_label.item()]
    return {"intent": intent, "confidence": confidence.item()}

@app.post("/predict_batch", response_model=List[PredictionResponse])
async def predict_batch(request: BatchRequest):
    texts = request.texts
    if not texts:
        raise HTTPException(status_code=400, detail="Text list cannot be empty.")

    predictions = []
    for text in texts:
        # Tokenize input text
        tokens = tokenizer(
            text, padding="max_length", truncation=True, max_length=128, return_tensors="pt"
        )
        input_ids = tokens["input_ids"].to(device)
        attention_mask = tokens["attention_mask"].to(device)

        # Make prediction
        with torch.no_grad():
            logits = model(input_ids, attention_mask)
            probabilities = softmax(logits, dim=1)
            confidence, predicted_label = torch.max(probabilities, dim=1)

        # Append result
        intent = label_map[predicted_label.item()]
        predictions.append({"intent": intent, "confidence": confidence.item()})

    return predictions

# Health check
@app.get("/")
async def health_check():
    return {"message": "Intent Classification API is up and running!"}

In [11]:
!ngrok config add-authtoken YOUR_NGROK_AUTHTOKEN

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [13]:
# Function to run FastAPI server
def run_app():
    uvicorn.run(app, host="0.0.0.0", port=8000)

# Start ngrok and FastAPI server
ngrok_tunnel = ngrok.connect(8000)
print(f"Public URL: {ngrok_tunnel.public_url}")

# Run FastAPI in a thread
thread = threading.Thread(target=run_app)
thread.start()

Public URL: https://6704-34-123-51-223.ngrok-free.app


INFO:     Started server process [489]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


In [16]:
import requests

url = "https://6704-34-123-51-223.ngrok-free.app/predict"
data = {"text": "Siapa nama kamu?"}
response = requests.post(url, json=data)
print(response.json())

INFO:     34.123.51.223:0 - "POST /predict HTTP/1.1" 200 OK
{'intent': 'siapa_anda', 'confidence': 0.9997878670692444}


In [20]:
import requests

url = "https://6704-34-123-51-223.ngrok-free.app/predict_batch"
data = {"texts": ["Nama kamu siapa?", "Selamat malam kawan"]}
response = requests.post(url, json=data)
print(response.json())

INFO:     34.123.51.223:0 - "POST /predict_batch HTTP/1.1" 200 OK
[{'intent': 'siapa_anda', 'confidence': 0.9997879862785339}, {'intent': 'greeting', 'confidence': 0.9916836023330688}]
