In [1]:
!pip install fastapi uvicorn transformers torch pyngrok nest-asyncio

Collecting fastapi
  Downloading fastapi-0.115.5-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.32.1-py3-none-any.whl.metadata (6.6 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.1-py3-none-any.whl.metadata (8.3 kB)
Collecting starlette<0.42.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.41.3-py3-none-any.whl.metadata (6.0 kB)
Downloading fastapi-0.115.5-py3-none-any.whl (94 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading uvicorn-0.32.1-py3-none-any.whl (63 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.8/63.8 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.1-py3-none-any.whl (22 kB)
Downloading starlette-0.41.3-py3-none-any.whl (73 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.2/73.2 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: uvicorn, pyngrok, sta

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import torch
import torch.nn as nn
from transformers import AutoModel, BertTokenizerFast
from pyngrok import ngrok

In [4]:
# Define the FastAPI app
app = FastAPI(title="Spam Classifier API", description="A BERT-based spam classification API", version="1.0")

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pre-trained BERT model and tokenizer
bert = AutoModel.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

# Define the BERT-based model architecture
class BERT_Arch(nn.Module):
    def __init__(self, bert):
        super(BERT_Arch, self).__init__()
        self.bert = bert
        self.dropout = nn.Dropout(0.1)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(768, 512)
        self.fc2 = nn.Linear(512, 2)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, sent_id, mask):
        _, cls_hs = self.bert(sent_id, attention_mask=mask, return_dict=False)
        x = self.fc1(cls_hs)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x

# Load the model and set it to evaluation mode
model_path = '/content/drive/MyDrive/mentoring/Kelas-NLP/models/spam_model.pt'
model = BERT_Arch(bert)
model.load_state_dict(torch.load(model_path, map_location=device))
model = model.to(device)
model.eval()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

  model.load_state_dict(torch.load(model_path, map_location=device))


BERT_Arch(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_aff

In [5]:
# Define a request schema
class SpamPredictionRequest(BaseModel):
    text: str

# Define a response schema
class SpamPredictionResponse(BaseModel):
    label: str
    confidence: float

# Define a prediction function
def predict_spam(text: str):
    # Tokenize input text
    tokens = tokenizer.batch_encode_plus(
        [text],
        max_length=25,
        pad_to_max_length=True,
        truncation=True,
        return_token_type_ids=False
    )

    # Convert tokens to tensors
    input_ids = torch.tensor(tokens['input_ids']).to(device)
    attention_mask = torch.tensor(tokens['attention_mask']).to(device)

    # Make predictions
    with torch.no_grad():
        outputs = model(input_ids, attention_mask)
        probs = torch.softmax(outputs, dim=1)
        confidence, predicted_label = torch.max(probs, dim=1)

    # Map labels to human-readable categories
    label_map = {0: "Not Spam", 1: "Spam"}
    return label_map[predicted_label.item()], confidence.item()

In [6]:
# Define API endpoint for prediction
@app.post("/predict", response_model=SpamPredictionResponse)
async def predict(request: SpamPredictionRequest):
    try:
        label, confidence = predict_spam(request.text)
        return SpamPredictionResponse(label=label, confidence=confidence)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/")
async def root():
    return {"message": "BERT Model is running successfully"}

In [9]:
!ngrok config add-authtoken YOUR_NGROK_AUTHTOKEN

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [12]:
from threading import Thread

# Run the server in a separate thread
def run():
    uvicorn.run(app, host="0.0.0.0", port=8000)

# Start ngrok tunnel
public_url = ngrok.connect(8000)
print("Public URL:", public_url)

# Run the FastAPI server
thread = Thread(target=run)
thread.start()

Public URL: NgrokTunnel: "https://74a2-34-16-170-228.ngrok-free.app" -> "http://localhost:8000"


INFO:     Started server process [158]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


In [14]:
import requests

url = "https://74a2-34-16-170-228.ngrok-free.app/predict"
data = {"text": "Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's"}
response = requests.post(url, json=data)
print(response.json())

INFO:     34.16.170.228:0 - "POST /predict HTTP/1.1" 200 OK
{'label': 'Spam', 'confidence': 0.793820321559906}
