# Homework 2 - Serving a Machine Learning Model as an API Service

Mikołaj Marmurowicz 151956

## Loading required libraries and data

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import lightning as L
import bentoml

Recreating the model.

In [2]:
class LitFashionMNIST(L.LightningModule):
    """Convolutional classifier with 10 output classes, packaged as a LightningModule.

    The network is two conv/ReLU/max-pool stages followed by a two-layer
    fully connected head. The ``64 * 7 * 7`` flatten size implies
    single-channel 28x28 inputs (each pooling stage halves the spatial size).

    Args:
        lr: Learning rate handed to the Adam optimizer.
        dropout: Dropout probability applied before the output layer.
        hidden_size: Width of the hidden fully connected layer.
    """

    def __init__(
        self,
        lr: float = 1e-3,
        dropout: float = 0.3,
        hidden_size: int = 128
    ):
        super().__init__()
        # Makes the constructor arguments available through self.hparams.
        self.save_hyperparameters()

        # The layer order (and therefore the Sequential indices) must stay
        # fixed: checkpoints address the weights as "model.<index>.*".
        layers = [
            # Feature extractor, stage 1: 1 -> 32 channels, spatial size halved.
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            # Feature extractor, stage 2: 32 -> 64 channels, spatial size halved.
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            # Classification head.
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, 10),
        ]
        self.model = nn.Sequential(*layers)
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x):
        """Run ``x`` through the network and return the raw class logits."""
        return self.model(x)

    def _shared_step(self, batch, stage: str):
        """Compute loss and accuracy for one batch and log both per epoch.

        Args:
            batch: An ``(inputs, targets)`` pair.
            stage: Prefix for the logged metric names ("train", "val" or "test").

        Returns:
            The cross-entropy loss of the batch.
        """
        inputs, targets = batch
        scores = self(inputs)
        step_loss = self.criterion(scores, targets)
        # Fraction of samples whose highest-scoring class equals the target.
        accuracy = (scores.argmax(dim=1) == targets).float().mean()

        # Both metrics are aggregated over the epoch and shown in the progress bar.
        log_opts = dict(prog_bar=True, on_step=False, on_epoch=True)
        self.log(f"{stage}_loss", step_loss, **log_opts)
        self.log(f"{stage}_acc", accuracy, **log_opts)
        return step_loss

    def training_step(self, batch, batch_idx):
        """Training hook: delegates to the shared step with the "train" prefix."""
        return self._shared_step(batch, "train")

    def validation_step(self, batch, batch_idx):
        """Validation hook: delegates to the shared step with the "val" prefix."""
        return self._shared_step(batch, "val")

    def test_step(self, batch, batch_idx):
        """Test hook: delegates to the shared step with the "test" prefix."""
        return self._shared_step(batch, "test")

    def configure_optimizers(self):
        """Build Adam plus a plateau scheduler driven by the logged ``val_loss``."""
        adam = torch.optim.Adam(self.parameters(), lr=self.hparams.lr)
        # Halve the learning rate once val_loss has stopped improving for 3 epochs.
        plateau = torch.optim.lr_scheduler.ReduceLROnPlateau(
            adam, mode="min", factor=0.5, patience=3
        )
        scheduler_cfg = {"scheduler": plateau, "monitor": "val_loss"}
        return {"optimizer": adam, "lr_scheduler": scheduler_cfg}

Loading the model from a previously created checkpoint.

In [3]:
# Forward slashes resolve on Windows as well as on Linux/macOS, whereas the
# backslash-separated form only works on Windows.
CKPT_PATH = "../Homework1/pl_hw1_fashionmnist/3sahzi9m/checkpoints/fashionmnist-optuna-epoch=05-val_loss=0.1950.ckpt"

# Use the GPU when one is visible, otherwise fall back to the CPU instead of
# failing on an unconditional move to "cuda".
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# map_location remaps the stored tensors onto DEVICE while loading, so a
# checkpoint holding GPU tensors can still be opened on a CPU-only machine.
lit_model = LitFashionMNIST.load_from_checkpoint(CKPT_PATH, map_location=DEVICE)
lit_model.to(DEVICE)
lit_model.eval()  # inference mode: disables dropout

print("Loaded Lightning model.")
print("Hyperparameters:", lit_model.hparams)

Loaded Lightning model.
Hyperparameters: "dropout":     0.2266455049847091
"hidden_size": 256
"lr":          0.0008339802352744992


Quick check whether the model can predict values.

In [4]:
# Preprocessing for the local sanity check: tensor conversion followed by
# normalization with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

test_dataset = datasets.FashionMNIST(root="data", train=False, download=True, transform=transform)

# A seeded generator fixes the shuffle order, so the sampled batch (and the
# labels printed below) are identical on every Restart & Run All.
SEED = 42
shuffle_rng = torch.Generator().manual_seed(SEED)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=True, generator=shuffle_rng)

images, labels = next(iter(test_loader))
# Run the batch on whatever device the model currently lives on.
device = next(lit_model.parameters()).device
images = images.to(device)

with torch.no_grad():
    logits = lit_model(images)
    preds = logits.argmax(dim=1)

print("True labels:", labels.tolist())
print("Preds      :", preds.tolist())

True labels: [1, 5, 5, 6]
Preds      : [1, 5, 5, 0]


## Saving the model to BentoML

In [5]:
# Store the loaded Lightning module in the local BentoML model store.
# NOTE: save_model returns a Model object rather than a bare tag; the name
# `bento_tag` is kept so that any other cell referring to it keeps working.
bento_tag = bentoml.pytorch.save_model(
    "fashion_mnist_lit",
    lit_model,
    signatures={
        # Calls to the model may be batched along the first dimension.
        "__call__": {"batchable": True}
    },
    metadata={
        "source": "HW1 LitFashionMNIST",
        "framework": "pytorch_lightning",
        "dataset": "FashionMNIST",
        "task": "image_classification"
    }
)

# Print the tag itself ("name:version") instead of the Model(...) repr.
print("Saved model tag:", bento_tag.tag)

  bento_tag = bentoml.pytorch.save_model(


Saved model tag: Model(tag="fashion_mnist_lit:adjznmwip272pkab")


Checking from the command line that the model exists in the local BentoML model store:

In [6]:
# List every model in the local BentoML store (tag, module, size, creation time).
!bentoml models list
# Print the stored metadata (signatures, context, custom metadata) of the newest version.
!bentoml models get fashion_mnist_lit:latest

 Tag                            Module           Size      Creation Time       
 fashion_mnist_lit:adjznmwip2…  bentoml.pytorch  3.16 MiB  2025-11-23 16:06:35 
 fashion_mnist_lit:5xdcezwipw…  bentoml.pytorch  3.16 MiB  2025-11-23 16:06:03 
 fashion_mnist_lit:vg6qhcgipw…  bentoml.pytorch  3.16 MiB  2025-11-23 16:04:09 
 fashion_mnist_lit:6dlzrwwipo…  bentoml.pytorch  3.16 MiB  2025-11-23 15:51:49 
name: fashion_mnist_lit
version: adjznmwip272pkab
module: bentoml.pytorch
labels: {}
options:
  partial_kwargs: {}
metadata:
  source: HW1 LitFashionMNIST
  framework: pytorch_lightning
  dataset: FashionMNIST
  task: image_classification
context:
  framework_name: torch
  framework_versions:
    torch: 2.5.1
  bentoml_version: 1.4.29
  python_version: 3.11.14
signatures:
  __call__:
    batchable: true
    batch_dim:
    - 0
    - 0
api_version: v1
creation_time: '2025-11-23T15:06:35.386490+00:00'



The code of the `service.py` file is as follows:

```python
import torch
import bentoml

# Tag of the stored model to serve; ":latest" resolves to the newest saved version.
MODEL_TAG = "fashion_mnist_lit:latest"


@bentoml.service
class FashionMNISTService:
    """BentoML service that returns a predicted class index for each input image."""

    def __init__(self) -> None:
        """Load the stored model onto the GPU if available, otherwise the CPU."""
        if torch.cuda.is_available():
            device_str = "cuda"
        else:
            device_str = "cpu"
        self.device = torch.device(device_str)
        self.model = bentoml.pytorch.load_model(MODEL_TAG, device_id=device_str)
        self.model.eval()  # inference only

    @bentoml.api
    def predict(self, images: torch.Tensor) -> list[int]:
        """Classify a batch of images.

        Args:
            images: Batch of image tensors, passed to the model as received
                (no preprocessing is applied here).

        Returns:
            One predicted class index per image in the batch.
        """
        with torch.no_grad():
            class_scores = self.model(images.to(self.device))
            # The highest-scoring class along dim 1 is the prediction.
            predicted = class_scores.argmax(dim=1)
        return predicted.cpu().tolist()
```

The service is started from a separate command-line window with:
```bash
bentoml serve
```

## Testing the served model 

In [None]:
# Address of the service process started with `bentoml serve`.
SERVICE_URL = "http://localhost:3000"

# Same preprocessing as the local sanity check earlier in the notebook. The
# service passes the tensor to the model unchanged, so normalization has to
# happen on the client; otherwise the served model sees differently scaled
# inputs than the local check does.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
test_dataset = datasets.FashionMNIST(
    root="./data", train=False, download=True, transform=transform
)
img, label = test_dataset[5]
batch = img.unsqueeze(0)  # add a leading batch dimension for the API call

with bentoml.SyncHTTPClient(SERVICE_URL) as client:
    preds = client.predict(batch)
    print("True label:", label)
    print("Predicted:", preds[0])

True label: 1
Predicted: 1


As we can see, the BentoML client successfully connects to the served model and receives its predictions.