In [5]:
import mlflow
import torch
import numpy as np

from torch import nn
from torch.utils.data import DataLoader
from torchinfo import summary
from torchmetrics import Accuracy
from torchvision import datasets
from torchvision.transforms import ToTensor

from mlflow.types import Schema, TensorSpec
from mlflow.models import ModelSignature

In [6]:
# Load the FashionMNIST training split from ./data (downloading it when it is
# not already there) and apply the ToTensor transform to every image.
training_data = datasets.FashionMNIST(
    "data",
    train=True,
    transform=ToTensor(),
    download=True,
)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26.4M/26.4M [00:11<00:00, 2.37MB/s]


Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29.5k/29.5k [00:00<00:00, 388kB/s]


Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4.42M/4.42M [00:02<00:00, 1.66MB/s]


Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5.15k/5.15k [00:00<00:00, 40.8MB/s]

Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw






In [7]:
# Serve the training set in mini-batches of 64 samples.
train_dataloader = DataLoader(dataset=training_data, batch_size=64)

In [8]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [9]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, two 512-unit ReLU layers, 10 outputs."""

    def __init__(self):
        super().__init__()
        in_features, hidden, n_out = 28 * 28, 512, 10
        self.flatten = nn.Flatten()
        # Layers are built in the same order they are applied, so the
        # Sequential indices (0, 2, 4 for the Linear layers) stay stable.
        layers = [
            nn.Linear(in_features, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, n_out),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` and return the raw logits from the linear stack."""
        return self.linear_relu_stack(self.flatten(x))

In [10]:
def train(dataloader, model, loss_fn, metrics_fn, optimizer, epoch=0):
    """Run one pass over ``dataloader`` and log loss/accuracy to MLflow.

    Every 100th batch (starting with batch 0) the current batch's loss and
    accuracy are logged with ``mlflow.log_metric`` and printed.

    Args:
        dataloader: Iterable of ``(X, y)`` batches that also supports ``len()``.
        model: Module to optimise; it is switched to training mode.
        loss_fn: Callable ``(pred, y) -> loss``; the result must support
            ``backward()`` and ``item()``.
        metrics_fn: Callable ``(pred, y) -> accuracy``; the result must be
            convertible with ``float()`` (e.g. a 0-dim tensor).
        optimizer: Optimizer providing ``step()`` and ``zero_grad()``.
        epoch: Zero-based index of the current epoch. It offsets the MLflow
            step so metrics from successive epochs do not land on the same
            step values. With the default ``0`` the steps are ``batch // 100``,
            i.e. the numbering restarts for every call; pass the epoch index
            when training for more than one epoch.
    """
    log_every = 100
    num_batches = len(dataloader)
    # Number of logging events in one epoch (batches 0, 100, 200, ...).
    logs_per_epoch = (num_batches + log_every - 1) // log_every

    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # `device` is the module-level device string chosen earlier in the notebook.
        X, y = X.to(device), y.to(device)

        pred = model(X)
        loss = loss_fn(pred, y)
        accuracy = metrics_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % log_every == 0:
            # Convert to plain Python floats once: MLflow expects numeric
            # metric values, and formatting them as strings first would both
            # rely on implicit string-to-float coercion and drop precision.
            loss_value = loss.item()
            accuracy_value = float(accuracy)
            step = epoch * logs_per_epoch + batch // log_every
            mlflow.log_metric("loss", loss_value, step=step)
            mlflow.log_metric("accuracy", accuracy_value, step=step)
            print(f"loss: {loss_value:3f} accuracy: {accuracy_value:3f} [{batch} / {num_batches}]")

In [11]:
# Hyperparameters are defined once here and reused below, so the values logged
# to MLflow cannot drift from the ones actually used for training.
seed = 42
epochs = 3
learning_rate = 1e-3

# Seed PyTorch so the weight initialisation is repeatable between runs.
torch.manual_seed(seed)

loss_fn = nn.CrossEntropyLoss()
metric_fn = Accuracy(task="multiclass", num_classes=10).to(device)
model = NeuralNetwork().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

with mlflow.start_run() as run:
    params = {
        "epochs": epochs,
        "learning_rate": learning_rate,
        # Read from the loader instead of repeating the literal used to build it.
        "batch_size": train_dataloader.batch_size,
        "loss_function": loss_fn.__class__.__name__,
        "metric_function": metric_fn.__class__.__name__,
        "optimizer": optimizer.__class__.__name__,
    }

    mlflow.log_params(params)

    # The torchinfo summary contains box-drawing characters, so write it as
    # UTF-8 explicitly rather than relying on the platform default encoding.
    with open("model_summary.txt", "w", encoding="utf-8") as f:
        f.write(str(summary(model)))
    mlflow.log_artifact("model_summary.txt")

    for t in range(epochs):
        print(f"Epoch {t+1}\n----------------------------")
        train(train_dataloader, model, loss_fn, metric_fn, optimizer)

    # Signature: batches of 28x28 float32 images in, 10 float32 scores out.
    input_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, 28, 28))])
    output_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, 10))])
    signature = ModelSignature(inputs=input_schema, outputs=output_schema)

    mlflow.pytorch.log_model(model, "model", signature=signature)

# Reload the logged model through the generic pyfunc flavor and run it on one
# random image-shaped input as a smoke test of the saved artifact.
logged_model = f"runs:/{run.info.run_id}/model"
loaded_model = mlflow.pyfunc.load_model(logged_model)
sample_input = np.random.default_rng(seed).uniform(size=(1, 28, 28)).astype(np.float32)
loaded_model.predict(sample_input)

Epoch 1
----------------------------
loss: 2.295373 accuracy: 0.156250 [0 / 938]
loss: 2.290316 accuracy: 0.218750 [100 / 938]
loss: 2.262481 accuracy: 0.375000 [200 / 938]
loss: 2.261474 accuracy: 0.359375 [300 / 938]
loss: 2.244850 accuracy: 0.375000 [400 / 938]
loss: 2.214631 accuracy: 0.375000 [500 / 938]
loss: 2.215840 accuracy: 0.281250 [600 / 938]
loss: 2.182413 accuracy: 0.406250 [700 / 938]
loss: 2.180620 accuracy: 0.390625 [800 / 938]
loss: 2.146815 accuracy: 0.453125 [900 / 938]
Epoch 2
----------------------------
loss: 2.154326 accuracy: 0.390625 [0 / 938]
loss: 2.150261 accuracy: 0.375000 [100 / 938]
loss: 2.077930 accuracy: 0.484375 [200 / 938]
loss: 2.104153 accuracy: 0.484375 [300 / 938]
loss: 2.043926 accuracy: 0.484375 [400 / 938]
loss: 1.985053 accuracy: 0.531250 [500 / 938]
loss: 2.010692 accuracy: 0.406250 [600 / 938]
loss: 1.930540 accuracy: 0.468750 [700 / 938]
loss: 1.937635 accuracy: 0.468750 [800 / 938]
loss: 1.865651 accuracy: 0.546875 [900 / 938]
Epoch 3
--

array([[ 0.02098488, -0.73580617,  0.64805454, -0.5261527 ,  0.52522826,
        -0.4032299 ,  0.36929798, -0.52065235,  0.69928557, -0.00839979]],
      dtype=float32)