# **Experiment Track with using MLFlow**

In [257]:

from sympy import print_tree

''' Import all important libraries '''
import os
import pandas as pd
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import Dataset, random_split, DataLoader
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
import pytorch_lightning as pl
import mlflow
from mlflow.models import infer_signature

In [258]:
Device = (torch.device('mps') if torch.mps.is_available() else torch.device('cpu'))
print(Device)

mps


In [259]:
''' Hyperparameters '''
torch.manual_seed(40)
Batch_size = 16
Epochs = 15
Learning_Rate = 0.0001

In [260]:
''' Load data and define Source code path '''
Root_path = '/Users/mahadiur/Desktop/Experiment Track Using MLFlow/Data'
dataset_path = os.path.join(Root_path, 'DigitDataset.csv')

saved_model_dir = 'models'
source_code_path = os.path.join(
    os.getcwd(),
    'Experiment_Track_Using_MLFlow.ipynb'
) # current file path

source_code = 'trainer.ipynb'

# **Data Pipeline**

In [261]:
digit_data = pd.read_csv(dataset_path)
example = digit_data.iloc[0]
pixel = example.values[1:]
label = int(example.values[0:1])
print(pixel.shape)
print(label)

(784,)
1


  label = int(example.values[0:1])


In [262]:
class DataPipeline(Dataset):
    def __init__(self, data_path, transform=None):
        super().__init__()
        self.data = pd.read_csv(data_path)
        self.transformation = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        example = self.data.iloc[idx]
        pixels = example.values[1:].astype('float32')
        pixels /= 255
        labels = int(example[0:1])

        pixels = torch.tensor(pixels).reshape(28, 28).unsqueeze(0)
        labels = torch.tensor(labels)


        if self.transformation:
            pixels = self.transformation(pixels)

        return  pixels, labels

In [263]:
Transformation = transforms.Compose([
    transforms.Normalize(
        mean=(torch.tensor([0.1307])),
        std=(torch.tensor([0.3081])),
    )
])

In [264]:
dataset = DataPipeline(
    dataset_path,
    Transformation
)
print(len(dataset))

42000


# **Split Train, Test & Validation**

In [265]:
Train_size = int(0.7 * len(dataset))
Validation_size = int(0.15 * len(dataset))
Test_size = len(dataset) - Train_size - Validation_size

Training_dataset, Validation_dataset, Test_dataset = random_split(
    dataset=dataset,
    lengths=[Train_size, Validation_size, Test_size],
)

print(len(Training_dataset))
print(len(Validation_dataset))
print(len(Test_dataset))

29399
6300
6301


# **Dataloader for Train, Test & Validation**

In [266]:
Train_Dataloader = DataLoader(
    dataset=Training_dataset,
    batch_size=Batch_size,
    shuffle=True,

)

Validation_Dataloader = DataLoader(
    dataset=Validation_dataset,
    batch_size=Batch_size,
    shuffle=False,
)

Test_Dataloader = DataLoader(
    dataset=Test_dataset,
    batch_size=Batch_size,
    shuffle=False,

)

In [267]:
for pixels, labels in Train_Dataloader:
    print(pixels.shape)
    print(labels.shape)
    break

torch.Size([16, 1, 28, 28])
torch.Size([16])


  labels = int(example[0:1])


# **DigitClassifiar class**

In [268]:
class DigitClass(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.criterion = nn.CrossEntropyLoss()
        self.layer1 = nn.Linear(28 * 28, 128)
        self.layer2 = nn.Linear(128, 32)
        self.layer3 = nn.Linear(32, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = x.view(-1, 28 * 28)
        x = self.layer1(x)
        x = self.relu(x)
        x = self.layer2(x)
        x = self.relu(x)
        x = self.layer3(x)
        return x

    # Gradient Decent
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=Learning_Rate)
        return optimizer

    # Training Step
    def training_step(self, batch, batch_idx):
        pixels, labels = batch
        pixels = pixels.to(Device)
        labels = labels.to(Device)
        outputs = self.forward(pixels)
        loss = self.criterion(outputs, labels)
        self.log('Train_loss', loss)
        return loss

    # Validation Step
    def validation_step(self, batch, batch_idx):
        pixels, labels = batch
        pixels = pixels.to(Device)
        labels = labels.to(Device)
        outputs = self.forward(pixels)
        loss = self.criterion(outputs, labels)
        accuracy = (torch.argmax(outputs, dim=1) == labels).float().mean()
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_accuracy', accuracy, prog_bar=True)

    # Test Step
    def test_step(self, batch, batch_idx):
        pixels, labels= batch
        pixels = pixels.to(Device)
        labels = labels.to(Device)
        outputs = self.forward(pixels)
        loss = self.criterion(outputs, labels)
        accuracy = (torch.argmax(outputs, dim=1) == labels).float().mean()
        self.log('test_loss', loss, prog_bar=True)
        self.log('test_accuracy', accuracy, prog_bar=True)



In [269]:
Model = DigitClass().to(Device)

In [270]:
Early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    verbose=True,
)

checkpoints_callback = ModelCheckpoint(
    monitor='val_accuracy',
    save_top_k=1,
    mode='max',
)

checkpoints_path = os.path.join(
    os.getcwd(),'checkpoints','Best_Model.pth'
)

# **Train**

In [271]:
Training = pl.Trainer(
    max_epochs=Epochs,
    callbacks=[checkpoints_callback, Early_stopping],
)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [272]:
Training.fit(
    model=Model,
    train_dataloaders=Train_Dataloader,
    val_dataloaders=Validation_Dataloader,
)


  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | criterion | CrossEntropyLoss | 0      | train
1 | layer1    | Linear           | 100 K  | train
2 | layer2    | Linear           | 4.1 K  | train
3 | layer3    | Linear           | 330    | train
4 | relu      | ReLU             | 0      | train
-------------------------------------------------------
104 K     Trainable params
0         Non-trainable params
104 K     Total params
0.420     Total estimated model params size (MB)
5         Modules in train mode
0         Modules in eval mode


                                                                           

/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `DataLoader` to improve performance.
  labels = int(example[0:1])
/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 1838/1838 [00:08<00:00, 215.25it/s, v_num=11]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 1/394 [00:00<00:01, 269.76it/s][A
Validation DataLoader 0:   1%|          | 2/394 [00:00<00:01, 251.16it/s][A
Validation DataLoader 0:   1%|          | 3/394 [00:00<00:01, 228.14it/s][A
Validation DataLoader 0:   1%|          | 4/394 [00:00<00:01, 225.16it/s][A
Validation DataLoader 0:   1%|▏         | 5/394 [00:00<00:01, 217.74it/s][A
Validation DataLoader 0:   2%|▏         | 6/394 [00:00<00:01, 223.77it/s][A
Validation DataLoader 0:   2%|▏         | 7/394 [00:00<00:01, 221.32it/s][A
Validation DataLoader 0:   2%|▏         | 8/394 [00:00<00:01, 226.59it/s][A
Validation DataLoader 0:   2%|▏         | 9/394 [00:00<00:01, 230.63it/s][A
Validation DataLoader 0:   3%|▎         | 10/394 [00:00<00:01, 

Metric val_loss improved. New best score: 0.318


Epoch 1: 100%|██████████| 1838/1838 [00:07<00:00, 241.31it/s, v_num=11, val_loss=0.318, val_accuracy=0.906]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 1/394 [00:00<00:00, 422.64it/s][A
Validation DataLoader 0:   1%|          | 2/394 [00:00<00:01, 281.57it/s][A
Validation DataLoader 0:   1%|          | 3/394 [00:00<00:01, 256.67it/s][A
Validation DataLoader 0:   1%|          | 4/394 [00:00<00:01, 246.70it/s][A
Validation DataLoader 0:   1%|▏         | 5/394 [00:00<00:01, 254.13it/s][A
Validation DataLoader 0:   2%|▏         | 6/394 [00:00<00:01, 259.54it/s][A
Validation DataLoader 0:   2%|▏         | 7/394 [00:00<00:01, 256.19it/s][A
Validation DataLoader 0:   2%|▏         | 8/394 [00:00<00:01, 257.01it/s][A
Validation DataLoader 0:   2%|▏         | 9/394 [00:00<00:01, 256.46it/s][A
Validation DataLoader 0:   

Metric val_loss improved by 0.061 >= min_delta = 0.0. New best score: 0.257


Epoch 2: 100%|██████████| 1838/1838 [00:07<00:00, 244.04it/s, v_num=11, val_loss=0.257, val_accuracy=0.924]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 1/394 [00:00<00:02, 177.15it/s][A
Validation DataLoader 0:   1%|          | 2/394 [00:00<00:01, 212.54it/s][A
Validation DataLoader 0:   1%|          | 3/394 [00:00<00:01, 232.51it/s][A
Validation DataLoader 0:   1%|          | 4/394 [00:00<00:01, 246.85it/s][A
Validation DataLoader 0:   1%|▏         | 5/394 [00:00<00:01, 239.36it/s][A
Validation DataLoader 0:   2%|▏         | 6/394 [00:00<00:01, 245.99it/s][A
Validation DataLoader 0:   2%|▏         | 7/394 [00:00<00:01, 250.22it/s][A
Validation DataLoader 0:   2%|▏         | 8/394 [00:00<00:01, 250.94it/s][A
Validation DataLoader 0:   2%|▏         | 9/394 [00:00<00:01, 255.51it/s][A
Validation DataLoader 0:   

Metric val_loss improved by 0.038 >= min_delta = 0.0. New best score: 0.219


Epoch 3: 100%|██████████| 1838/1838 [00:07<00:00, 251.00it/s, v_num=11, val_loss=0.219, val_accuracy=0.935]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 1/394 [00:00<00:01, 356.66it/s][A
Validation DataLoader 0:   1%|          | 2/394 [00:00<00:01, 273.72it/s][A
Validation DataLoader 0:   1%|          | 3/394 [00:00<00:01, 258.21it/s][A
Validation DataLoader 0:   1%|          | 4/394 [00:00<00:01, 258.04it/s][A
Validation DataLoader 0:   1%|▏         | 5/394 [00:00<00:01, 264.20it/s][A
Validation DataLoader 0:   2%|▏         | 6/394 [00:00<00:01, 269.99it/s][A
Validation DataLoader 0:   2%|▏         | 7/394 [00:00<00:01, 274.95it/s][A
Validation DataLoader 0:   2%|▏         | 8/394 [00:00<00:01, 276.83it/s][A
Validation DataLoader 0:   2%|▏         | 9/394 [00:00<00:01, 278.92it/s][A
Validation DataLoader 0:   

Metric val_loss improved by 0.024 >= min_delta = 0.0. New best score: 0.194


Epoch 4: 100%|██████████| 1838/1838 [00:07<00:00, 246.19it/s, v_num=11, val_loss=0.194, val_accuracy=0.942]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 1/394 [00:00<00:00, 416.47it/s][A
Validation DataLoader 0:   1%|          | 2/394 [00:00<00:01, 289.68it/s][A
Validation DataLoader 0:   1%|          | 3/394 [00:00<00:01, 291.60it/s][A
Validation DataLoader 0:   1%|          | 4/394 [00:00<00:01, 279.70it/s][A
Validation DataLoader 0:   1%|▏         | 5/394 [00:00<00:01, 282.87it/s][A
Validation DataLoader 0:   2%|▏         | 6/394 [00:00<00:01, 287.11it/s][A
Validation DataLoader 0:   2%|▏         | 7/394 [00:00<00:01, 289.58it/s][A
Validation DataLoader 0:   2%|▏         | 8/394 [00:00<00:01, 281.53it/s][A
Validation DataLoader 0:   2%|▏         | 9/394 [00:00<00:01, 282.10it/s][A
Validation DataLoader 0:   

Metric val_loss improved by 0.013 >= min_delta = 0.0. New best score: 0.182


Epoch 5: 100%|██████████| 1838/1838 [00:07<00:00, 245.08it/s, v_num=11, val_loss=0.182, val_accuracy=0.945]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 1/394 [00:00<00:00, 425.13it/s][A
Validation DataLoader 0:   1%|          | 2/394 [00:00<00:01, 297.84it/s][A
Validation DataLoader 0:   1%|          | 3/394 [00:00<00:01, 296.03it/s][A
Validation DataLoader 0:   1%|          | 4/394 [00:00<00:01, 292.29it/s][A
Validation DataLoader 0:   1%|▏         | 5/394 [00:00<00:01, 274.17it/s][A
Validation DataLoader 0:   2%|▏         | 6/394 [00:00<00:01, 277.79it/s][A
Validation DataLoader 0:   2%|▏         | 7/394 [00:00<00:01, 281.76it/s][A
Validation DataLoader 0:   2%|▏         | 8/394 [00:00<00:01, 284.84it/s][A
Validation DataLoader 0:   2%|▏         | 9/394 [00:00<00:01, 286.97it/s][A
Validation DataLoader 0:   

Metric val_loss improved by 0.014 >= min_delta = 0.0. New best score: 0.168


Epoch 6: 100%|██████████| 1838/1838 [00:07<00:00, 239.22it/s, v_num=11, val_loss=0.168, val_accuracy=0.949]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 1/394 [00:00<00:01, 290.61it/s][A
Validation DataLoader 0:   1%|          | 2/394 [00:00<00:01, 280.81it/s][A
Validation DataLoader 0:   1%|          | 3/394 [00:00<00:01, 275.89it/s][A
Validation DataLoader 0:   1%|          | 4/394 [00:00<00:01, 281.17it/s][A
Validation DataLoader 0:   1%|▏         | 5/394 [00:00<00:01, 259.46it/s][A
Validation DataLoader 0:   2%|▏         | 6/394 [00:00<00:01, 263.13it/s][A
Validation DataLoader 0:   2%|▏         | 7/394 [00:00<00:01, 246.10it/s][A
Validation DataLoader 0:   2%|▏         | 8/394 [00:00<00:01, 247.45it/s][A
Validation DataLoader 0:   2%|▏         | 9/394 [00:00<00:01, 251.77it/s][A
Validation DataLoader 0:   

Metric val_loss improved by 0.015 >= min_delta = 0.0. New best score: 0.153


Epoch 7: 100%|██████████| 1838/1838 [00:07<00:00, 243.55it/s, v_num=11, val_loss=0.153, val_accuracy=0.953]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 1/394 [00:00<00:01, 378.10it/s][A
Validation DataLoader 0:   1%|          | 2/394 [00:00<00:01, 282.61it/s][A
Validation DataLoader 0:   1%|          | 3/394 [00:00<00:01, 282.48it/s][A
Validation DataLoader 0:   1%|          | 4/394 [00:00<00:01, 284.62it/s][A
Validation DataLoader 0:   1%|▏         | 5/394 [00:00<00:01, 283.30it/s][A
Validation DataLoader 0:   2%|▏         | 6/394 [00:00<00:01, 284.58it/s][A
Validation DataLoader 0:   2%|▏         | 7/394 [00:00<00:01, 277.35it/s][A
Validation DataLoader 0:   2%|▏         | 8/394 [00:00<00:01, 268.54it/s][A
Validation DataLoader 0:   2%|▏         | 9/394 [00:00<00:01, 271.52it/s][A
Validation DataLoader 0:   

Metric val_loss improved by 0.013 >= min_delta = 0.0. New best score: 0.140


Epoch 8: 100%|██████████| 1838/1838 [00:07<00:00, 243.83it/s, v_num=11, val_loss=0.140, val_accuracy=0.958]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 1/394 [00:00<00:01, 299.40it/s][A
Validation DataLoader 0:   1%|          | 2/394 [00:00<00:01, 246.16it/s][A
Validation DataLoader 0:   1%|          | 3/394 [00:00<00:01, 261.19it/s][A
Validation DataLoader 0:   1%|          | 4/394 [00:00<00:01, 264.23it/s][A
Validation DataLoader 0:   1%|▏         | 5/394 [00:00<00:01, 268.69it/s][A
Validation DataLoader 0:   2%|▏         | 6/394 [00:00<00:01, 260.37it/s][A
Validation DataLoader 0:   2%|▏         | 7/394 [00:00<00:01, 260.77it/s][A
Validation DataLoader 0:   2%|▏         | 8/394 [00:00<00:01, 265.06it/s][A
Validation DataLoader 0:   2%|▏         | 9/394 [00:00<00:01, 267.51it/s][A
Validation DataLoader 0:   

Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 0.135


Epoch 9: 100%|██████████| 1838/1838 [00:07<00:00, 241.41it/s, v_num=11, val_loss=0.135, val_accuracy=0.959]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 1/394 [00:00<00:01, 379.09it/s][A
Validation DataLoader 0:   1%|          | 2/394 [00:00<00:01, 276.50it/s][A
Validation DataLoader 0:   1%|          | 3/394 [00:00<00:01, 277.11it/s][A
Validation DataLoader 0:   1%|          | 4/394 [00:00<00:01, 281.16it/s][A
Validation DataLoader 0:   1%|▏         | 5/394 [00:00<00:01, 278.21it/s][A
Validation DataLoader 0:   2%|▏         | 6/394 [00:00<00:01, 265.93it/s][A
Validation DataLoader 0:   2%|▏         | 7/394 [00:00<00:01, 256.64it/s][A
Validation DataLoader 0:   2%|▏         | 8/394 [00:00<00:01, 256.91it/s][A
Validation DataLoader 0:   2%|▏         | 9/394 [00:00<00:01, 259.36it/s][A
Validation DataLoader 0:   

Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 0.127


Epoch 10: 100%|██████████| 1838/1838 [00:07<00:00, 239.42it/s, v_num=11, val_loss=0.127, val_accuracy=0.963]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 1/394 [00:00<00:00, 408.80it/s][A
Validation DataLoader 0:   1%|          | 2/394 [00:00<00:01, 339.78it/s][A
Validation DataLoader 0:   1%|          | 3/394 [00:00<00:01, 314.89it/s][A
Validation DataLoader 0:   1%|          | 4/394 [00:00<00:01, 309.48it/s][A
Validation DataLoader 0:   1%|▏         | 5/394 [00:00<00:01, 296.98it/s][A
Validation DataLoader 0:   2%|▏         | 6/394 [00:00<00:01, 284.43it/s][A
Validation DataLoader 0:   2%|▏         | 7/394 [00:00<00:01, 274.00it/s][A
Validation DataLoader 0:   2%|▏         | 8/394 [00:00<00:01, 276.34it/s][A
Validation DataLoader 0:   2%|▏         | 9/394 [00:00<00:01, 278.85it/s][A
Validation DataLoader 0:  

Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 0.124


Epoch 11: 100%|██████████| 1838/1838 [00:07<00:00, 237.34it/s, v_num=11, val_loss=0.124, val_accuracy=0.963]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 1/394 [00:00<00:01, 364.31it/s][A
Validation DataLoader 0:   1%|          | 2/394 [00:00<00:01, 306.47it/s][A
Validation DataLoader 0:   1%|          | 3/394 [00:00<00:01, 272.48it/s][A
Validation DataLoader 0:   1%|          | 4/394 [00:00<00:01, 273.69it/s][A
Validation DataLoader 0:   1%|▏         | 5/394 [00:00<00:01, 256.09it/s][A
Validation DataLoader 0:   2%|▏         | 6/394 [00:00<00:01, 263.26it/s][A
Validation DataLoader 0:   2%|▏         | 7/394 [00:00<00:01, 266.72it/s][A
Validation DataLoader 0:   2%|▏         | 8/394 [00:00<00:01, 266.30it/s][A
Validation DataLoader 0:   2%|▏         | 9/394 [00:00<00:01, 271.31it/s][A
Validation DataLoader 0:  

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.120


Epoch 12: 100%|██████████| 1838/1838 [00:07<00:00, 239.63it/s, v_num=11, val_loss=0.120, val_accuracy=0.963]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 1/394 [00:00<00:00, 429.22it/s][A
Validation DataLoader 0:   1%|          | 2/394 [00:00<00:01, 289.32it/s][A
Validation DataLoader 0:   1%|          | 3/394 [00:00<00:01, 284.21it/s][A
Validation DataLoader 0:   1%|          | 4/394 [00:00<00:01, 290.81it/s][A
Validation DataLoader 0:   1%|▏         | 5/394 [00:00<00:01, 287.44it/s][A
Validation DataLoader 0:   2%|▏         | 6/394 [00:00<00:01, 285.73it/s][A
Validation DataLoader 0:   2%|▏         | 7/394 [00:00<00:01, 289.27it/s][A
Validation DataLoader 0:   2%|▏         | 8/394 [00:00<00:01, 278.39it/s][A
Validation DataLoader 0:   2%|▏         | 9/394 [00:00<00:01, 277.69it/s][A
Validation DataLoader 0:  

Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 0.117


Epoch 13: 100%|██████████| 1838/1838 [00:07<00:00, 234.22it/s, v_num=11, val_loss=0.117, val_accuracy=0.964]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/394 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 1/394 [00:00<00:01, 242.47it/s][A
Validation DataLoader 0:   1%|          | 2/394 [00:00<00:01, 231.37it/s][A
Validation DataLoader 0:   1%|          | 3/394 [00:00<00:01, 249.50it/s][A
Validation DataLoader 0:   1%|          | 4/394 [00:00<00:01, 248.84it/s][A
Validation DataLoader 0:   1%|▏         | 5/394 [00:00<00:01, 254.93it/s][A
Validation DataLoader 0:   2%|▏         | 6/394 [00:00<00:01, 261.69it/s][A
Validation DataLoader 0:   2%|▏         | 7/394 [00:00<00:01, 266.22it/s][A
Validation DataLoader 0:   2%|▏         | 8/394 [00:00<00:01, 266.47it/s][A
Validation DataLoader 0:   2%|▏         | 9/394 [00:00<00:01, 270.44it/s][A
Validation DataLoader 0:  

Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 0.111
`Trainer.fit` stopped: `max_epochs=15` reached.


Epoch 14: 100%|██████████| 1838/1838 [00:10<00:00, 176.08it/s, v_num=11, val_loss=0.111, val_accuracy=0.967]


In [273]:
best_model_path = checkpoints_callback.best_model_path
print(best_model_path)
best_model = DigitClass.load_from_checkpoint(best_model_path)

/Users/mahadiur/Desktop/Experiment Track Using MLFlow/Notebook/lightning_logs/version_11/checkpoints/epoch=14-step=27570.ckpt


In [274]:
score = Training.test(
    model=best_model,
    dataloaders=Test_Dataloader,
)

/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:433: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `DataLoader` to improve performance.


Testing DataLoader 0:   8%|▊         | 32/394 [00:00<00:01, 182.59it/s]

  labels = int(example[0:1])


Testing DataLoader 0: 100%|██████████| 394/394 [00:02<00:00, 187.28it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_accuracy         0.9660371541976929
        test_loss           0.10710974782705307
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


# **Use MLFlow for track experiment**

In [275]:
''' Set Experiment name '''
mlflow.set_experiment('Deep_Learning')

<Experiment: artifact_location='file:///Users/mahadiur/Desktop/Experiment%20Track%20Using%20MLFlow/Notebook/mlruns/414246106038771448', creation_time=1756301445107, experiment_id='414246106038771448', last_update_time=1756301445107, lifecycle_stage='active', name='Deep_Learning', tags={}>

In [276]:
with mlflow.start_run():
    # Save Model Hyperparameters
    mlflow.log_param('Learning Rate', Learning_Rate)
    mlflow.log_param('Epochs', Epochs)
    mlflow.log_param('Batch Size', Batch_size)

    # Training Model
    Training.fit(
        model=Model,
        train_dataloaders=Train_Dataloader,
        val_dataloaders=Validation_Dataloader,
    )

    # Save Best Model
    best_model_path = checkpoints_callback.best_model_path
    print(best_model_path)
    best_model = DigitClass.load_from_checkpoint(best_model_path)

    # Evaluate Model
    score = Training.test(
        model=best_model,
        dataloaders=Test_Dataloader,
    )

    # Save Model Test loss & Test Accuracy
    mlflow.log_metric('test_accuracy', score[0]['test_accuracy'])
    mlflow.log_metric('test_loss', score[0]['test_loss'])

    # Save Model
    pixels_batch = next(iter(Test_Dataloader))[0]
    pixels_batch = pixels_batch.cpu().numpy()

    signature = infer_signature(Model, pixels_batch)

    mlflow.pytorch.log_model(
        pytorch_model=best_model,
        artifact_path=saved_model_dir,
        input_example=pixels_batch,
        signature=signature,
    )

    import shutil
    shutil.copyfile(source_code_path,source_code)
    mlflow.log_artifact(source_code)

/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:701: Checkpoint directory /Users/mahadiur/Desktop/Experiment Track Using MLFlow/Notebook/lightning_logs/version_11/checkpoints exists and is not empty.

  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | criterion | CrossEntropyLoss | 0      | train
1 | layer1    | Linear           | 100 K  | train
2 | layer2    | Linear           | 4.1 K  | train
3 | layer3    | Linear           | 330    | train
4 | relu      | ReLU             | 0      | train
-------------------------------------------------------
104 K     Trainable params
0         Non-trainable params
104 K     Total params
0.420     Total estimated model params size (MB)
5         Modules in train mode
0         Modules in eval mode


                                                                            

  labels = int(example[0:1])
`Trainer.fit` stopped: `max_epochs=15` reached.


/Users/mahadiur/Desktop/Experiment Track Using MLFlow/Notebook/lightning_logs/version_11/checkpoints/epoch=14-step=27570.ckpt
Testing DataLoader 0: 100%|██████████| 394/394 [00:01<00:00, 261.33it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_accuracy         0.9660371541976929
        test_loss           0.10710974782705307
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


  "inputs": [
    [
      [
        [
          .... Alternatively, you can avoid passing input example and pass model signature instead when logging the model. To ensure the input example is valid prior to serving, please try calling `mlflow.models.validate_serving_input` on the model uri and serving input example. A serving input example can be generated from model input example using `mlflow.models.convert_input_example_to_serving_input` function.
Got error: setting an array element with a sequence.


In [277]:
''' Launch the experiment '''
print(f'mlflow ui --backend-store-uri {mlflow.get_tracking_uri()}')

mlflow ui --backend-store-uri file:///Users/mahadiur/Desktop/Experiment%20Track%20Using%20MLFlow/Notebook/mlruns


### **Thank You**