# **Experiment Tracking Using MLflow**

In [46]:
from pytorch_lightning.utilities.types import OptimizerLRScheduler
from werkzeug.serving import load_ssl_context

''' Import all important libraries '''
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset, random_split
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
import pytorch_lightning as pl
from mlflow.models import infer_signature

In [47]:
# Fix the global PyTorch seed so random operations start from a known state.
torch.manual_seed(40)
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
Device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Using Device: {Device}')

Using Device: cpu


In [48]:
# Location of the input CSV.
Root_path = '/Users/mahadiur/Desktop/Experiment Track Using MLFlow/Data'
dataset_path = os.path.join(Root_path, 'DigitDataset.csv')

# Directory name used for saved models.
saved_model_dir = 'models'

# Notebook file expected in the current working directory (current file path).
source_code_path = os.path.join(os.getcwd(), 'Experiment_Track_Using_MLFlow.ipynb')

source_code = 'trainer.ipynb'

# **Data Pipeline**

In [49]:
# Load the raw CSV and inspect one row: column 0 is taken as the label,
# every remaining column as a pixel value.
digit_data = pd.read_csv(dataset_path)
idx = 0
label = digit_data.iloc[idx].values[:1]
pixel = digit_data.iloc[idx].values[1:]
print(pixel.shape, label.shape, sep='\n')

(784,)
(1,)


In [50]:
class DataPipeline(Dataset):
    """Dataset backed by a CSV file with one image per row.

    Column 0 of each row is returned as the label; the remaining columns are
    treated as the pixel values of a 28x28 image.

    Args:
        data_path: Path of the CSV file to load with ``pandas.read_csv``.
        transform: Optional callable applied to the scaled image tensor.
    """

    def __init__(self, data_path, transform=None):
        super().__init__()
        self.transformation = transform
        self.data = pd.read_csv(data_path)

    def __len__(self):
        """Return the number of rows in the loaded CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The image is a float32 tensor of shape (1, 28, 28) divided by 255
        (then passed through the transform, if any); the label is a tensor
        of shape (1,).
        """
        row = self.data.iloc[idx].values

        label = torch.tensor(row[0:1])
        image = torch.tensor(row[1:].astype('float32'))

        # Add a leading channel axis and divide the raw values by 255.
        image = image.reshape(28, 28).unsqueeze(0) / 255.0

        if self.transformation:
            image = self.transformation(image)

        return image, label

In [51]:
# Per-channel normalisation applied after pixels are scaled by 1/255.
# NOTE(review): 0.1307 / 0.3081 are presumably the commonly quoted MNIST
# mean/std - confirm they match this dataset.
Transformation = transforms.Compose(
    [transforms.Normalize(mean=torch.tensor([0.1307]), std=torch.tensor([0.3081]))]
)

In [52]:
# Build the full dataset; the normalisation transform runs on each item access.
dataset = DataPipeline(dataset_path, transform=Transformation)

print(len(dataset))

42000


# **Split Train, Test & Validation**

In [53]:
# 70% / 15% of the samples go to train / validation (truncated to whole
# samples); the test split takes whatever remains so the sizes sum exactly.
Train_size = int(0.7 * len(dataset))
Validation_size = int(0.15 * len(dataset))
Test_size = len(dataset) - Train_size - Validation_size

Training_dataset, Validation_dataset, Test_dataset = random_split(
    dataset, [Train_size, Validation_size, Test_size]
)

print(len(Training_dataset), len(Validation_dataset), len(Test_dataset), sep='\n')

29399
6300
6301


# **Dataloader for Train, Test & Validation**

In [54]:
# Only the training loader shuffles; test and validation keep a fixed order.
Train_Dataloader = DataLoader(Training_dataset, batch_size=32, shuffle=True)

Test_Dataloader = DataLoader(Test_dataset, batch_size=32, shuffle=False)

Validation_Dataloader = DataLoader(Validation_dataset, batch_size=32, shuffle=False)

In [56]:
# Peek at the first training batch only, to check the tensor shapes.
for pixels, labels in Train_Dataloader:
    print(pixels.shape, labels.shape, sep='\n')
    break

torch.Size([32, 1, 28, 28])
torch.Size([32, 1])


# **DigitClassifier class**

In [57]:
class DigitClass(pl.LightningModule):
    """Two-layer fully connected classifier for 28x28 digit images.

    Each input is flattened to 784 features, passed through a 128-unit hidden
    layer with ReLU, and mapped to 10 raw class scores (logits). The training,
    validation and test steps all use cross-entropy loss on integer class
    labels.
    """

    def __init__(self):
        super().__init__()
        self.criterion = nn.CrossEntropyLoss()
        self.layer1 = nn.Linear(28 * 28, 128)
        self.layer2 = nn.Linear(128, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return logits of shape (batch, 10) for inputs of 784 values each."""
        x = x.view(-1, 28 * 28)
        x = self.layer1(x)
        x = self.relu(x)
        x = self.layer2(x)
        return x

    # Gradient descent
    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters (learning rate 0.001)."""
        optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        return optimizer

    def _shared_step(self, batch):
        """Run a forward pass on ``batch`` and return ``(loss, logits, labels)``.

        Labels are flattened to shape (batch,) and cast to int64 because
        ``nn.CrossEntropyLoss`` expects class-index targets in that form; a
        (batch, 1) target is rejected, and would also broadcast incorrectly
        when compared against the predictions.
        """
        pixels, labels = batch
        # Follow the device the module actually lives on instead of a
        # notebook-level global, so data and weights cannot end up on
        # different devices.
        pixels = pixels.to(self.device)
        labels = labels.to(self.device).reshape(-1).long()
        outputs = self(pixels)
        loss = self.criterion(outputs, labels)
        return loss, outputs, labels

    @staticmethod
    def _accuracy(outputs, labels):
        """Return the fraction of rows whose arg-max logit equals the label."""
        return (torch.argmax(outputs, dim=1) == labels).float().mean()

    # Training Step
    def training_step(self, batch, batch_idx):
        """Compute, log (as ``Train_loss``) and return the training loss."""
        loss, _, _ = self._shared_step(batch)
        self.log('Train_loss', loss)
        return loss

    # Validation Step
    def validation_step(self, batch, batch_idx):
        """Log ``val_loss`` and ``val_accuracy`` for one validation batch."""
        loss, outputs, labels = self._shared_step(batch)
        self.log('val_loss', loss)
        self.log('val_accuracy', self._accuracy(outputs, labels))

    # Test Step
    def test_step(self, batch, batch_idx):
        """Log ``test_loss`` and ``test_accuracy`` for one test batch."""
        loss, outputs, labels = self._shared_step(batch)
        self.log('test_loss', loss)
        self.log('test_accuracy', self._accuracy(outputs, labels))

In [58]:
# Instantiate the classifier; its layers are created in DigitClass.__init__.
Model = DigitClass()