# **Experiment Track with using MLFlow**

In [31]:
''' Import all important libraries '''
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset, random_split
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
import pytorch_lightning as pl
from mlflow.models import infer_signature

In [32]:
torch.manual_seed(40)
Device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using Device: {Device}')

Using Device: cpu


In [33]:
''' Load data and define Source code path '''
Root_path = '/Users/mahadiur/Desktop/Experiment Track Using MLFlow/Data'
dataset_path = os.path.join(Root_path, 'DigitDataset.csv')

saved_model_dir = 'models'
source_code_path = os.path.join(
    os.getcwd(),
    'Experiment_Track_Using_MLFlow.ipynb'
) # current file path

source_code = 'trainer.ipynb'

# **Data Pipeline**

In [34]:
class DataPipeline(Dataset):
    def __init__(self, data_path, transform=None):
        super().__init__()
        self.data = pd.read_csv(data_path)
        self.transformation = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        pixels = self.data[idx].values[1:].astype('float32')
        labels = self.data[idx].values[0:1]

        pixels = torch.tensor(pixels)
        labels = torch.tensor(labels)

        pixels = pixels.reshape(28, 28).unsqueeze(0) / 255.0

        if self.transformation:
            pixels = self.transformation(pixels)

        return  pixels, labels

In [35]:
Transformation = transforms.Compose([
    transforms.Normalize(
        mean=(torch.tensor([0.1307])),
        std=(torch.tensor([0.3081])),
    )
])

In [36]:
dataset = DataPipeline(
    dataset_path,
    transform=Transformation
)

print(len(dataset))

42000


# **Split Train, Test & Validation**

In [37]:
Train_size = int(0.7 * len(dataset))
Validation_size = int(0.15 * len(dataset))
Test_size = len(dataset) - Train_size - Validation_size

Training_dataset, Validation_dataset, Test_dataset = random_split(
    dataset=dataset,
    lengths=[Train_size, Validation_size, Test_size],
)

print(len(Training_dataset))
print(len(Validation_dataset))
print(len(Test_dataset))

29399
6300
6301


# **Dataloader for Train, Test & Validation**

In [38]:
Train_Dataloader = DataLoader(
    dataset=Training_dataset,
    batch_size=32,
    shuffle=True,
)

Test_Dataloader = DataLoader(
    dataset=Test_dataset,
    batch_size=32,
    shuffle=False,
)

Validation_Dataloader = DataLoader(
    dataset=Validation_dataset,
    batch_size=32,
    shuffle=False,
)