In [1]:
import torch 
import torch.nn as nn
from torch.utils import data

import torchvision
import torchvision.datasets

import sklearn
from sklearn.metrics import roc_auc_score, accuracy_score

import numpy as np 

import mlflow 
import mlflow.pytorch

# use the first CUDA GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

  from .autonotebook import tqdm as notebook_tqdm
  del sys.path[0]


In [3]:
# report the library versions this notebook was executed with
for library_name, library in (
    ("PyTorch", torch),
    ("torchvision", torchvision),
    ("sklearn", sklearn),
    ("MLFlow", mlflow),
    ("Numpy", np),
):
    print(f"{library_name}: {library.__version__}")
print("Device: ", device)   # the device the model is placed on



PyTorch: 1.6.0
torchvision: 0.7.0
sklearn: 0.22.1
MLFlow: 1.23.1
Numpy: 1.19.5
Device:  cpu


In [4]:
# basic hyperparameters shared by the cells below

batch_size = 256        # samples per minibatch produced by the DataLoader
num_classes = 10        # width of the one-hot label vectors (MNIST digits 0-9)
learning_rate = 1e-3    # step size handed to the Adam optimizer

In [5]:
# MNIST ships with torchvision and serves as the example dataset here;
# both splits are downloaded into ./data when missing and loaded without any transform
mnist_kwargs = dict(root='./data', download=True, transform=None)
train_set = torchvision.datasets.MNIST(train=True, **mnist_kwargs)
test_set = torchvision.datasets.MNIST(train=False, **mnist_kwargs)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


9920512it [00:21, 471491.91it/s]                             


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


32768it [00:00, 36172.59it/s]            


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


1654784it [00:04, 391775.53it/s]                             


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


8192it [00:00, 12700.57it/s]            
  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw
Processing...
Done!


In [6]:
# pull the raw image tensors (x) and the integer labels (y) out of both splits

x_train = train_set.data
y_train = train_set.targets
x_test = test_set.data
y_test = test_set.targets

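Now the data needs to be in channels-first format, `(m, c, h, w)`:
* `m` -> number of samples
* `c` -> number of channels
* `h` -> height of the samples
* `w` -> width of the samples

This is the `opposite` of the channels-last format, `(m, h, w, c)`, that Keras and TF2.0 use by default.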


In [11]:
print("x_train shape: {} \n x_test shape: {}".format(x_train.shape, x_test.shape))

x_train shape: torch.Size([60000, 28, 28]) 
 x_test shape: torch.Size([10000, 28, 28])


In [12]:
# insert a channel axis at position 1: (m, h, w) -> (m, 1, h, w).
# Height and width are read from the LAST two axes, so re-running this cell on an
# already reshaped (m, 1, h, w) tensor is a no-op instead of a size-mismatch error.
x_train = x_train.reshape(x_train.shape[0], 1, x_train.shape[-2], x_train.shape[-1])
x_test = x_test.reshape(x_test.shape[0], 1, x_test.shape[-2], x_test.shape[-1])


In [13]:
shape_report = "x_train shape: {} \n x_test shape: {}"
print(shape_report.format(x_train.shape, x_test.shape))

x_train shape: torch.Size([60000, 1, 28, 28]) 
 x_test shape: torch.Size([10000, 1, 28, 28])


* The x-sets are reshaped into a `channels-first` format (the `1` is the single grayscale channel), which differs from the channels-last layout used by Keras/TF.

In [14]:
y_train[0]   # label of the first training sample - still a plain class index, not one-hot encoded

tensor(5)

* This outputs a single number (the class index), not a vector. In Keras/TF we used `keras.utils.to_categorical()` for this conversion; here we have to write the function ourselves.

In [16]:
def to_one_hot(num_classes, labels):
    """Convert integer class labels into one-hot encoded rows.

    Args:
        num_classes: number of classes, i.e. the width of every one-hot row.
        labels: 1-D tensor of integer class indices.

    Returns:
        A float tensor of shape (labels.shape[0], num_classes) that holds 1 in
        the column selected by each label and 0 everywhere else.

    Raises:
        IndexError: if a label does not address a valid column.
    """
    num_samples = labels.shape[0]
    one_hot = torch.zeros((num_samples, num_classes))

    # one vectorised indexed assignment instead of a Python loop over every sample
    rows = torch.arange(num_samples)
    cols = labels.to(dtype=torch.long, device=one_hot.device)
    one_hot[rows, cols] = 1

    return one_hot

* A custom function that converts the input called `labels`, given the number of classes, into a one-hot encoded format and returns it

In [17]:
# convert your y-sets into one-hot encoded format.
# The integer labels are read from the datasets' own `.targets` rather than from
# `y_train` / `y_test`, so re-running this cell never feeds already-encoded
# labels back into `to_one_hot`.

y_train = to_one_hot(num_classes, train_set.targets)
y_test = to_one_hot(num_classes, test_set.targets)


In [18]:
y_train[0]  # the first training label again, now displayed as a one-hot encoded vector

tensor([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.])

In [19]:
# summarise the final tensor shapes of both splits
print("Shapes")
for split_name, split_x, split_y in (("train", x_train, y_train), ("test", x_test, y_test)):
    print(f"x_{split_name}: {split_x.shape}\ny_{split_name}: {split_y.shape}")


Shapes
x_train: torch.Size([60000, 1, 28, 28])
y_train: torch.Size([60000, 10])
x_test: torch.Size([10000, 1, 28, 28])
y_test: torch.Size([10000, 10])


* x-sets in `channels-first` format and y-sets in `one-hot encoded` format

### MLFlow Run - Training and Evaluating

In [23]:
# define your model architecture as a class

class model(nn.Module):
    """Small convolutional classifier for 1x28x28 inputs.

    Three stride-2 convolutions shrink the image to 64x2x2, which is flattened
    to 256 features and passed through three dense layers. `forward` returns a
    probability distribution over the 10 classes for every sample in the batch.
    """

    def __init__(self):
        # zero-argument super() does not look up the global name `model`, so the
        # class keeps working even if that name is later rebound to an instance
        super().__init__()

        # IN 1x28x28 OUT 16x14x14
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=2,
                               padding=1, dilation=1)
        # IN 16x14x14 OUT 32x6x6
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=2,
                               padding=0, dilation=1)
        # IN 32x6x6 OUT 64x2x2
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2,
                               padding=0, dilation=1)
        # IN 64x2x2 OUT 256
        self.flat1 = nn.Flatten()
        self.dense1 = nn.Linear(in_features=256,
                                out_features=128)
        self.dense2 = nn.Linear(in_features=128,
                                out_features=64)
        self.dense3 = nn.Linear(in_features=64,
                                out_features=10)

    def forward(self, x):
        """Map a float batch of shape (m, 1, 28, 28) to class probabilities of shape (m, 10)."""
        # functional activations: no new module object is built on every call
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.relu(self.conv3(x))
        x = self.flat1(x)
        x = torch.relu(self.dense1(x))
        x = torch.relu(self.dense2(x))
        x = self.dense3(x)
        # normalise over the class axis explicitly; leaving `dim` out relies on the
        # deprecated implicit-dimension choice and emits a warning on every call
        return torch.softmax(x, dim=1)

        


In [24]:
# build the network on the chosen device, then set up the loss and the optimizer.
# NOTE: this rebinds the name `model` from the class to its instance, so the cell
# can only be re-run after the class-definition cell has been run again.
model = model().to(device)
criterion = nn.BCELoss()  # binary cross-entropy between the network outputs and the targets
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)  # Adam with the configured lr

In [25]:
# defining a data loader to take care of batching your data set;
# shuffle=True draws the minibatches in a fresh random order every epoch instead of
# replaying the exact same batch sequence each time
dataset = data.TensorDataset(x_train, y_train)
train_loader = data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

* Creating a data loader object out of your data set lets PyTorch batch the data for you, so you can pass in `one minibatch at a time` in your training loop. This is essentially what the TF2.0/Keras `.fit()` function does, except that there the batching is abstracted away from you.

In [27]:
# define a training loop
num_epochs = 5
model.train()  # make the training mode explicit (this network has no dropout/batch-norm layers)
for epoch in range(num_epochs):
    for batch_num, (minibatch_x, minibatch_y) in enumerate(train_loader):
        # the loader yields the tensors as stored in the dataset; cast them to float
        # and move them to the model's device so the loop also runs when CUDA is used
        minibatch_x = minibatch_x.float().to(device)
        minibatch_y = minibatch_y.float().to(device)

        # call the module itself rather than .forward() so registered hooks still run
        output = model(minibatch_x)
        loss = criterion(output, minibatch_y)

        optimizer.zero_grad()   # clear the gradients left over from the previous step
        loss.backward()
        optimizer.step()

        print(f"Epoch {epoch} Batch_Num {batch_num} Loss {loss.item()}")



Epoch 0 Batch_Num 0 Loss 0.3306376039981842
Epoch 0 Batch_Num 1 Loss 0.3082179129123688
Epoch 0 Batch_Num 2 Loss 0.29878902435302734
Epoch 0 Batch_Num 3 Loss 0.2863521873950958
Epoch 0 Batch_Num 4 Loss 0.27009230852127075
Epoch 0 Batch_Num 5 Loss 0.24328410625457764
Epoch 0 Batch_Num 6 Loss 0.21504172682762146
Epoch 0 Batch_Num 7 Loss 0.18875226378440857
Epoch 0 Batch_Num 8 Loss 0.16653653979301453
Epoch 0 Batch_Num 9 Loss 0.15639765560626984
Epoch 0 Batch_Num 10 Loss 0.1510590761899948
Epoch 0 Batch_Num 11 Loss 0.1324325054883957
Epoch 0 Batch_Num 12 Loss 0.1265551894903183
Epoch 0 Batch_Num 13 Loss 0.12236194312572479
Epoch 0 Batch_Num 14 Loss 0.12856902182102203
Epoch 0 Batch_Num 15 Loss 0.12779393792152405
Epoch 0 Batch_Num 16 Loss 0.10219927132129669
Epoch 0 Batch_Num 17 Loss 0.09441255033016205
Epoch 0 Batch_Num 18 Loss 0.11018530279397964
Epoch 0 Batch_Num 19 Loss 0.09857948869466782
Epoch 0 Batch_Num 20 Loss 0.09449602663516998
Epoch 0 Batch_Num 21 Loss 0.07608035951852798
Epoc

In [29]:
# start an MLFlow run and log everything
mlflow.set_experiment("PyTorch_MNIST")

with mlflow.start_run():
    model.eval()
    # inference only: no autograd graph is needed for the whole test set, and the
    # inputs have to live on the same device as the model
    with torch.no_grad():
        preds = model(x_test.float().to(device))
    # bring the predictions back to the CPU before handing them to numpy/sklearn
    # NOTE(review): rounding turns the probabilities into 0/1 indicator rows, so
    # roc_auc_score sees hard decisions rather than scores; kept as-is so the
    # reported numbers stay comparable with the reloaded-model cell below
    preds = np.round(preds.cpu().numpy())

    eval_acc = accuracy_score(y_test, preds)
    auc_score = roc_auc_score(y_test, preds)

    mlflow.log_param("batch_size", batch_size)
    mlflow.log_param("num_epochs", num_epochs)
    mlflow.log_param("learning_rate", learning_rate)

    mlflow.log_metric("eval_acc", eval_acc)
    mlflow.log_metric("auc_score", auc_score)

    print("eval_acc: ", eval_acc)
    print("auc_score: ", auc_score)

    mlflow.pytorch.log_model(model, "PyTorch_MNIST")
# leaving the `with` block ends the run, so no explicit mlflow.end_run() is needed



eval_acc:  0.9758
auc_score:  0.986748232709577


### Loading an MLFlow Model

In [31]:
# load the model logged by the most recent run of the "PyTorch_MNIST" experiment.
# A hard-coded run ID only exists in the tracking store of the machine that created
# it, so the notebook could not be re-executed anywhere else.
experiment = mlflow.get_experiment_by_name("PyTorch_MNIST")
if experiment is None:
    raise RuntimeError('MLflow experiment "PyTorch_MNIST" not found - run the logging cell first')

latest_runs = mlflow.search_runs(
    experiment_ids=[experiment.experiment_id],
    order_by=["attributes.start_time DESC"],   # newest run first
    max_results=1,
)
if latest_runs.empty:
    raise RuntimeError('MLflow experiment "PyTorch_MNIST" has no runs - run the logging cell first')

run_id = latest_runs.iloc[0]["run_id"]
loaded_model = mlflow.pytorch.load_model(f"runs:/{run_id}/PyTorch_MNIST")

In [32]:
# make predictions and calculate metrics
loaded_model.eval()
# feed the inputs on whatever device the restored weights live on
model_device = next(loaded_model.parameters()).device
with torch.no_grad():   # inference only - skip building the autograd graph
    preds = loaded_model(x_test.float().to(model_device))
# same post-processing as for the original model so the scores are comparable
preds = np.round(preds.cpu().numpy())
eval_acc = accuracy_score(y_test, preds)
auc_score = roc_auc_score(y_test, preds)

print("eval_acc: ", eval_acc)
print("auc_score: ", auc_score)

eval_acc:  0.9758
auc_score:  0.986748232709577




* This is the output of calculating the same evaluation metrics as earlier, but this time with the model loaded back from MLFlow.
* The `scores match` those of the original model.