# Train a model with PyTorch

## Import libraries

In [47]:
import boto3
import os
from datetime import datetime

import random
from pathlib import Path

import torch

from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
import torch.nn as nn
from torch import optim
from torch.autograd import Variable
from torchinfo import summary

## Preparing data for training with DataLoaders

The Dataset retrieves our dataset’s features and labels one sample at a time. While training a model, we typically want to pass samples in “minibatches”, reshuffle the data at every epoch to reduce model overfitting, and use Python’s multiprocessing to speed up data retrieval.
DataLoader is an iterable that abstracts this complexity for us in an easy API.

In [48]:
# Prepare MNIST data.
# Both splits are read from the same local root and differ only in the `train`
# flag. No `download` argument is passed, so the files are expected to be
# present under that root already. The generator builds the training split
# first and the test split second, each with its own ToTensor() transform.
train_data, test_data = (
    datasets.MNIST(
        root='../../data/raw/pytorch-mnist-dataset',
        train=is_train_split,
        transform=ToTensor(),
    )
    for is_train_split in (True, False)
)

In [49]:
# Wrap both splits in DataLoaders that yield mini-batches of 128 samples
# (the same value as `batch_size` in the "Set parameters" section).
loaders = {
    # Training samples are reshuffled at the start of every epoch.
    'train': DataLoader(train_data,
                        batch_size=128,
                        shuffle=True,
                        num_workers=1),

    # Evaluation gains nothing from shuffling; a fixed order keeps test runs
    # repeatable from one execution to the next.
    'test': DataLoader(test_data,
                       batch_size=128,
                       shuffle=False,
                       num_workers=1),
}
loaders

{'train': <torch.utils.data.dataloader.DataLoader at 0x7f9937d147c0>,
 'test': <torch.utils.data.dataloader.DataLoader at 0x7f9937d14df0>}

## Define the Convolutional Neural Network model

## Set parameters

In [50]:
# MNIST dataset parameters.
# `num_classes` sizes the final linear layer of the CNN defined below.
num_classes = 10  # total classes (0-9 digits).
# Batch dimension passed to the torchinfo summary below; the DataLoaders above
# hard-code the same value (128) rather than reading this variable.
batch_size = 128

## Create CNN Model

In [72]:
class CNN(nn.Module):
    """Two-stage convolutional classifier for single-channel 28x28 images.

    Each stage is Conv2d(5x5, stride 1, padding 2) -> ReLU -> MaxPool2d(2).
    The padded convolutions keep the spatial size and each pooling step halves
    it (28 -> 14 -> 7), while the channel count grows 1 -> 32 -> 64. A single
    linear layer then maps the flattened 64 * 7 * 7 features to class scores.

    Args:
        n_classes: Width of the output layer. When omitted, the notebook-level
            ``num_classes`` setting is used, so ``CNN()`` behaves as before.
    """

    def __init__(self, n_classes=None):
        super().__init__()
        if n_classes is None:
            # Fall back to the notebook-wide setting for existing callers.
            n_classes = num_classes

        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=32,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(
                in_channels=32,
                out_channels=64,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        # Fully connected layer producing one score per class.
        self.out = nn.Linear(64 * 7 * 7, n_classes)

    def forward(self, x):
        """Return ``(scores, features)`` for a batch of images.

        ``scores`` is the output of the final linear layer (no softmax is
        applied here) and ``features`` is the flattened output of the second
        convolutional stage, returned for visualization.
        """
        x = self.conv1(x)
        x = self.conv2(x)
        # Flatten the output of conv2 to (batch, 64 * 7 * 7).
        x = x.view(x.size(0), -1)
        output = self.out(x)
        return output, x

In [73]:
# Instantiate the network and print its layer structure.
cnn = CNN()
print(cnn)

CNN(
  (conv1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (out): Linear(in_features=3136, out_features=10, bias=True)
)


In [74]:
# Per-layer output shapes and parameter counts for one batch of 1x28x28 inputs.
summary(cnn, input_size=(batch_size, 1, 28, 28))

Layer (type:depth-idx)                   Output Shape              Param #
CNN                                      --                        --
├─Sequential: 1-1                        [128, 32, 14, 14]         --
│    └─Conv2d: 2-1                       [128, 32, 28, 28]         832
│    └─ReLU: 2-2                         [128, 32, 28, 28]         --
│    └─MaxPool2d: 2-3                    [128, 32, 14, 14]         --
├─Sequential: 1-2                        [128, 64, 7, 7]           --
│    └─Conv2d: 2-4                       [128, 64, 14, 14]         51,264
│    └─ReLU: 2-5                         [128, 64, 14, 14]         --
│    └─MaxPool2d: 2-6                    [128, 64, 7, 7]           --
├─Linear: 1-3                            [128, 10]                 31,370
Total params: 83,466
Trainable params: 83,466
Non-trainable params: 0
Total mult-adds (G): 1.37
Input size (MB): 0.40
Forward/backward pass size (MB): 38.55
Params size (MB): 0.33
Estimated Total Size (MB): 39.28

## Define the loss function

In [75]:
# Cross-entropy loss; during training it receives the output of the network's
# final linear layer together with the integer class labels.
loss_func = nn.CrossEntropyLoss()
loss_func

CrossEntropyLoss()

## Define an optimization function

In [76]:
# Adam over all parameters of the network, with a learning rate of 0.01.
optimizer = optim.Adam(cnn.parameters(), lr = 0.01)
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.01
    weight_decay: 0
)

## Train the model

In [77]:
num_epochs = 1

def train(num_epochs, cnn, loaders, loss_func, optimizer):
    """Run the optimisation loop over ``loaders['train']``.

    Args:
        num_epochs: Number of full passes over the training loader.
        cnn: Model to optimise; its forward pass must return a sequence whose
            first element is the class scores.
        loaders: Mapping that provides the training batches under ``'train'``.
        loss_func: Callable taking ``(scores, labels)`` and returning a scalar
            loss tensor.
        optimizer: Optimizer already bound to the parameters of ``cnn``.

    Returns:
        None. Progress is printed every 100 steps.
    """
    # Put the model in training mode.
    cnn.train()

    total_step = len(loaders['train'])

    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(loaders['train']):
            # Tensors go straight into the model; the legacy
            # torch.autograd.Variable wrapper is no longer needed.
            output = cnn(images)[0]
            loss = loss_func(output, labels)

            # Clear gradients left over from the previous step.
            optimizer.zero_grad()

            # Backpropagation: compute gradients, then apply them.
            loss.backward()
            optimizer.step()

            if (i+1) % 100 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                       .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))

In [78]:
# Run the training loop with the model, loaders, loss and optimizer defined above.
train(num_epochs, cnn, loaders, loss_func, optimizer)

Epoch [1/1], Step [100/469], Loss: 0.1514
Epoch [1/1], Step [200/469], Loss: 0.0564
Epoch [1/1], Step [300/469], Loss: 0.0237
Epoch [1/1], Step [400/469], Loss: 0.0872


## Evaluate the model on test data

In [81]:
def test():
    # Test the model
    cnn.eval()

    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in loaders['test']:
            test_output, last_layer = cnn(images)
            pred_y = torch.max(test_output, 1)[1].data.squeeze()
            accuracy = (pred_y == labels).sum().item() / float(labels.size(0))
            pass
        
    print('Test Accuracy of the model on the 10000 test images: %.2f' % accuracy)
    
    pass

In [82]:
# Evaluate the trained network on the test loader and print the accuracy.
test()

Test Accuracy of the model on the 10000 test images: 0.94


## Store trained model

In [83]:
# Feature switches read from the environment ("0"/"1"); both default to off.
use_ceph = bool(int(os.getenv("USE_CEPH", 0)))
automation = bool(int(os.getenv("AUTOMATION", 0)))

# Version tag for this run: timestamp plus 64 random bits, so two runs started
# within the same second still get distinct directories.
time_version = f"torch-{datetime.now():%y%m%d%H%M%S}-{random.getrandbits(64):08x}"

# Path to data
directory_path = Path.cwd().parents[0]
trained_model_path = directory_path.joinpath(
    os.environ.get("TRAINED_MODEL_PATH", "../models")
)

# mkdir(exist_ok=True) already tolerates an existing directory, so no separate
# existence check is needed.
model_dir = trained_model_path / time_version
model_dir.mkdir(parents=True, exist_ok=True)
model_file = model_dir / "pytorch_model.pt"

# Only the weights (state_dict) are stored, not the full module object.
torch.save(cnn.state_dict(), str(model_file))

if automation or use_ceph:
    # Upload the saved weights to S3-compatible object storage.
    s3_endpoint_url = os.environ["OBJECT_STORAGE_ENDPOINT_URL"]
    s3_access_key = os.environ["AWS_ACCESS_KEY_ID"]
    s3_secret_key = os.environ["AWS_SECRET_ACCESS_KEY"]
    s3_bucket = os.environ["OBJECT_STORAGE_BUCKET_NAME"]
    # NOTE(review): `project_name` was never defined in this notebook, so this
    # branch raised NameError. It is now read from the environment like the
    # other storage settings; confirm PROJECT_NAME is the intended variable.
    project_name = os.environ["PROJECT_NAME"]

    # Create an S3 client
    s3 = boto3.client(
        service_name="s3",
        aws_access_key_id=s3_access_key,
        aws_secret_access_key=s3_secret_key,
        endpoint_url=s3_endpoint_url,
    )

    # Build the object key from the version tag and file name only. The
    # previous key interpolated the absolute local path (including ".."
    # segments), which made it depend on where the notebook was run.
    key = f"{project_name}/models/{time_version}/{model_file.name}"
    print(key)
    s3.upload_file(Bucket=s3_bucket, Key=key, Filename=str(model_file))