# Sparsify Model

Sparsification is the process of taking a trained deep learning model and removing redundant information from the overprecise and over-parameterized network resulting in a faster and smaller model. Techniques for sparsification are all encompassing including everything from inducing sparsity using pruning and quantization to enabling naturally occurring sparsity using activation sparsity or winograd/FFT . When implemented correctly, these techniques result in significantly more performant and smaller models with limited to no effect on the baseline metrics. [1]

References:
- [1] https://neuralmagic.com/ 

## Import libraries

In [3]:
import os
import time
import copy
import random
from datetime import datetime
from pathlib import Path

import torch
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
import torch.nn as nn
from torch import optim
from torch.autograd import Variable


from sparseml.pytorch.optim import (
    ScheduledModifierManager,
)
from sparseml.pytorch.utils import ModuleExporter

## Preparing data for training with DataLoaders

In [6]:
train_data = datasets.MNIST(
    root = '../../data/raw/pytorch-mnist-dataset',
    train = True,
    transform = ToTensor(),
)
test_data = datasets.MNIST(
    root = '../../data/raw/pytorch-mnist-dataset', 
    train = False, 
    transform = ToTensor(),
)

In [7]:
# setup data loaders
dataloaders = {
    'train' : torch.utils.data.DataLoader(train_data, 
                                          batch_size=128, 
                                          shuffle=True, 
                                          num_workers=1),
    
    'test'  : torch.utils.data.DataLoader(test_data, 
                                          batch_size=128, 
                                          shuffle=True, 
                                          num_workers=1),
}

## Set parameters

In [8]:
# MNIST dataset parameters.
num_classes = 10  # total classes (0-9 digits).

## Create CNN Model

In [9]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(         
            nn.Conv2d(
                in_channels=1,              
                out_channels=32,            
                kernel_size=5,              
                stride=1,                   
                padding=2,                  
            ),                              
            nn.ReLU(),                      
            nn.MaxPool2d(kernel_size=2),    
        )
        self.conv2 = nn.Sequential(         
            nn.Conv2d(
                in_channels=32,              
                out_channels=64,            
                kernel_size=5,              
                stride=1,                   
                padding=2
            ),     
            nn.ReLU(),                      
            nn.MaxPool2d(kernel_size=2),                
        )

        # fully connected layer, output 10 classes
        self.out = nn.Linear(64 * 7 * 7, num_classes)
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        # flatten the output of conv2 to (batch_size, 32 * 7 * 7)
        x = x.view(x.size(0), -1)       
        output = self.out(x)
        return output, x    # return x for visualization

In [11]:
cnn = CNN()
print(cnn)

CNN(
  (conv1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (out): Linear(in_features=3136, out_features=10, bias=True)
)


In [12]:
# setup loss function and optimizer, LR will be overriden by sparseml
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn.parameters(), lr = 0.01)   

In [13]:
# create ScheduledModifierManager and Optimizer wrapper
# Path to data
directory_path = Path.cwd().parents[1]
trained_model_path = directory_path.joinpath(
    str(os.environ.get("TRAINED_MODEL_PATH", "models"))
)

manager = ScheduledModifierManager.from_yaml(f'{trained_model_path}/pytorch-nm-mnist-recipe.yaml')
optimizer = manager.modify(cnn, optimizer, steps_per_epoch=len(dataloaders['train']))

## Train model with sparseML

In [14]:
def train_model(
    model, loaders, criterion, optimizer, num_epochs=1
):

    model.train()

    # Train the model
    total_step = len(loaders['train'])

    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(loaders['train']):

            # gives batch data, normalize x when iterate train_loader
            b_x = Variable(images)   # batch x
            b_y = Variable(labels)   # batch y
            output = model(b_x)[0]
            loss = criterion(output, b_y)
            
            # clear gradients for this training step   
            optimizer.zero_grad()
            
            # backpropagation, compute gradients 
            loss.backward()
            # apply gradients
            optimizer.step()
            
            if (i+1) % 100 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                       .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
                pass
        
            pass
    
        pass

In [15]:
train_model(model=cnn, loaders=dataloaders, criterion=criterion, optimizer=optimizer)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch [1/1], Step [100/469], Loss: 0.1142
Epoch [1/1], Step [200/469], Loss: 0.1555
Epoch [1/1], Step [300/469], Loss: 0.0101
Epoch [1/1], Step [400/469], Loss: 0.1120


## Evaluate Model

In [16]:
def test(model, loaders):
    # Test the model
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in loaders['test']:
            test_output, last_layer = cnn(images)
            pred_y = torch.max(test_output, 1)[1].data.squeeze()
            accuracy = (pred_y == labels).sum().item() / float(labels.size(0))
            pass
        
    print('Test Accuracy of the model on the 10000 test images: %.2f' % accuracy)
    
    pass

In [17]:
test(model=cnn, loaders=dataloaders,)

Test Accuracy of the model on the 10000 test images: 1.00


In [18]:
manager.finalize(cnn)

## View Model Sparsity

To see the effects of the model pruning, in this step, you will print out the sparsities of each Conv and FC layer in your model.

In [19]:
from sparseml.pytorch.utils import get_prunable_layers, tensor_sparsity

# print sparsities of each layer
for (name, layer) in get_prunable_layers(cnn):
    print("{}.weight: {:.4f}".format(name, tensor_sparsity(layer.weight).item()))

conv1.0.weight: 0.0000
conv2.0.weight: 0.0000
out.weight: 0.0000


## Save Model and Export to ONNX

Now that the model is fully recalibrated, you need to export it to an ONNX format, which is the format used by the DeepSparse Engine. For PyTorch, exporting to ONNX is natively supported. In the cell block below, a convenience class, ModuleExporter(), is used to handle exporting.

Once the model is saved as an ONNX ﬁle, it is ready to be used for inference with the DeepSparse Engine. For saving a custom model, you can override the sample batch for ONNX graph freezing and locations to save to.

In [20]:
use_ceph = bool(int(os.getenv("USE_CEPH", 0)))
automation = bool(int(os.getenv("AUTOMATION", 0)))


time_version = f"torch-{datetime.now():%y%m%d%H%M%S}-{random.getrandbits(64):08x}"

# Path to data
directory_path = Path.cwd().parents[0]
trained_model_path = directory_path.joinpath(
    str(os.environ.get("TRAINED_MODEL_PATH", "../models"))
)

exporter = ModuleExporter(cnn, output_dir=trained_model_path)
# exporter.export_pytorch(name="mnist_classification_pruned.pth")
exporter.export_onnx(torch.randn(1, 1, 28, 28), name=f"{time_version}_mnist_classification_pruned.onnx")

In [21]:
if automation or use_ceph:
    # Download files from S3
    s3_endpoint_url = os.environ["OBJECT_STORAGE_ENDPOINT_URL"]
    s3_access_key = os.environ["AWS_ACCESS_KEY_ID"]
    s3_secret_key = os.environ["AWS_SECRET_ACCESS_KEY"]
    s3_bucket = os.environ["OBJECT_STORAGE_BUCKET_NAME"]

    # Create an S3 client
    s3 = boto3.client(
        service_name="s3",
        aws_access_key_id=s3_access_key,
        aws_secret_access_key=s3_secret_key,
        endpoint_url=s3_endpoint_url,
    )

    p = Path(f"{trained_model_path}/{time_version}_model_pruned.onnx")
    key = f"{project_name}/models{p}"
    print(key)
    s3.upload_file(Bucket=s3_bucket, Key=key, Filename=str(p))