# VGG16 reduction tutorial

This tutorial shows how to build a reduced version of VGG16 using the techniques described in the paper "A Dimensionality Reduction Approach for Convolutional Neural Networks" by L. Meneghetti, N. Demo and G. Rozza (2021), https://arxiv.org/abs/2110.09163


### IMPORTS

In [1]:
import torch
import numpy as np
import torchvision
from torch import nn
import sys
import os

import torchvision.transforms as transforms
import torchvision.datasets as datasets
import pandas as pd
import torch.optim as optim

from smithers.ml.vgg import VGG
from smithers.ml.utils import get_seq_model

import matplotlib.pyplot as plt

from smithers.ml.utils import randomized_range_finder
from smithers.ml.utils import randomized_svd



import warnings
warnings.filterwarnings("ignore")


torch.cuda.empty_cache()
import datetime
import time

### SETTING PROPER DEVICE

In [2]:
# Make the parent directory importable for anything imported from here on.
sys.path.insert(0, '../')

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### VGG INSTANTIATION

In [3]:
# Build the VGG network for CIFAR10 (10 classes) without batch normalisation
# and without loading pretrained weights.
VGGnet = VGG(    cfg=None,
                 classifier='cifar',
                 batch_norm=False,
                 num_classes=10,
                 init_weights=False,
                 pretrain_weights=None)
VGGnet = VGGnet.to(device) #MODIF
# NOTE(review): the return value of make_layers() is discarded and the call
# happens after the model was moved to `device` — confirm against the smithers
# VGG implementation that this call is needed and what it modifies.
VGGnet.make_layers()
# Weights are initialised explicitly here because init_weights=False was
# passed to the constructor above.
VGGnet._initialize_weights()
# Loss and optimizer used when training the full network.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(VGGnet.parameters(), lr=0.001, momentum=0.9)


Loaded base model.



### IMPORT & EXPORT FUNCTIONS

In [4]:
def save_checkpoint_torch(epoch, model, path, optimizer):
    """Write a training checkpoint to disk with ``torch.save``.

    The checkpoint is a dictionary holding the epoch number, the model
    ``state_dict`` and the optimizer ``state_dict``.

    :param epoch: epoch value stored under the ``'epoch'`` key.
    :param model: object whose ``state_dict()`` is stored under
        ``'model_state_dict'``.
    :param path: destination handed to ``torch.save``.
    :param optimizer: object whose ``state_dict()`` is stored under
        ``'optimizer_state_dict'``.
    """
    checkpoint = {'epoch': epoch}
    checkpoint['model_state_dict'] = model.state_dict()
    checkpoint['optimizer_state_dict'] = optimizer.state_dict()
    torch.save(checkpoint, path)


def load_checkpoint(model, checkpoint_path, map_location=None):
    """Load the weights stored in a checkpoint file into ``model``.

    The file is expected to contain a dictionary with a
    ``'model_state_dict'`` entry, as written by ``save_checkpoint_torch``.

    :param model: module whose ``load_state_dict`` receives the weights.
    :param checkpoint_path: path of the checkpoint file.
    :param map_location: forwarded to ``torch.load``. When left to ``None``
        the tensors are mapped to the device of the model's first parameter
        (or to the CPU if the model has no parameters), so that a checkpoint
        saved on a GPU can also be restored on a machine without CUDA.
    """
    if map_location is None:
        first_param = next(model.parameters(), None)
        map_location = first_param.device if first_param is not None else 'cpu'
    checkpoint = torch.load(checkpoint_path, map_location=map_location)
    model.load_state_dict(checkpoint['model_state_dict'])

### CIFAR10 LOADING

In [5]:
# Batch size shared by both loaders (can be changed) and root of the datasets.
batch_size = 8
data_path = '../datasets/'
cifar_root = data_path + 'CIFAR10/'

# Per-channel normalisation applied once the PIL image has become a tensor.
cifar_normalize = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                       (0.2023, 0.1994, 0.2010))

# Training images are augmented (padded random crop + random horizontal flip)
# before conversion; test images are only converted and normalised.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    cifar_normalize,
])
transform_test = transforms.Compose([transforms.ToTensor(), cifar_normalize])

# Datasets (downloaded into `cifar_root` when missing).
train_dataset = datasets.CIFAR10(root=cifar_root, train=True,
                                 download=True, transform=transform_train)
test_dataset = datasets.CIFAR10(root=cifar_root, train=False,
                                download=True, transform=transform_test)

# Both loaders shuffle their dataset.
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          shuffle=True)

# Labels of the whole training set as a tensor on `device`, plus a list view.
train_labels = torch.tensor(train_dataset.targets).to(device)
targets = list(train_labels)

Files already downloaded and verified
Files already downloaded and verified


### TRAINING (UNCOMMENT IF NO CHECKPOINT AVAILABLE)

In [6]:
# Training loop of the full VGG network, disabled by wrapping it in a string
# literal: nothing below is executed, the cell only evaluates to that string.
# To train from scratch, turn the string back into code.
# NOTE(review): inside the disabled code the running loss is printed every
# 200 mini-batches but divided by 2000, and the checkpoint is saved to a
# hard-coded absolute path — both should be revisited before re-enabling it.
""" print('inizio training', flush=True)
print('Training iniziato') #MODIF
for epoch in range(60):  # loop over the dataset multiple times
    print("Inizia ora l'epoca "+str(epoch), flush=True)
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)


        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = VGGnet(inputs)
        outputs = outputs[1]
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        #if i % 2000 == 1999:    # print every 2000 mini-batches
        if i % 200 == 199:    # print every 200 mini-batches #MODIF
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000), flush=True)
            running_loss = 0.0




save_checkpoint_torch(60, VGGnet, '/u/s/szanin/Smithers/smithers/ml/tutorials/check_vgg_cifar10_60_stefano.pth.tar', optimizer)
 """

' print(\'inizio training\', flush=True)\nprint(\'Training iniziato\') #MODIF\nfor epoch in range(60):  # loop over the dataset multiple times\n    print("Inizia ora l\'epoca "+str(epoch), flush=True)\n    running_loss = 0.0\n    for i, data in enumerate(train_loader, 0):\n        # get the inputs; data is a list of [inputs, labels]\n        inputs, labels = data\n        inputs = inputs.to(device)\n        labels = labels.to(device)\n\n\n        # zero the parameter gradients\n        optimizer.zero_grad()\n\n        # forward + backward + optimize\n        outputs = VGGnet(inputs)\n        outputs = outputs[1]\n        loss = criterion(outputs, labels)\n        loss.backward()\n        optimizer.step()\n\n        # print statistics\n        running_loss += loss.item()\n        #if i % 2000 == 1999:    # print every 2000 mini-batches\n        if i % 200 == 199:    # print every 200 mini-batches #MODIF\n            print(\'[%d, %5d] loss: %.3f\' %\n                  (epoch + 1, i + 1, 

### LOADING A CHECKPOINT

In [7]:
# Checkpoint holding the trained VGG16 weights.
pretrained = '/u/s/szanin/Smithers/smithers/ml/tutorials/check_vgg_cifar10_60_v2.pth.tar'

# `model` is another name for the VGG instance built earlier; restore its weights.
model = VGGnet
load_checkpoint(model, pretrained)

# Sequential version of the network, then move both objects to `device`.
seq_model = get_seq_model(model)
model, seq_model = model.to(device), seq_model.to(device)

### ACCURACY OF THE MODEL

In [8]:
# Top-1 accuracy of the trained network on the test set.
total = 0
correct = 0
# The forward passes below go through `seq_model`, so that is the module that
# must be in evaluation mode; `VGGnet` is switched as well, as before.
VGGnet.eval()
seq_model.eval()
with torch.no_grad():
    for test, y_test in test_loader:
        output = seq_model(test.to(device))
        # Predicted class = index of the largest output along dimension 1.
        _, predicted = torch.max(output, 1)
        total += y_test.size(0)
        correct += (predicted == y_test.to(device)).sum().item()
print('Accuracy of network on test images is {:.4f}'.format(100*correct/total), flush=True)


Accuracy of network on test images is 88.8200


### REDUCTION OF THE NETWORK

In [9]:
# Same accuracy measurement on `seq_model`, reporting the running value every
# 300 batches.
total = correct = count = 0
seq_model.eval()
with torch.no_grad():
    for count, (test, y_test) in enumerate(test_loader, start=1):
        output = seq_model(test.to(device))
        # Exponential of the raw outputs (not used further in this cell).
        ps = torch.exp(output)
        predicted = torch.max(output.data, 1)[1]
        total += y_test.size(0)
        correct += (predicted == y_test.to(device)).sum().item()
        if count % 300 == 0:
            print("Accuracy of network on test images is {:.4f}....count: {}".format(100*correct/total,  count), flush=True)


from smithers.ml.netadapter import NetAdapter

# Settings handed to NetAdapter (their exact meaning is defined by smithers).
cutoff_idx = 7
red_dim = 50
red_method = 'POD'
inout_method = 'FNN'
n_class = 10

# Build the reduced network from the sequential model and the training data,
# then move it to `device`.
netadapter = NetAdapter(cutoff_idx, red_dim, red_method, inout_method)
red_model = netadapter.reduce_net(seq_model, train_dataset, train_labels,
                                  train_loader, n_class)
red_model = red_model.to(device)
print(red_model, flush=True)

Accuracy of network on test images is 89.4167....count: 300
Accuracy of network on test images is 89.0833....count: 600
Accuracy of network on test images is 89.1389....count: 900
Accuracy of network on test images is 88.9271....count: 1200
Initializing reduction. Chosen reduction method is: POD
Siamo alla batch batch 0
Siamo alla batch batch 1000
Siamo alla batch batch 2000
Siamo alla batch batch 3000
Siamo alla batch batch 4000
Siamo alla batch batch 5000
Siamo alla batch batch 6000
Siamo alla batch batch 0
Siamo alla batch batch 1000
Siamo alla batch batch 2000
Siamo alla batch batch 3000
Siamo alla batch batch 4000
Siamo alla batch batch 5000
Siamo alla batch batch 6000
Le dimensioni delle due matrici sono: proj_mat = torch.Size([4096, 50]) e matrix (input di projection) = torch.Size([50000, 4096])
si dovranno moltiplicare alcune righe di input_matrix per proj_matrix
proj_mat è salvata su 0 (-1 = cpu, 0 = gpu)
matrix è salvata su 0 (-1 = cpu, 0 = gpu)
Comincia ora il training della

### RED MODEL STORAGE SUMMARY

In [10]:
# List every entry of the reduced model's state_dict with its shape and report
# the total storage in megabytes.
print("RED_MODEL's state_dict:")
red_state = red_model.state_dict()  # built once instead of once per lookup
storage = 0        # total number of stored scalars
storage_bytes = 0  # total size in bytes, using each tensor's own element size
for param_name, param in red_state.items():
    print(param_name, "\t", param.size())
    storage += param.numel()
    storage_bytes += param.numel() * param.element_size()
print(f"The used MB are: {storage_bytes / 10 ** 6}")

RED_MODEL's state_dict:
premodel.0.weight 	 torch.Size([64, 3, 3, 3])
premodel.0.bias 	 torch.Size([64])
premodel.2.weight 	 torch.Size([64, 64, 3, 3])
premodel.2.bias 	 torch.Size([64])
premodel.5.weight 	 torch.Size([128, 64, 3, 3])
premodel.5.bias 	 torch.Size([128])
premodel.7.weight 	 torch.Size([128, 128, 3, 3])
premodel.7.bias 	 torch.Size([128])
premodel.10.weight 	 torch.Size([256, 128, 3, 3])
premodel.10.bias 	 torch.Size([256])
premodel.12.weight 	 torch.Size([256, 256, 3, 3])
premodel.12.bias 	 torch.Size([256])
premodel.14.weight 	 torch.Size([256, 256, 3, 3])
premodel.14.bias 	 torch.Size([256])
proj_model.weight 	 torch.Size([50, 4096])
inout_map.model.0.weight 	 torch.Size([20, 50])
inout_map.model.0.bias 	 torch.Size([20])
inout_map.model.2.weight 	 torch.Size([10, 20])
inout_map.model.2.bias 	 torch.Size([10])
The used MB are: 7.766071796417236


### FULL MODEL STORAGE SUMMARY

In [11]:
# List every entry of the full model's state_dict with its shape and report
# the total storage in megabytes.
print("FULL MODEL's state_dict:")
full_state = VGGnet.state_dict()  # built once instead of once per lookup
storage = 0        # total number of stored scalars
storage_bytes = 0  # total size in bytes, using each tensor's own element size
for param_name, param in full_state.items():
    print(param_name, "\t", param.size())
    storage += param.numel()
    storage_bytes += param.numel() * param.element_size()
# The value is divided by 10**6, so the unit is MB (the label used to say bytes).
print(f"The used MB are: {storage_bytes / 10 ** 6}")

FULL MODEL's state_dict:
features.0.weight 	 torch.Size([64, 3, 3, 3])
features.0.bias 	 torch.Size([64])
features.2.weight 	 torch.Size([64, 64, 3, 3])
features.2.bias 	 torch.Size([64])
features.5.weight 	 torch.Size([128, 64, 3, 3])
features.5.bias 	 torch.Size([128])
features.7.weight 	 torch.Size([128, 128, 3, 3])
features.7.bias 	 torch.Size([128])
features.10.weight 	 torch.Size([256, 128, 3, 3])
features.10.bias 	 torch.Size([256])
features.12.weight 	 torch.Size([256, 256, 3, 3])
features.12.bias 	 torch.Size([256])
features.14.weight 	 torch.Size([256, 256, 3, 3])
features.14.bias 	 torch.Size([256])
features.17.weight 	 torch.Size([512, 256, 3, 3])
features.17.bias 	 torch.Size([512])
features.19.weight 	 torch.Size([512, 512, 3, 3])
features.19.bias 	 torch.Size([512])
features.21.weight 	 torch.Size([512, 512, 3, 3])
features.21.bias 	 torch.Size([512])
features.24.weight 	 torch.Size([512, 512, 3, 3])
features.24.bias 	 torch.Size([512])
features.26.weight 	 torch.Size([5

In [12]:
# Dump every entry of the reduced model's state_dict to its own .npy file,
# named 'numpy_tensor_<entry name>'.
for entry_name, entry in red_model.state_dict().items():
    np.save('numpy_tensor_' + str(entry_name),
            entry.detach().to('cpu').numpy())

### STORAGE AND FLOPS COUNTS

In [17]:
from smithers.ml.utils import Total_param, Total_flops
from smithers.ml.utils import compute_loss, train_kd

# The three parts of the reduced network, in the order used for the reports.
red_parts = (red_model.premodel, red_model.proj_model, red_model.inout_map)

# Storage reported by Total_param for each part.
rednet_storage = torch.zeros(3)
for part_idx, part in enumerate(red_parts):
    rednet_storage[part_idx] = Total_param(part)

# FLOPs reported by Total_flops for each part.
rednet_flops = torch.zeros(3)
for part_idx, part in enumerate(red_parts):
    rednet_flops[part_idx] = Total_flops(part, device)

print(rednet_storage, rednet_flops)

tensor([6.6204e+00, 7.8125e-01, 4.6921e-03]) tensor([1.9051e+02, 2.0480e-01, 1.2000e-03])


### TRAINING OF THE REDUCED NETWORK

In [18]:
# Accuracy of the reduced network on the test set, reported every 50 batches.
total = 0
correct = 0
count = 0
with torch.no_grad():
    for test, y_test in test_loader:
        # The labels are compared on `device`, so the images are sent there
        # too before the forward pass (as in the other evaluation loops).
        output = red_model(test.to(device))
        # Predicted class = index of the largest output along dimension 1.
        _, predicted = torch.max(output, 1)
        total += y_test.size(0)
        correct += (predicted == y_test.to(device)).sum().item()
        count += 1
        if count%50 == 0:
            print("Accuracy of network on test images is {:.4f}....count: {}".format(100*correct/total,  count), flush=True)


# Report storage and FLOPs of the three parts of the reduced network.
print('Pre nnz = {:.2f}, proj_model nnz={:.2f}, FNN nnz={:.4f}'.format(
    *rednet_storage), flush=True)
print('flops:  Pre = {:.2f}, proj_model = {:.2f}, FNN ={:.2f}'.format(
    *rednet_flops), flush=True)

# Adam with one parameter group per part: the pre-model uses a learning rate
# of 1e-4, the projection and the input-output map use 1e-5.
optimizer = torch.optim.Adam([
    {'params': red_model.premodel.parameters(), 'lr': 1e-4},
    {'params': red_model.proj_model.parameters(), 'lr': 1e-5},
    {'params': red_model.inout_map.parameters(), 'lr': 1e-5},
])

# Loss histories, starting with the losses measured before any training.
train_loss = [compute_loss(red_model, device, train_loader)]
test_loss = [compute_loss(red_model, device, test_loader)]

# Number of training epochs and file receiving the trained reduced network.
epochs = 10
filename = './cifar10_VGG16_RedNet' + red_method + '_cutIDx_%d.pth' % (cutoff_idx)

# Disabled variant of the training below, kept as an inert string literal (it
# is never executed): when `filename` already exists it would load the saved
# weights and loss histories instead of training again.
""" if os.path.isfile(filename):
    [rednet_pretrained, train_loss,test_loss] = torch.load(filename)
    red_model.load_state_dict(rednet_pretrained)
    print('rednet trained {} epoches is loaded'.format(epochs), flush=True)
else:
    train_loss = []
    test_loss = []
    train_loss.append(compute_loss(red_model, device, train_loader))
    test_loss.append(compute_loss(red_model, device, test_loader))
    for epoch in range(1, epochs + 1):
        print('EPOCH {}'.format(epoch), flush=True)
        train_loss.append(
                train_kd(red_model,
                model,
                device,
                train_loader,
                optimizer,
                train_max_batch=200,
                alpha=0.1,
                temperature=1.,
                epoch=epoch))
        test_loss.append(compute_loss(red_model, device, test_loader))
    torch.save([red_model.state_dict(), train_loss, test_loss], filename) """

# Train the reduced network with train_kd, passing the full `model` alongside
# it, and record the train/test loss after every epoch.
# (From here to the end, this code used to live in the `else` branch of the
# disabled block.)
for epoch in range(1, epochs + 1):
    print('EPOCH {}'.format(epoch), flush=True)
    epoch_train_loss = train_kd(red_model, model, device, train_loader,
                                optimizer,
                                train_max_batch=200,
                                alpha=0.1,
                                temperature=1.,
                                epoch=epoch)
    train_loss.append(epoch_train_loss)
    test_loss.append(compute_loss(red_model, device, test_loader))

# Persist the trained weights together with both loss histories.
torch.save([red_model.state_dict(), train_loss, test_loss], filename)


Accuracy of network on test images is 7.5000....count: 50
Accuracy of network on test images is 8.6250....count: 100
Accuracy of network on test images is 8.8333....count: 150
Accuracy of network on test images is 9.3750....count: 200
Accuracy of network on test images is 9.1500....count: 250
Accuracy of network on test images is 9.1250....count: 300
Accuracy of network on test images is 8.9286....count: 350
Accuracy of network on test images is 8.9375....count: 400
Accuracy of network on test images is 9.0000....count: 450
Accuracy of network on test images is 9.0750....count: 500
Accuracy of network on test images is 9.0455....count: 550
Accuracy of network on test images is 8.9583....count: 600
Accuracy of network on test images is 8.9423....count: 650
Accuracy of network on test images is 9.0000....count: 700
Accuracy of network on test images is 8.8500....count: 750
Accuracy of network on test images is 8.7812....count: 800
Accuracy of network on test images is 8.8382....count: 85