# VGG16 reduction tutorial
### IMPORTS

In [1]:
import torch
import numpy as np
import torchvision
from torch import nn
import sys
import os

import torchvision.transforms as transforms
import torchvision.datasets as datasets
import pandas as pd
import torch.optim as optim

from smithers.ml.vgg import VGG
from smithers.ml.utils import get_seq_model

import matplotlib.pyplot as plt

from smithers.ml.utils import randomized_range_finder
from smithers.ml.utils import randomized_svd



import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation notices coming from the libraries
# imported above — consider narrowing the filter.
warnings.filterwarnings("ignore")


# Release cached GPU memory that is currently unused before the run starts.
torch.cuda.empty_cache()
import datetime
import time

### SETTING PROPER DEVICE

In [2]:
# Put the parent directory first on the module search path.
sys.path.insert(0, '../')
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### VGG DEFINITION

In [3]:
# Build the VGG network with the CIFAR classifier head and 10 output classes.
# No pretrained weights are requested here; a checkpoint is loaded in a later cell.
VGGnet = VGG(    cfg=None,
                 classifier='cifar',
                 batch_norm=False,
                 num_classes=10,
                 init_weights=False,
                 pretrain_weights=None)
# NOTE(review): the model is moved to `device` before make_layers() runs; if
# make_layers() creates new parameters they would not be covered by this move —
# confirm against smithers.ml.vgg.
VGGnet = VGGnet.to(device) #MODIF
# NOTE(review): any value returned by make_layers() is discarded here — confirm
# the call is needed.
VGGnet.make_layers()
VGGnet._initialize_weights()
# Loss and optimizer for training the full network (SGD with momentum).
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(VGGnet.parameters(), lr=0.001, momentum=0.9)


Loaded base model.



### FUNCTIONS FOR SAVING AND LOADING MODELS

In [4]:
def save_checkpoint_torch(epoch, model, path, optimizer):
    """Write a training snapshot to ``path`` with ``torch.save``.

    The stored object is a dict with three entries:

    * ``'epoch'``: the ``epoch`` value passed in,
    * ``'model_state_dict'``: ``model.state_dict()``,
    * ``'optimizer_state_dict'``: ``optimizer.state_dict()``.

    :param epoch: value recorded under the ``'epoch'`` key.
    :param model: object exposing ``state_dict()``.
    :param path: destination handed to ``torch.save``.
    :param optimizer: object exposing ``state_dict()``.
    """
    snapshot = dict(
        epoch=epoch,
        model_state_dict=model.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
    )
    torch.save(snapshot, path)


def load_checkpoint(model, checkpoint_path, map_location='cpu'):
    """Restore ``model`` in place from a checkpoint file.

    :param model: object exposing ``load_state_dict``; it is updated in place.
    :param checkpoint_path: file handed to ``torch.load``. The loaded object
        must be a mapping that contains the key ``'model_state_dict'``.
    :param map_location: forwarded to ``torch.load``. The default ``'cpu'``
        lets a checkpoint that was saved from a GPU be opened on a machine
        without CUDA; ``load_state_dict`` then copies the values into the
        parameters the model already owns, so the model stays on its device.
    :raises KeyError: if the checkpoint has no ``'model_state_dict'`` entry.
    """
    checkpoint = torch.load(checkpoint_path, map_location=map_location)
    model.load_state_dict(checkpoint['model_state_dict'])

### LOADING THE CIFAR10 DATASET

In [5]:
batch_size = 8  # this can be changed
data_path = '../datasets/'

# Per-channel normalisation applied by both the training and the test pipeline.
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010))

# Transform pipelines: each takes a PIL image and returns a normalised tensor.
# The training pipeline additionally applies a padded random crop and a random
# horizontal flip.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])
transform_test = transforms.Compose([transforms.ToTensor(), normalize])

# Training split and its loader.
train_dataset = datasets.CIFAR10(
    root=data_path + 'CIFAR10/', train=True, download=True,
    transform=transform_train)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)

# Test split and its loader (also shuffled).
test_dataset = datasets.CIFAR10(
    root=data_path + 'CIFAR10/', train=False, download=True,
    transform=transform_test)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=True)

# Training labels as a tensor on the compute device, plus a list view of it.
train_labels = torch.tensor(train_dataset.targets).to(device)
targets = list(train_labels)

Files already downloaded and verified
Files already downloaded and verified


### TRAINING (UNCOMMENT IF NO CHECKPOINT AVAILABLE)

In [6]:
""" print('inizio training', flush=True)
print('Training iniziato') #MODIF
for epoch in range(60):  # loop over the dataset multiple times
    print("Inizia ora l'epoca "+str(epoch), flush=True)
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)


        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = VGGnet(inputs)
        outputs = outputs[1]
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        #if i % 2000 == 1999:    # print every 2000 mini-batches
        if i % 200 == 199:    # print every 200 mini-batches #MODIF
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000), flush=True)
            running_loss = 0.0




save_checkpoint_torch(60, VGGnet, '/u/s/szanin/Smithers/smithers/ml/tutorials/check_vgg_cifar10_60_stefano.pth.tar', optimizer)
 """

' print(\'inizio training\', flush=True)\nprint(\'Training iniziato\') #MODIF\nfor epoch in range(60):  # loop over the dataset multiple times\n    print("Inizia ora l\'epoca "+str(epoch), flush=True)\n    running_loss = 0.0\n    for i, data in enumerate(train_loader, 0):\n        # get the inputs; data is a list of [inputs, labels]\n        inputs, labels = data\n        inputs = inputs.to(device)\n        labels = labels.to(device)\n\n\n        # zero the parameter gradients\n        optimizer.zero_grad()\n\n        # forward + backward + optimize\n        outputs = VGGnet(inputs)\n        outputs = outputs[1]\n        loss = criterion(outputs, labels)\n        loss.backward()\n        optimizer.step()\n\n        # print statistics\n        running_loss += loss.item()\n        #if i % 2000 == 1999:    # print every 2000 mini-batches\n        if i % 200 == 199:    # print every 200 mini-batches #MODIF\n            print(\'[%d, %5d] loss: %.3f\' %\n                  (epoch + 1, i + 1, 

### LOADING A CHECKPOINT

In [7]:
# Checkpoint produced by an earlier training run.
# NOTE(review): absolute, user-specific path — the notebook will not run on
# another machine without editing it.
pretrained = '/u/s/szanin/Smithers/smithers/ml/tutorials/check_vgg_cifar10_60_v2.pth.tar' #Stefano's
# `model` is a second name for the same VGGnet object, not a copy.
model = VGGnet
load_checkpoint(model, pretrained)
# NOTE(review): presumably converts the VGG into an indexable sequential stack
# of layers (it is sliced by layer index further down) — confirm in
# smithers.ml.utils.
seq_model = get_seq_model(model)
# Make sure both handles live on the compute device.
model = model.to(device) #MODIF
seq_model = seq_model.to(device) #MODIF

### ACCURACY OF THE MODEL

In [8]:
# Top-1 accuracy of the sequential model on the CIFAR10 test split.
total = 0
correct = 0
# The forward pass below goes through seq_model, so that is the object that
# must be in inference mode; VGGnet is switched as well, as before.
VGGnet.eval()
seq_model.eval()
with torch.no_grad():
    for test, y_test in test_loader:
        output = seq_model(test.to(device))
        # Index of the largest score along the class axis is the predicted label.
        _, predicted = torch.max(output, 1)
        total += y_test.size(0)
        correct += (predicted == y_test.to(device)).sum().item()
print('Accuracy of network on test images is {:.4f}'.format(100*correct/total), flush=True)


Accuracy of network on test images is 88.8200


### REDUCTION OF VGG

In [8]:
# Re-check the accuracy of the sequential model, logging a running value every
# 300 batches.
total = 0
correct = 0
count = 0
seq_model.eval()
with torch.no_grad():
    for test, y_test in test_loader:
        output = seq_model(test.to(device))
        # Index of the largest score along the class axis is the predicted label.
        _, predicted = torch.max(output, 1)
        total += y_test.size(0)
        correct += (predicted == y_test.to(device)).sum().item()
        count += 1
        if count % 300 == 0:
            print("Accuracy of network on test images is {:.4f}....count: {}".format(100*correct/total,  count), flush=True)


from smithers.ml.netadapter import NetAdapter

# Reduction settings handed to NetAdapter.
cutoff_idx = 7            # position in the list of possible cut layers
red_dim = 50              # reduced dimension
#red_method = 'POD'
red_method = 'RandSVD'    # reduction method
inout_method = 'FNN'      # input-output mapping
n_class = 10              # number of CIFAR10 classes

netadapter = NetAdapter(cutoff_idx, red_dim, red_method, inout_method)
print(netadapter.red_method, netadapter.red_dim, flush=True)
# NOTE(review): the output recorded for this cell ends with
# "TypeError: can't convert cuda:0 device type tensor to numpy" raised during
# the reduction; presumably a CUDA tensor reaches a NumPy call inside
# reduce_net — confirm and fix before relying on red_model.
red_model = netadapter.reduce_net(seq_model, train_dataset, train_labels, train_loader, n_class).to(device)
print(red_model, flush=True)

Accuracy of network on test images is 88.2917....count: 300
Accuracy of network on test images is 88.4167....count: 600
Accuracy of network on test images is 88.5556....count: 900
Accuracy of network on test images is 88.7396....count: 1200
RandSVD 50
Initializing reduction. Chosen reduction method is: RandSVD


TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [9]:
##### Added cell (may be deleted later): runs the steps of
##### NetAdapter.reduce_net one by one to locate the failing call.
total = 0
correct = 0
count = 0
seq_model.eval()
with torch.no_grad():
    for test, y_test in test_loader:
        output = seq_model(test.to(device))
        # Index of the largest score along the class axis is the predicted label.
        _, predicted = torch.max(output, 1)
        total += y_test.size(0)
        correct += (predicted == y_test.to(device)).sum().item()
        count += 1
        if count % 300 == 0:
            print("Accuracy of network on test images is {:.4f}....count: {}".format(100*correct/total,  count), flush=True)


from smithers.ml.netadapter import NetAdapter

# Reduction settings handed to NetAdapter.
cutoff_idx = 7            # position in the list of possible cut layers
red_dim = 50              # reduced dimension
#red_method = 'POD'
red_method = 'RandSVD'    # reduction method
inout_method = 'FNN'      # input-output mapping
n_class = 10              # number of CIFAR10 classes

netadapter = NetAdapter(cutoff_idx, red_dim, red_method, inout_method)
print(netadapter.red_method, netadapter.red_dim, flush=True)

# The single call below is replaced by the explicit steps that follow.
#red_model = netadapter.reduce_net(seq_model, train_dataset, train_labels, train_loader, n_class).to(device) #MODIF
from smithers.ml.utils import PossibleCutIdx, spatial_gradients, forward_dataset, projection
from smithers.ml.rednet import RedNet


print('Initializing reduction. Chosen reduction method is: '+netadapter.red_method, flush=True)
# dtype of the first training sample, used to cast both halves of the network.
input_type = train_dataset[0][0].dtype
# Split the sequential model at the layer selected by cutoff_idx.
possible_cut_idx = PossibleCutIdx(seq_model)
cut_idxlayer = possible_cut_idx[netadapter.cutoff_idx]
pre_model = seq_model[:cut_idxlayer].to(device, dtype=input_type)
post_model = seq_model[cut_idxlayer:].to(device, dtype=input_type)
out_model = forward_dataset(seq_model, train_loader)
# Offending line, expanded into the three calls below:
#matrix_red, proj_mat = netadapter._reduce(pre_model, post_model, train_dataset, train_loader)
# NOTE(review): the output recorded for this cell ends with a RuntimeError
# about tensors on cuda:0 and cpu in a matrix product; presumably
# matrix_features and proj_mat end up on different devices — confirm which of
# the three calls raises.
matrix_features = forward_dataset(pre_model, train_loader)
proj_mat = netadapter._reduce_RandSVD(matrix_features)
matrix_red = projection(proj_mat, train_loader, matrix_features)

inout_map = netadapter._inout_mapping(matrix_red, n_class, out_model,train_labels, train_loader)
# Bind the result to `red_model`, the name printed below and used by every later
# cell; the original bound it only to `reduced_net`, leaving `red_model`
# undefined on a fresh kernel. The move to `device` mirrors the reduce_net call
# this cell replaces.
red_model = RedNet(n_class, pre_model, proj_mat, inout_map).to(device)
reduced_net = red_model  # previous name kept as an alias


print(red_model, flush=True)

Accuracy of network on test images is 87.4167....count: 300
Accuracy of network on test images is 88.6042....count: 600
Accuracy of network on test images is 88.6111....count: 900
Accuracy of network on test images is 88.8021....count: 1200
RandSVD 50
Initializing reduction. Chosen reduction method is: RandSVD


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat2 in method wrapper_mm)

In [11]:
# Scratch 4x4 matrix of standard-normal samples used by the next two cells.
A = torch.randn((4, 4))
A

tensor([[ 0.0117,  0.9208,  0.3108,  1.4690],
        [ 0.6546,  0.0175, -0.9479,  2.2479],
        [-0.0893, -2.1956,  1.2245, -0.6197],
        [ 1.3845,  0.1049,  0.7796,  0.3271]])

In [27]:
# Scratch: Gaussian matrix with one row per column of A and two columns.
np.random.normal(loc=0.0, scale=1.0, size=(A.size(1), 2))

array([[-0.4175633 , -0.84560187],
       [ 1.3246683 , -0.15937175],
       [-0.88493098, -1.21231413],
       [-0.26287561, -0.43962654]])

In [21]:
# Scratch: LU factorisation of the scratch matrix A; the cell displays the
# resulting P, L and U factors.
torch.linalg.lu(A)

torch.return_types.linalg_lu(
P=tensor([[0., 0., 0., 1.],
        [0., 0., 1., 0.],
        [0., 1., 0., 0.],
        [1., 0., 0., 0.]]),
L=tensor([[ 1.0000,  0.0000,  0.0000,  0.0000],
        [-0.0645,  1.0000,  0.0000,  0.0000],
        [ 0.4728,  0.0147,  1.0000,  0.0000],
        [ 0.0084, -0.4203, -0.6291,  1.0000]]),
U=tensor([[ 1.3845,  0.1049,  0.7796,  0.3271],
        [ 0.0000, -2.1888,  1.2748, -0.5986],
        [ 0.0000,  0.0000, -1.3351,  2.1021],
        [ 0.0000,  0.0000,  0.0000,  2.5371]]))

In [23]:
# Leftover scratch cell; presumably a check that the kernel was responsive.
print('hello world')

hello world


In [14]:
from smithers.ml.utils import Total_param, Total_flops, compute_loss, train_kd


# One slot per stage of the reduced network: pre-model, projection, in/out map.
rednet_storage = torch.zeros(3)
rednet_flops = torch.zeros(3)

# Storage reported by Total_param for each stage.
rednet_storage[0] = Total_param(red_model.premodel)
rednet_storage[1] = Total_param(red_model.proj_model)
rednet_storage[2] = Total_param(red_model.inout_map)

# Cost reported by Total_flops for each stage.
rednet_flops[0] = Total_flops(red_model.premodel, device)
rednet_flops[1] = Total_flops(red_model.proj_model, device)
rednet_flops[2] = Total_flops(red_model.inout_map, device)

# Top-1 accuracy of the reduced network on the test split, logged every 50
# batches.
total = 0
correct = 0
count = 0
# Measure in inference mode, as the other evaluation cells do, then restore the
# previous mode so the training further down is not affected.
was_training = red_model.training
red_model.eval()
with torch.no_grad():
    for test, y_test in test_loader:
        # Move the batch to the compute device, consistent with the other
        # evaluation loops.
        output = red_model(test.to(device))
        # Index of the largest score along the class axis is the predicted label.
        _, predicted = torch.max(output, 1)
        total += y_test.size(0)
        correct += (predicted == y_test.to(device)).sum().item()
        count += 1
        if count % 50 == 0:
            print("Accuracy of network on test images is {:.4f}....count: {}".format(100*correct/total,  count), flush=True)
red_model.train(was_training)


# Report the per-stage storage and cost figures collected above; the three
# entries of each tensor are unpacked in order into the format string.
print('Pre nnz = {:.2f}, proj_model nnz={:.2f}, FNN nnz={:.4f}'.format(*rednet_storage),
      flush=True)
print('flops:  Pre = {:.2f}, proj_model = {:.2f}, FNN ={:.2f}'.format(*rednet_flops),
      flush=True)

# Adam with one parameter group per stage of the reduced network: the
# pre-model uses lr=1e-4, the projection and the in/out map use lr=1e-5.
optimizer = torch.optim.Adam([
    {'params': stage.parameters(), 'lr': stage_lr}
    for stage, stage_lr in (
        (red_model.premodel, 1e-4),
        (red_model.proj_model, 1e-5),
        (red_model.inout_map, 1e-5),
    )
])

# Loss histories, seeded with the values measured before the training epochs
# further down in this cell.
train_loss = [compute_loss(red_model, device, train_loader)]
test_loss = [compute_loss(red_model, device, test_loader)]

# Number of training epochs and the file that receives the trained weights.
epochs = 10
filename = './cifar10_VGG16_RedNet_cutIDx_%d.pth' % cutoff_idx

""" if os.path.isfile(filename):
    [rednet_pretrained, train_loss,test_loss] = torch.load(filename)
    red_model.load_state_dict(rednet_pretrained)
    print('rednet trained {} epoches is loaded'.format(epochs), flush=True)
else:
    train_loss = []
    test_loss = []
    train_loss.append(compute_loss(red_model, device, train_loader))
    test_loss.append(compute_loss(red_model, device, test_loader))
    for epoch in range(1, epochs + 1):
        print('EPOCH {}'.format(epoch), flush=True)
        train_loss.append(
                train_kd(red_model,
                model,
                device,
                train_loader,
                optimizer,
                train_max_batch=200,
                alpha=0.1,
                temperature=1.,
                epoch=epoch))
        test_loss.append(compute_loss(red_model, device, test_loader))
    torch.save([red_model.state_dict(), train_loss, test_loss], filename) """

# From here to the end of the cell: code that used to live inside the disabled
# else-branch above.
for epoch in range(1, epochs + 1):
    print('EPOCH {}'.format(epoch), flush=True)
    # One training pass of red_model with the full `model` passed alongside it;
    # the returned loss is appended to the training history.
    epoch_train_loss = train_kd(
        red_model, model, device, train_loader, optimizer,
        train_max_batch=200, alpha=0.1, temperature=1., epoch=epoch)
    train_loss.append(epoch_train_loss)
    test_loss.append(compute_loss(red_model, device, test_loader))
# Persist the trained weights together with both loss histories.
torch.save([red_model.state_dict(), train_loss, test_loss], filename)


Accuracy of network on test images is 11.5000....count: 50
Accuracy of network on test images is 11.6250....count: 100
Accuracy of network on test images is 12.2500....count: 150
Accuracy of network on test images is 12.6250....count: 200
Accuracy of network on test images is 12.2500....count: 250
Accuracy of network on test images is 12.2917....count: 300
Accuracy of network on test images is 12.0000....count: 350
Accuracy of network on test images is 12.1250....count: 400
Accuracy of network on test images is 11.7778....count: 450
Accuracy of network on test images is 11.8250....count: 500
Accuracy of network on test images is 11.8182....count: 550
Accuracy of network on test images is 11.8750....count: 600
Accuracy of network on test images is 12.0962....count: 650
Accuracy of network on test images is 12.3750....count: 700
Accuracy of network on test images is 12.2667....count: 750
Accuracy of network on test images is 12.2344....count: 800
Accuracy of network on test images is 12.

In [17]:
""" from smithers.ml.utils import Total_param, Total_flops
from smithers.ml.utils import compute_loss, train_kd

rednet_storage = torch.zeros(3)
rednet_flops = torch.zeros(3) """

rednet_storage[0], rednet_storage[1], rednet_storage[2] = [
    Total_param(red_model.premodel),
    Total_param(red_model.proj_model),
    Total_param(red_model.inout_map)]
rednet_storage

tensor([6.6204e+00, 7.8125e-01, 4.6921e-03])

In [23]:
""" from smithers.ml.utils import Total_param, Total_flops
from smithers.ml.utils import compute_loss, train_kd"""

rednet_storage_VGG = torch.zeros(1)

rednet_storage_VGG[0] = Total_param(VGGnet)
rednet_storage_VGG

tensor([56.1516])

In [27]:
# Draw one batch from the test loader. The loader was created with
# shuffle=True, so the batch differs from run to run.
inputs_trial, classes_trial = next(iter(test_loader))

In [28]:
# Predicted class for every sample of the trial batch. The network is run once
# for the whole batch (instead of once per sample) and the batch size is taken
# from the output rather than hard-coded to 8.
with torch.no_grad():
    trial_scores = red_model(inputs_trial)
# argmax along the class axis; list() yields one 0-dim tensor per sample.
outputs = list(torch.argmax(trial_scores, dim=1))
outputs

[tensor(4, device='cuda:0'),
 tensor(7, device='cuda:0'),
 tensor(6, device='cuda:0'),
 tensor(7, device='cuda:0'),
 tensor(5, device='cuda:0'),
 tensor(8, device='cuda:0'),
 tensor(8, device='cuda:0'),
 tensor(1, device='cuda:0')]

In [29]:
# Ground-truth labels of the trial batch, shown for comparison with the
# predictions in the previous cell.
classes_trial

tensor([4, 7, 6, 7, 4, 8, 8, 1])