In [1]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
import torch.nn as nn
import torch
import torchvision
from torchvision import models, transforms
from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR

import numpy as np
import random

%matplotlib inline
import matplotlib.pyplot as plt

from copy import deepcopy

basepath = '/content/drive/My Drive/CS618_Project/MNIST_LeNet4'

In [0]:
class EarlyStopping:
    def __init__(self, patience=7, filename='checkpoint_0', verbose=False):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.filename = filename

    def __call__(self, val_loss, model):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        if self.verbose: print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(),basepath+'/CheckPoints/'+self.filename+'.pt')
        self.val_loss_min = val_loss

In [0]:
def get_data_loaders():
  transform = transforms.Compose([transforms.Resize((28, 28)),transforms.ToTensor(),])
  mnist_train = torchvision.datasets.MNIST(basepath,transform=transform,train=True,download=True)
  mnist_test = torchvision.datasets.MNIST(basepath,transform=transform,train=False,download=True)
  data_loader_train = torch.utils.data.DataLoader(mnist_train, batch_size=1024, shuffle=True, num_workers=8)
  data_loader_test = torch.utils.data.DataLoader(mnist_test, batch_size=4096, shuffle=True, num_workers=8)
  return(data_loader_train,data_loader_test)

data_loader_train,data_loader_test = get_data_loaders()

In [0]:
def defineMasks():
  mask_conv1 = torch.ones(20,1,5,5)
  mask_conv2 = torch.ones(50,20,5,5)
  mask_linear1 = torch.ones(500,800)
  mask_linear2 = torch.ones(10,500)
  return mask_conv1, mask_conv2, mask_linear1, mask_linear2

In [0]:

class LeNet4(nn.Module):
    def __init__(self):
        super(LeNet4, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 20, kernel_size=(5, 5)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),
            nn.Conv2d(20, 50, kernel_size=(5, 5)),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),
            nn.ReLU()
        )
        self.classifier = nn.Sequential(
            nn.Linear(800, 500),
            nn.ReLU(),
            nn.Linear(500, 10),
            nn.LogSoftmax(dim=-1)
        )
    def forward(self, img):
        output = self.features(img)
        output = output.view(img.size(0), -1)
        output = self.classifier(output)
        return output


In [0]:
num_classes = 10
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

mask_conv1, mask_conv2, mask_linear1, mask_linear2 = defineMasks()

def get_unpruned_model(num_classes):
  model = LeNet4()
  return(model)

model = get_unpruned_model(num_classes)

In [0]:
def plot_losses(train_loss,test_loss,filename,i):
  x_axis = list(range(1,len(train_loss)+1))
  fig = plt.figure(i,figsize=(20,10))
  plt.axes(yscale='log')
  plt.plot(x_axis, train_loss, color='blue', label='Train Loss Loss')
  plt.plot(x_axis, test_loss, color='green', label='Validation Loss')
  plt.legend(loc=1)
  plt.xlabel('Iterations over entire dataset')
  plt.ylabel('Loss')
  plt.ylim(0.01,3)
  plt.savefig(basepath+'/Losses/'+filename+'.png')
  plt.close(fig)

def plot_accuracies(train_accuracy,test_accuracy,filename,i):
  x_axis = list(range(1,len(train_accuracy)+1))
  fig = plt.figure(i,figsize=(20,10))
  plt.yscale('log')
  plt.plot(x_axis, train_accuracy, color='blue', label='Train Accuracy')
  plt.plot(x_axis, test_accuracy, color='green', label='Validation Accuracy')
  plt.legend(loc=1)
  plt.xlabel('Iterations over entire dataset')
  plt.ylabel('Accuracy')
  plt.ylim(0.8,1)
  plt.savefig(basepath+'/Accuracies/'+filename+'.png')
  plt.close(fig)

In [0]:
def train(model,prune_itr,lr_multiplier=1):
  
  scale_factor = 16

  learning_rate = 1e-3*lr_multiplier; wt_dcy =1e-7; lr_patience = int(500/scale_factor); lr_stepsize = int(1500/scale_factor); es_patience = int(4000/scale_factor)
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=wt_dcy)
  scheduler_plateau = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=lr_patience, verbose=True, threshold=1e-7)
  scheduler_stepLR = StepLR(optimizer, step_size=lr_stepsize, gamma=0.8)
  early_stopping = EarlyStopping(patience=es_patience,filename='checkpoint_'+str(prune_itr), verbose=True)

  torch.cuda.empty_cache()
  model = model.to(device)
  num_epochs = 25
  es_flag = 0

  train_loss = []; test_loss = []; train_accuracy = []; test_accuracy = []
  
  best_loss = 100000; best_accuracy = 2

  for epoch in range(num_epochs):
    
    correct = 0; data_size = 0; t_loss = 0
    
    for i, (images, labels) in enumerate(data_loader_train):

      model.train()
      images = images.to(device); labels = labels.to(device)
      data_size += len(images)
      outputs = torch.nn.functional.log_softmax(model(images), dim=1)
      trainloss = criterion(outputs,labels)
      optimizer.zero_grad()
      trainloss.backward()
      optimizer.step()
      _, predicted = torch.max(outputs.data, 1)
      correct += (predicted == labels).sum().item()
      t_loss += trainloss.item()
    
      correct_t = 0; data_size_t = 0; tt_loss = 0
      model.eval()
      with torch.no_grad():
        for images, labels in data_loader_test:
          images = images.to(device); labels = labels.to(device)
          data_size_t += len(images)
          outputs = torch.nn.functional.log_softmax(model(images), dim=1)
          _, predicted = torch.max(outputs.data, 1)
          correct_t += (predicted == labels).sum().item()
          tt_loss += criterion(outputs, labels).item()
      print('Epoch: {}, Batch: {}, Train Loss: {:.3f} Train Accuracy: {:.3f}%, Validation Loss: {:.3f} Validation Accuracy: {:.3f}%'.
            format(epoch+1, i+1, trainloss.item(), 100*(predicted==labels).sum().item()/len(images), tt_loss/len(data_loader_test), 100*correct_t/data_size_t))
      
      train_loss.append(trainloss.item()); test_loss.append(tt_loss/len(data_loader_test))
      train_accuracy.append((predicted==labels).sum().item()/len(images)); test_accuracy.append(correct_t/data_size_t)
      
      if (tt_loss/len(data_loader_test)) < best_loss:
        best_loss = tt_loss/len(data_loader_test)
        best_accuracy = correct_t/data_size_t
        

      scheduler_stepLR.step()
      scheduler_plateau.step(tt_loss/len(data_loader_test))

      early_stopping(tt_loss/len(data_loader_test), model)
      if early_stopping.early_stop:
        print("Early stopping")
        es_flag = 1
        break
    if es_flag == 1: break
  
  model.load_state_dict(torch.load(basepath+'/CheckPoints/'+'checkpoint_'+str(prune_itr)+'.pt'))

  return(model,train_loss,test_loss,train_accuracy,test_accuracy,best_loss,best_accuracy)

In [10]:
lr_multiplier = 1
dict_best_info = {}
#model.load_state_dict(torch.load(basepath+'/CheckPoints/'+'checkpoint_0.pt'))
model,train_loss,test_loss,train_accuracy,test_accuracy,best_loss,best_accuracy = train(model,0,lr_multiplier)
dict_best_info[0] = (best_loss,best_accuracy)
plot_losses(train_loss,test_loss,'l0',1)
plot_accuracies(train_accuracy,test_accuracy,'a0',2)

model.load_state_dict(torch.load(basepath+'/CheckPoints/'+'checkpoint_0.pt'))
model = model.to(device)
print(model)

print('Best Accuracy is ' + str(round(best_accuracy*100,2)))
print('Best Loss is ' + str(round(best_loss,3)))

Epoch: 1, Batch: 1, Train Loss: 2.307 Train Accuracy: 16.206%, Validation Loss: 2.264 Validation Accuracy: 16.560%
Validation loss decreased (inf --> 2.264455).  Saving model ...
Epoch: 1, Batch: 2, Train Loss: 2.264 Train Accuracy: 32.246%, Validation Loss: 2.216 Validation Accuracy: 31.220%
Validation loss decreased (2.264455 --> 2.216419).  Saving model ...
Epoch: 1, Batch: 3, Train Loss: 2.223 Train Accuracy: 67.146%, Validation Loss: 2.152 Validation Accuracy: 66.440%
Validation loss decreased (2.216419 --> 2.151540).  Saving model ...
Epoch: 1, Batch: 4, Train Loss: 2.155 Train Accuracy: 67.920%, Validation Loss: 2.071 Validation Accuracy: 67.930%
Validation loss decreased (2.151540 --> 2.070892).  Saving model ...
Epoch: 1, Batch: 5, Train Loss: 2.076 Train Accuracy: 68.142%, Validation Loss: 1.972 Validation Accuracy: 68.640%
Validation loss decreased (2.070892 --> 1.971541).  Saving model ...
Epoch: 1, Batch: 6, Train Loss: 1.980 Train Accuracy: 68.363%, Validation Loss: 1.857

In [11]:
def get_model_layerwise_analysis(model):

  for layer, (name, module) in enumerate(model.features._modules.items()):
    if name == '0':
      weights = torch.abs(module.weight).cpu()
      print(weights.max(),weights.mean(),weights.std())
    if name == '3':
      weights = torch.abs(module.weight).cpu()
      print(weights.max(),weights.mean(),weights.std())

  for layer, (name, module) in enumerate(model.classifier._modules.items()):
    if name == '0':
      weights = torch.abs(module.weight).cpu()
      print(weights.max(),weights.mean(),weights.std())
    if name == '2':
      weights = torch.abs(module.weight).cpu()
      print(weights.max(),weights.mean(),weights.std())

get_model_layerwise_analysis(model)

tensor(0.3248, grad_fn=<MaxBackward1>) tensor(0.1194, grad_fn=<MeanBackward0>) tensor(0.0738, grad_fn=<StdBackward0>)
tensor(0.2471, grad_fn=<MaxBackward1>) tensor(0.0369, grad_fn=<MeanBackward0>) tensor(0.0286, grad_fn=<StdBackward0>)
tensor(0.1543, grad_fn=<MaxBackward1>) tensor(0.0199, grad_fn=<MeanBackward0>) tensor(0.0166, grad_fn=<StdBackward0>)
tensor(0.1762, grad_fn=<MaxBackward1>) tensor(0.0315, grad_fn=<MeanBackward0>) tensor(0.0235, grad_fn=<StdBackward0>)


In [0]:
initial_parameters = sum([p.numel() for p in model.parameters()])

In [0]:
def count_nonzeros(model):
  nonzeros = 0
  for param in model.parameters():
    if param is not None: nonzeros += torch.sum((param != 0).int()).item()
  return(nonzeros)

In [0]:
def update_masks(model, mask_conv1, mask_conv2, mask_linear1, mask_linear2, k1, k2, k3, k4):
    
  for layer, (name, module) in enumerate(model.features._modules.items()):
    if name == '0':
      a_c1 = 0.23*torch.abs(module.weight.max()).data * k1
      t_c1 = 2.5*torch.abs(module.weight.std()).data
      b_c1 = a_c1 + t_c1
    if name == '3':
      a_c2 = 0.11*torch.abs(module.weight.max()).data * k2
      t_c2 = 4*torch.abs(module.weight.std()).data
      b_c2 = a_c2 + t_c2
  
  for layer, (name, module) in enumerate(model.classifier._modules.items()):
    if name == '0':
      a_l1 = 0.3*torch.abs(module.weight.max()).data * k3
      t_l1 = 5*torch.abs(module.weight.std()).data
      b_l1 = a_l1 + t_l1
    if name == '2':
      a_l2 = 0.23*torch.abs(module.weight.max()).data * k4
      t_l2 = 2.5*torch.abs(module.weight.std()).data
      b_l2 = a_l2 + t_l2
  
  for layer, (name, module) in enumerate(model.features._modules.items()):
    if name == '0':
      weights = torch.abs(module.weight).cpu()
      temp_mask1 = (weights > b_c1).float() * 1; temp_mask2 = (weights > a_c1).float() * 1
      temp_mask = (temp_mask2 * mask_conv1) + temp_mask1
      mask_conv1 = (temp_mask >= 1.).float() * 1
    if name == '3':
      weights = torch.abs(module.weight).cpu()
      temp_mask1 = (weights > b_c2).float() * 1; temp_mask2 = (weights > a_c2).float() * 1
      temp_mask = (temp_mask2 * mask_conv2) + temp_mask1
      mask_conv2 = (temp_mask >= 1.).float() * 1

    
  for layer, (name, module) in enumerate(model.classifier._modules.items()):
    if name == '0':
      weights = torch.abs(module.weight).cpu()
      temp_mask1 = (weights > b_l1).float() * 1; temp_mask2 = (weights > a_l1).float() * 1
      temp_mask = (temp_mask2 * mask_linear1) + temp_mask1
      mask_linear1 = (temp_mask >= 1.).float() * 1
    if name == '2':
      weights = torch.abs(module.weight).cpu()
      temp_mask1 = (weights > b_l2).float() * 1; temp_mask2 = (weights > a_l2).float() * 1
      temp_mask = (temp_mask2 * mask_linear2) + temp_mask1
      mask_linear2 = (temp_mask >= 1.).float() * 1

  return mask_conv1, mask_conv2, mask_linear1, mask_linear2

In [0]:
def model_surgery(model, mask_conv1, mask_conv2, mask_linear1, mask_linear2):

  for layer, (name, module) in enumerate(model.features._modules.items()):
    if name == '0':
      weights = (module.weight.data) * mask_conv1.to(device)
      model.features._modules[name].weight.data = weights.to(device)
    if name == '3':
      weights = (module.weight.data) * mask_conv2.to(device)
      model.features._modules[name].weight.data = weights.to(device)
  
  for layer, (name, module) in enumerate(model.classifier._modules.items()):
    if name == '0':
      weights = (module.weight.data) * mask_linear1.to(device)
      model.classifier._modules[name].weight.data = weights.to(device)
    if name == '2':
      weights = (module.weight.data) * mask_linear2.to(device)
      model.classifier._modules[name].weight.data = weights.to(device)

  for param in model.parameters(): param.requires_grad = True
  
  return(model)

In [0]:
def perform_surgery_training(model,prune_itr,lr_multiplier,mask_conv1,mask_conv2,mask_linear1,mask_linear2):
  
  scale_factor = 16

  learning_rate = 1e-3*lr_multiplier; wt_dcy =1e-7; lr_patience = int(500/scale_factor); lr_stepsize = 1; es_patience = int(30000/scale_factor)
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=wt_dcy)
  scheduler_plateau = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=lr_patience, verbose=True, threshold=1e-7)
  scheduler_stepLR = StepLR(optimizer, step_size=lr_stepsize, gamma=0.995)
  early_stopping = EarlyStopping(patience=es_patience,filename='checkpoint_'+str(prune_itr), verbose=True)

  torch.cuda.empty_cache()
  model = model.to(device)
  num_epochs = 40
  es_flag = 0

  final_model = deepcopy(model)

  train_loss = []; test_loss = []; train_accuracy = []; test_accuracy = []

  nonzero_parameter_list = []
  
  best_loss = 100000; best_accuracy = 2; best_parameter_size = 0

  prob_itr = 0; prob_threshold = 1.0; k1 = 1; k2 = 1; k3 = 1; k4 = 1 

  for epoch in range(num_epochs):
    
    correct = 0; data_size = 0; t_loss = 0
    
    for i, (images, labels) in enumerate(data_loader_train):

      model.train()
      images = images.to(device); labels = labels.to(device)
      data_size += len(images)
      outputs = torch.nn.functional.log_softmax(model(images), dim=1)
      trainloss = criterion(outputs,labels)
      optimizer.zero_grad()
      trainloss.backward()

      choice = random.random()
      if choice <= prob_threshold:
        print('Performing Surgery for choice as ' + str(round(choice,4)) + ' for threshold ' + str(round(prob_threshold,4)) + '.')
        mask_conv1, mask_conv2, mask_linear1, mask_linear2 = update_masks(model, mask_conv1, mask_conv2, mask_linear1, mask_linear2, k1, k2, k3, k4)
        k1 = k1 * 1.00067; k2 = k2 * 1.00095; k3 = k3 * 1.0009; k4 = k4 * 1.00085

      optimizer.step()
      model = model_surgery(model, mask_conv1, mask_conv2, mask_linear1, mask_linear2)

      prob_itr += 1
      prob_threshold *= 0.99925
      nonzero_parameters = count_nonzeros(model)
      nonzero_parameter_list.append(nonzero_parameters)
      print('Parameters are: ' + str(nonzero_parameters))
      
      _, predicted = torch.max(outputs.data, 1)
      correct += (predicted == labels).sum().item()
      t_loss += trainloss.item()
    
      correct_t = 0; data_size_t = 0; tt_loss = 0

      model.eval()
      with torch.no_grad():
        for images, labels in data_loader_test:
          images = images.to(device); labels = labels.to(device)
          data_size_t += len(images)
          outputs = torch.nn.functional.log_softmax(model(images), dim=1)
          _, predicted = torch.max(outputs.data, 1)
          correct_t += (predicted == labels).sum().item()
          tt_loss += criterion(outputs, labels).item()
      print('Epoch: {}, Batch: {}, Train Loss: {:.3f} Train Accuracy: {:.3f}%, Validation Loss: {:.3f} Validation Accuracy: {:.3f}%'.
            format(epoch+1, i+1, trainloss.item(), 100*(predicted==labels).sum().item()/len(images), tt_loss/len(data_loader_test), 100*correct_t/data_size_t))
      
      train_loss.append(trainloss.item()); test_loss.append(tt_loss/len(data_loader_test))
      train_accuracy.append((predicted==labels).sum().item()/len(images)); test_accuracy.append(correct_t/data_size_t)
      
      if (tt_loss/len(data_loader_test)) < best_loss:
        best_loss = tt_loss/len(data_loader_test)
        best_accuracy = correct_t/data_size_t
        best_parameter_size = nonzero_parameters
        
      scheduler_stepLR.step()
      scheduler_plateau.step(tt_loss/len(data_loader_test))

      if (correct_t/data_size_t) >= (best_accuracy-(0.15*best_accuracy)):
        last_loss = tt_loss/len(data_loader_test)
        last_accuracy = correct_t/data_size_t
        last_parameter_size = nonzero_parameters
        final_model = deepcopy(model)

      early_stopping(tt_loss/len(data_loader_test), model)
      if early_stopping.early_stop:
        print("Early stopping")
        es_flag = 1
        break
    if es_flag == 1: break

  model.load_state_dict(torch.load(basepath+'/CheckPoints/'+'checkpoint_'+str(prune_itr)+'.pt'))

  return(model, final_model, train_loss, test_loss, train_accuracy, test_accuracy, best_loss, best_accuracy, best_parameter_size, last_loss, last_accuracy, last_parameter_size, nonzero_parameter_list)

In [0]:
def plot_parameters(parameter_list,filename,i):
  x_axis = list(range(1,len(parameter_list)+1))
  fig = plt.figure(i,figsize=(20,10))
  plt.plot(x_axis, parameter_list, color='blue', label='Total Parameters')
  plt.legend(loc=1)
  plt.xlabel('Iterations over entire dataset')
  plt.ylabel('Parameters')
  plt.ylim(0,70000)
  plt.savefig(basepath+'/Parameters/'+filename+'.png')
  plt.close(fig)

In [18]:
lr_multiplier = 1
new_model = deepcopy(model)
#new_model.load_state_dict(torch.load(basepath+'/CheckPoints/'+'checkpoint_1.pt'))
pruned_model, final_model, train_loss, test_loss, train_accuracy, test_accuracy, best_loss, best_accuracy, best_parameter_size, last_loss, last_accuracy, last_parameter_size, parameter_list = perform_surgery_training(new_model,1,lr_multiplier,mask_conv1,mask_conv2,mask_linear1,mask_linear2)
dict_best_info[1] = (best_loss,best_accuracy)
plot_losses(train_loss,test_loss,'l1',1)
plot_accuracies(train_accuracy,test_accuracy,'a1',2)
plot_parameters(parameter_list,'p1',3)

pruned_model.load_state_dict(torch.load(basepath+'/CheckPoints/'+'checkpoint_1.pt'))
pruned_model = pruned_model.to(device)
print(pruned_model)

Performing Surgery for choice as 0.1203 for threshold 1.0.
Parameters are: 48533
Epoch: 1, Batch: 1, Train Loss: 0.025 Train Accuracy: 96.958%, Validation Loss: 0.237 Validation Accuracy: 97.110%
Validation loss decreased (inf --> 0.237165).  Saving model ...
Performing Surgery for choice as 0.9559 for threshold 0.9992.
Parameters are: 46856
Epoch: 1, Batch: 2, Train Loss: 0.236 Train Accuracy: 96.350%, Validation Loss: 0.259 Validation Accuracy: 96.230%
EarlyStopping counter: 1 out of 1875
Performing Surgery for choice as 0.2518 for threshold 0.9985.
Parameters are: 46508
Epoch: 1, Batch: 3, Train Loss: 0.266 Train Accuracy: 97.124%, Validation Loss: 0.214 Validation Accuracy: 97.440%
Validation loss decreased (0.237165 --> 0.213609).  Saving model ...
Performing Surgery for choice as 0.2677 for threshold 0.9978.
Parameters are: 46107
Epoch: 1, Batch: 4, Train Loss: 0.235 Train Accuracy: 98.009%, Validation Loss: 0.181 Validation Accuracy: 98.240%
Validation loss decreased (0.213609 -

In [19]:
get_model_layerwise_analysis(pruned_model)

tensor(0.3512, grad_fn=<MaxBackward1>) tensor(0.1125, grad_fn=<MeanBackward0>) tensor(0.0983, grad_fn=<StdBackward0>)
tensor(0.2915, grad_fn=<MaxBackward1>) tensor(0.0316, grad_fn=<MeanBackward0>) tensor(0.0393, grad_fn=<StdBackward0>)
tensor(0.1662, grad_fn=<MaxBackward1>) tensor(0.0036, grad_fn=<MeanBackward0>) tensor(0.0161, grad_fn=<StdBackward0>)
tensor(0.1895, grad_fn=<MaxBackward1>) tensor(0.0214, grad_fn=<MeanBackward0>) tensor(0.0353, grad_fn=<StdBackward0>)


In [20]:
remaining_parameters = count_nonzeros(pruned_model)
print('For Compression without loss...')
print('Number of Initial Parameters are ' + str(initial_parameters))
print('Number of Remaining Parameters are ' + str(remaining_parameters))
print('Compression Rate without loss is ' + str(initial_parameters/remaining_parameters))
print('Best Accuracy is ' + str(round(best_accuracy*100,2)))
print('Best Loss is ' + str(round(best_loss,3)))

For Compression without loss...
Number of Initial Parameters are 431080
Number of Remaining Parameters are 33497
Compression Rate without loss is 12.869212168253874
Best Accuracy is 98.86
Best Loss is 0.034


In [21]:
def layer_wise_comparison(model,pruned_model):
  selected_layers = [0,2,4,6]
  model_layers = []
  for param in model.parameters():
    if param is not None: model_layers.append(torch.sum((param != 0).int()).item())
  
  pruned_model_layers = []
  for param in pruned_model.parameters():
    if param is not None: pruned_model_layers.append(torch.sum((param != 0).int()).item())

  for i in range(len(model_layers)):
    if i in selected_layers:
      print('Layer Parameters: ' + str(model_layers[i]) + ', Parameters Left: ' + str(pruned_model_layers[i]) + ', Percentage Left: ' + str(round(pruned_model_layers[i]*100/model_layers[i],2)))

print('For Compression without loss...')  
layer_wise_comparison(model,pruned_model)

For Compression without loss...
Layer Parameters: 500, Parameters Left: 315, Percentage Left: 63.0
Layer Parameters: 25000, Parameters Left: 11322, Percentage Left: 45.29
Layer Parameters: 400000, Parameters Left: 19854, Percentage Left: 4.96
Layer Parameters: 5000, Parameters Left: 1426, Percentage Left: 28.52


In [22]:
print('For Best Compression...')  
layer_wise_comparison(model,final_model)

For Best Compression...
Layer Parameters: 500, Parameters Left: 243, Percentage Left: 48.6
Layer Parameters: 25000, Parameters Left: 4437, Percentage Left: 17.75
Layer Parameters: 400000, Parameters Left: 817, Percentage Left: 0.2
Layer Parameters: 5000, Parameters Left: 506, Percentage Left: 10.12


In [23]:
print('Trained for '+str(last_parameter_size)+' non-zero parameters reaching '+str(initial_parameters/last_parameter_size)+' times compression with loss '+str(round(last_loss,3))+' and accuracy '+str(round(last_accuracy*100,2))+'%.')

Trained for 6583 non-zero parameters reaching 65.48382196566915 times compression with loss 1.394 and accuracy 84.05%.
