%%latex
\tableofcontents

In [1]:
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['figure.dpi'] = 300
import random
import csv
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor 
import matplotlib.cm as cm
# Own scripts:
%load_ext autoreload
%autoreload 2
import physics
import data
import nnc2p
from nnc2p import NeuralNetwork # our own architecture

# Introduction

We have trained a model that is successful at the C2P conversion, based on the paper by Dieselhorst et al.; this neural network is called NNC2Pv0. We trained it a bit longer than in the paper and ended up with an efficient network called `NNC2Pv0t2`. Naturally, we want to improve on these methods. One possibility is __pruning the neural network__ to speed up the computations.

__Get the data into dataloaders:__

In [2]:
# Directory that holds the train and test CSV files. It is machine-specific:
# change this single constant to point the notebook at another location.
DATA_DIR = "D:/Coding/master-thesis-AI/data"
# Read the CSV files (the same reader from our own `data` module is used for both splits)
training_data_csv = data.read_training_data(f"{DATA_DIR}/NNC2P_data_train.csv")
test_data_csv = data.read_training_data(f"{DATA_DIR}/NNC2P_data_test.csv")
# Load them as CustomDatasets
training_data = data.CustomDataset(training_data_csv)
test_data = data.CustomDataset(test_data_csv)

In [3]:
# Number of samples per batch, shared by the train and test loaders
BATCH_SIZE = 32
# Put this data into DataLoaders
train_dataloader = DataLoader(training_data, batch_size=BATCH_SIZE)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE)

# PyTorch pruning methods

Load a previously trained model:

In [4]:
device = "cpu"
# Instantiate our architecture with its default layer sizes. At this point the
# weights are only randomly initialised; the trained parameters are loaded later.
model = NeuralNetwork().to(device)

In [5]:
# Load the previously trained model and its parameters
# NOTE(review): both paths are absolute and machine-specific.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
# The global `old_model` is not used again in the visible cells (the pruning functions
# below only use a parameter that happens to have the same name).
old_model = torch.load("D:/Coding/master-thesis-AI/Models/NNC2Pv0t2.pth")
# Only the parameters of the trained network, stored separately from the full model object
model_state_dict = torch.load("D:/Coding/master-thesis-AI/Models/NNC2Pv0t2_state_dict.pth")

In [6]:
# Show the parameter names stored in the checkpoint
print(model_state_dict.keys())

odict_keys(['stack.0.weight', 'stack.0.bias', 'stack.2.weight', 'stack.2.bias', 'stack.4.weight', 'stack.4.bias'])


In [7]:
# Inspect a single entry of the checkpoint: the bias stored under its last key
print(model_state_dict['stack.4.bias'])

tensor([0.1305])


Load the old model's parameters into the new architecture

In [8]:
# `load_parameters` is a method of our own NeuralNetwork class (module nnc2p).
# Presumably it maps the checkpoint's 'stack.*' keys onto the linear1..linear3 layers - TODO confirm.
model.load_parameters(model_state_dict)

In [9]:
# Sanity check: show the layer structure of the model after loading the parameters
print(model)

NeuralNetwork(
  (linear1): Linear(in_features=3, out_features=600, bias=True)
  (linear2): Linear(in_features=600, out_features=200, bias=True)
  (linear3): Linear(in_features=200, out_features=1, bias=True)
)


In [10]:
# Evaluate the network on one hand-picked input (three features, matching in_features=3)
test = torch.tensor([10.204131145455385, 12.026584842282125, 22.131296926293793])
# The network has a single output, so .item() extracts the prediction as a Python float
print(model(test).item())

12.866371154785156


What is the performance of this network?

In [11]:
# Baseline errors of the unpruned network on the test set.
# Judging by the output, this prints the L1 and Linfty errors for p and returns them as a tuple.
nnc2p.measure_performance(model, test_data)

Errors for p: 2.623259e-04  with L1 and 8.344986e-03 with Linfty


(0.00026232592464366463, 0.008344985544681549)

PyTorch has a library for pruning, see [their pruning tutorial](https://pytorch.org/tutorials/intermediate/pruning_tutorial.html).

In [27]:
import torch.nn.utils.prune as prune

For this test, we prune the second weight matrix of our network.

In [28]:
# Work on the second linear layer (600 -> 200)
module = model.linear2
# Show the parameters before pruning
print(list(module.named_parameters()))
# Mask out a randomly chosen 30% of the entries of `weight`. This modifies `module` in place
# and returns it. No random seed is set in the visible cells, so the mask differs between runs.
prune.random_unstructured(module, name="weight", amount=0.3)

[('weight', Parameter containing:
tensor([[-0.2067,  0.0381,  0.0541,  ..., -0.1053,  0.0773, -0.0022],
        [ 0.0454, -0.0570, -0.0544,  ...,  0.0199, -0.1168, -0.0555],
        [-0.0048, -0.0284, -0.0800,  ..., -0.0789, -0.0413, -0.0859],
        ...,
        [ 0.0500, -0.0569, -0.0705,  ...,  0.0048, -0.0813, -0.0858],
        [ 0.0667, -0.0904, -0.0727,  ...,  0.0317, -0.0615, -0.0056],
        [-0.1763,  0.0120,  0.0712,  ..., -0.0230,  0.0433,  0.0013]],
       requires_grad=True)), ('bias', Parameter containing:
tensor([-0.0271, -0.0239, -0.0302, -0.0114, -0.0249,  0.0093, -0.0515, -0.0817,
         0.0342, -0.0339, -0.0481,  0.0432, -0.0191,  0.0025, -0.0540, -0.0058,
        -0.0647, -0.0440, -0.0351, -0.0593, -0.0735, -0.0544, -0.0814, -0.0214,
        -0.0154, -0.0055, -0.0674,  0.0044, -0.0351, -0.0741,  0.0171, -0.0615,
        -0.0459, -0.0020, -0.0305, -0.0228, -0.0305, -0.0376, -0.0454, -0.0744,
         0.0535, -0.0635, -0.0144, -0.0232, -0.0254,  0.0227, -0.0344, -

Linear(in_features=600, out_features=200, bias=True)

In [30]:
# Buffers registered on the module; after pruning this holds the binary 'weight_mask'
print(list(module.named_buffers()))

[('weight_mask', tensor([[0., 1., 0.,  ..., 0., 0., 1.],
        [1., 0., 1.,  ..., 1., 1., 0.],
        [1., 1., 0.,  ..., 1., 1., 0.],
        ...,
        [1., 1., 1.,  ..., 0., 1., 1.],
        [1., 0., 1.,  ..., 1., 1., 1.],
        [1., 1., 0.,  ..., 1., 0., 0.]]))]


Pruning acts by removing weight from the parameters and replacing it with a new parameter called weight_orig (i.e. appending "_orig" to the initial parameter name). weight_orig stores the unpruned version of the tensor. The bias was not pruned, so it will remain intact.

In [29]:
# List the parameters again after pruning to see how the parameter list has changed
print(list(module.named_parameters()))

[('bias', Parameter containing:
tensor([-0.0271, -0.0239, -0.0302, -0.0114, -0.0249,  0.0093, -0.0515, -0.0817,
         0.0342, -0.0339, -0.0481,  0.0432, -0.0191,  0.0025, -0.0540, -0.0058,
        -0.0647, -0.0440, -0.0351, -0.0593, -0.0735, -0.0544, -0.0814, -0.0214,
        -0.0154, -0.0055, -0.0674,  0.0044, -0.0351, -0.0741,  0.0171, -0.0615,
        -0.0459, -0.0020, -0.0305, -0.0228, -0.0305, -0.0376, -0.0454, -0.0744,
         0.0535, -0.0635, -0.0144, -0.0232, -0.0254,  0.0227, -0.0344, -0.0436,
        -0.0584,  0.0099, -0.0403, -0.0387, -0.0368,  0.0166,  0.0336, -0.0221,
         0.0520, -0.0206,  0.0083, -0.0750, -0.0349, -0.0069, -0.0171,  0.0392,
         0.0177, -0.0280,  0.0325,  0.0341, -0.0005, -0.0501, -0.0515, -0.0328,
        -0.0284, -0.0531,  0.0279, -0.0303,  0.0078, -0.0676, -0.0238, -0.0261,
        -0.0792,  0.0161, -0.0640,  0.0077, -0.0182,  0.0292,  0.0060,  0.0367,
        -0.0227, -0.0028,  0.0066, -0.0860, -0.0802,  0.0037, -0.0384, -0.0578,
        

In [31]:
# Repeated inspection of the buffers: 'weight_mask' is the only entry
print(list(module.named_buffers()))

[('weight_mask', tensor([[0., 1., 0.,  ..., 0., 0., 1.],
        [1., 0., 1.,  ..., 1., 1., 0.],
        [1., 1., 0.,  ..., 1., 1., 0.],
        ...,
        [1., 1., 1.,  ..., 0., 1., 1.],
        [1., 0., 1.,  ..., 1., 1., 1.],
        [1., 1., 0.,  ..., 1., 0., 0.]]))]


For the forward pass to work without modification, the weight attribute needs to exist. The pruning techniques implemented in torch.nn.utils.prune compute the pruned version of the weight (by combining the mask with the original parameter) and store them in the attribute weight. Note, this is no longer a parameter of the module, it is now simply an attribute.

In [32]:
# The `weight` attribute now holds the masked tensor: pruned entries show up as zeros
print(module.weight)

tensor([[-0.0000,  0.0381,  0.0000,  ..., -0.0000,  0.0000, -0.0022],
        [ 0.0454, -0.0000, -0.0544,  ...,  0.0199, -0.1168, -0.0000],
        [-0.0048, -0.0284, -0.0000,  ..., -0.0789, -0.0413, -0.0000],
        ...,
        [ 0.0500, -0.0569, -0.0705,  ...,  0.0000, -0.0813, -0.0858],
        [ 0.0667, -0.0000, -0.0727,  ...,  0.0317, -0.0615, -0.0056],
        [-0.1763,  0.0120,  0.0000,  ..., -0.0230,  0.0000,  0.0000]],
       grad_fn=<MulBackward0>)


# Own pruning methods

__(From semester 1:)__ Here we implement functions which are able to prune a column and row of a tensor, thereby pruning neurons from the neural network architecture.

In [20]:
def delete_column_tensor(x, index):
    """Return a copy of ``x`` with the column at ``index`` removed.

    Args:
        x: torch tensor with shape (n, m).
        index: position of the column to delete. Negative values count from
            the end, as in ordinary Python indexing.

    Returns:
        A new tensor with shape (n, m-1); ``x`` itself is left untouched.

    Raises:
        IndexError: if ``index`` does not refer to an existing column.
    """
    n_columns = x.shape[1]
    if not -n_columns <= index < n_columns:
        raise IndexError(
            f"column index {index} is out of range for a tensor with {n_columns} columns"
        )
    # Normalise negative indices: slicing with a raw negative index would make
    # the two pieces overlap and return a tensor that is too wide.
    index = index % n_columns

    # Join the pieces left and right of the deleted column along the column axis
    return torch.cat((x[:, :index], x[:, index + 1:]), dim=1)

In [21]:
def delete_row_tensor(x, index):
    """Return a copy of ``x`` with the row (first-axis entry) at ``index`` removed.

    Works for weight matrices of shape (n, m), which become (n-1, m), as well
    as for bias vectors of shape (n,), which become (n-1,).

    Args:
        x: torch tensor with at least one dimension.
        index: position of the row to delete. Negative values count from the
            end, as in ordinary Python indexing.

    Returns:
        A new tensor with one entry less along the first axis; ``x`` itself is
        left untouched.

    Raises:
        IndexError: if ``index`` does not refer to an existing row.
    """
    n_rows = x.shape[0]
    if not -n_rows <= index < n_rows:
        raise IndexError(
            f"row index {index} is out of range for a tensor with {n_rows} rows"
        )
    # Normalise negative indices: slicing with a raw negative index would make
    # the two pieces overlap and return a tensor that is too long.
    index = index % n_rows

    # Join the pieces above and below the deleted row
    return torch.cat((x[:index], x[index + 1:]))

In [22]:
# print(example_matrix)
# print(np.shape(example_matrix))
# print("---")
# new = delete_column_tensor(example_matrix, 1)
# print(new)
# print(np.shape(new))
# print("---")
# new = delete_row_tensor(example_matrix, 1)
# print(new)
# print(np.shape(new))

In [23]:
# print(example_vector)
# print(np.shape(example_vector))
# print("---")
# new = delete_row_tensor(example_vector, 1)
# print(new)
# print(np.shape(new))

## Pruning methods

The following function prunes a part of the model randomly. 

In [15]:
# Dump the full state dict to see the key names and tensors that the pruning function works with
print(model.state_dict())
# print(state_dict.items())

OrderedDict([('linear1.weight', tensor([[-0.3647,  0.4542, -0.4356],
        [ 0.0097,  0.7044,  0.4866],
        [ 0.1109, -0.0927,  0.1088],
        ...,
        [ 0.5325, -0.4551, -0.3039],
        [-0.4302, -0.1418,  0.2808],
        [ 0.6406, -0.2961,  0.0552]])), ('linear1.bias', tensor([ 5.6580e-01,  2.9055e-01, -7.6713e-01, -3.1754e-01, -1.9565e-01,
         5.0677e-02,  5.1462e-02, -4.1435e-01,  2.3143e-01, -5.2321e-01,
         2.5009e-01, -3.1832e-01, -4.8635e-01, -5.0145e-01, -3.6713e-01,
        -2.6734e-01,  7.8623e-01,  7.4853e-01,  1.0236e-01,  8.8082e-01,
         1.5340e-01, -4.3577e-01, -3.3913e-01, -5.9674e-01, -4.3297e-01,
        -7.3644e-01, -4.7058e-01, -4.1418e-01, -1.2201e-01, -1.7898e-01,
        -7.2384e-01,  5.4838e-04,  2.8393e-01,  1.1887e-01,  5.4077e-01,
        -1.8006e-01, -2.5018e-02, -3.4073e-01, -1.1368e-01, -2.8283e-01,
         5.4974e-01, -1.4166e-01,  3.3166e-01, -5.8469e-01,  5.7012e-02,
        -2.6767e-01,  3.8736e-01,  8.4181e-01, -2.4272e-

In [24]:
def prune_nn_once(old_model):
    """Remove one randomly chosen hidden neuron and return the smaller model.

    A hidden layer is drawn with probability proportional to its number of
    neurons, then one neuron of that layer is drawn uniformly. Deleting the
    neuron means deleting its row from the layer's weight matrix and bias
    vector, and its column from the weight matrix of the next layer.

    The layers are identified by their position in the state dict, not by a
    fixed key pattern, so both the "stack.0.weight" and the "linear1.weight"
    naming styles are handled.
    NOTE(review): this assumes the state dict lists fully connected layers in
    forward order, each with one weight and one bias - TODO confirm for other
    architectures.

    Relies on the notebook-level names ``NeuralNetwork``, ``device``,
    ``delete_row_tensor`` and ``delete_column_tensor``. The random draws use
    NumPy's global random state.

    Args:
        old_model: the network to prune; it is not modified.

    Returns:
        A new ``NeuralNetwork`` with one hidden neuron less, holding the
        remaining parameters of ``old_model``.

    Raises:
        ValueError: if the state dict does not consist of at least two layers
            with one weight and one bias each, or if no hidden layer has more
            than one neuron left.
    """
    state_dict = old_model.state_dict()

    # Collect the layers by position rather than by a hard-coded name pattern
    weight_keys = [key for key in state_dict if key.endswith("weight")]
    bias_keys = [key for key in state_dict if key.endswith("bias")]
    number_of_layers = len(weight_keys)
    if number_of_layers < 2 or len(bias_keys) != number_of_layers:
        raise ValueError(
            "expected a state dict with at least two layers, each having one "
            "'weight' and one 'bias' entry; got keys %s" % list(state_dict.keys())
        )

    # Number of neurons in each layer (hidden + output, not input) = rows of its weight matrix
    number_neurons = np.array([state_dict[key].shape[0] for key in weight_keys])
    number_neurons_hidden = number_neurons[:-1]

    # Every layer must keep at least one neuron, so layers with a single neuron
    # are excluded. When all layers have more than one neuron, these are the
    # size-proportional probabilities.
    prunable = np.where(number_neurons_hidden > 1, number_neurons_hidden, 0)
    if prunable.sum() == 0:
        raise ValueError("every hidden layer has a single neuron left; nothing can be pruned")
    probabilities_hidden_layers = prunable / prunable.sum()

    # Get the hidden layer from which we are going to prune:
    layer_index = np.random.choice(number_of_layers - 1, p=probabilities_hidden_layers)
    # Get the index of the neuron we are going to delete in this layer (uniform probability)
    neuron_index = np.random.choice(number_neurons_hidden[layer_index])

    # Shallow copy: only the three affected entries are replaced by new tensors
    new_state_dict = state_dict.copy()
    incoming_key = weight_keys[layer_index]
    outgoing_key = weight_keys[layer_index + 1]
    bias_key = bias_keys[layer_index]

    # The neuron's incoming weights and its bias are a row of the pruned layer ...
    new_state_dict[incoming_key] = delete_row_tensor(state_dict[incoming_key], neuron_index)
    new_state_dict[bias_key] = delete_row_tensor(state_dict[bias_key], neuron_index)
    # ... and its outgoing weights are a column of the next layer
    new_state_dict[outgoing_key] = delete_column_tensor(state_dict[outgoing_key], neuron_index)

    # Instantiate a new model with the reduced hidden sizes and load the pruned parameters.
    # The sizes are converted to plain Python ints and passed positionally, in layer order.
    new_number_neurons_hidden = number_neurons_hidden.copy()
    new_number_neurons_hidden[layer_index] -= 1
    new_model = NeuralNetwork(*(int(size) for size in new_number_neurons_hidden)).to(device)
    new_model.load_state_dict(new_state_dict)

    return new_model

In [25]:
def prune_nn(old_model, number=10):
    """Prune ``number`` randomly chosen hidden neurons, one at a time.

    Each step calls ``prune_nn_once`` on the result of the previous step.

    Args:
        old_model: the network to start from.
        number: how many neurons to remove (default 10). With ``number=0`` the
            input model is returned as is.

    Returns:
        The model obtained after ``number`` pruning steps.

    Raises:
        ValueError: if ``number`` is negative.
    """
    if number < 0:
        raise ValueError("number of neurons to prune must be non-negative, got %s" % number)

    # Start from the input model so that zero steps returns a defined result
    new_model = old_model
    counter = 0
    while counter < number:
        new_model = prune_nn_once(new_model)
        counter += 1
    return new_model

In [26]:
# Prune the default number of neurons (10).
# NOTE(review): `previous_model` is not defined in the visible part of this notebook, so on a
# fresh kernel this cell fails unless the name is created elsewhere - TODO confirm or replace.
new_model = prune_nn(previous_model)

In [27]:
# torch.save(model, 'Models/NNC2Pv1.pth')

# Performance after pruning

In [35]:
# NOTE(review): NNC2P, test_features, test_labels, L1_norm and Linfty_norm are not defined in
# the visible part of this notebook - TODO make sure they exist before this cell on a fresh kernel.
number = 20
new_model = prune_nn(NNC2P, number=number)
models = [NNC2P, new_model]
names = ["NNC2P", "pruned"]
# Neurons actually removed from each model: the reference network is unpruned
neurons_pruned = [0, number]

# The labels are the same for every model, so convert them to a numpy array once
p = np.array([value.item() for value in test_labels])

for current_model, name, n_pruned in zip(models, names, neurons_pruned):
    # Get predictions (no gradients are needed for evaluation)
    with torch.no_grad():
        p_hat = np.array([current_model(input_values)[0].item() for input_values in test_features])

    # Get the errors:
    delta_p_L1 = L1_norm(p_hat, p)
    delta_p_Linfty = Linfty_norm(p_hat, p)

    print("Errors for p: %e  with L1 and %e with Linfty for %s with %d neurons pruned" % (delta_p_L1, delta_p_Linfty, name, n_pruned))

# Kept from the original cell: afterwards `model` refers to the pruned network
model = new_model

Errors for p: 2.623259e-04  with L1 and 8.344986e-03 with Linfty for NNC2P with 20 neurons pruned
Errors for p: 2.649312e-01  with L1 and 1.045513e+00 with Linfty for pruned with 20 neurons pruned
