In this notebook we implement a feed-forward (linear) layer that supports weight pruning through a binary mask.

In [1]:
# General imports
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from deepmod_l1.diff_library import theta_analytical

#Plotting imports
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

# Remainder imports
from os import listdir, path, getcwd

# Setting cuda
# When a GPU is available, make CUDA float tensors the default tensor type.
if torch.cuda.is_available():
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

# Settings for reproducibility
# Seed the NumPy and PyTorch RNGs, put cuDNN in deterministic mode and switch
# its benchmark mode off.
np.random.seed(42)
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Defining output folder
# Set to the kernel's current working directory.
output_folder = getcwd()

%load_ext autoreload
%autoreload 2

# Making data

In [2]:
# Scalar parameters forwarded to theta_analytical below.
# NOTE(review): presumably D is a diffusion coefficient and a a shape parameter
# of the analytical solution - confirm against deepmod_l1.diff_library.
D = 0.5
a = 0.25

# Space-time grid: 500 points in x on [-5, 5] and 100 points in t on [0, 5].
# indexing='ij' gives both grids shape (500, 100), with x varying along axis 0.
x = np.linspace(-5, 5, 500, dtype=np.float32)
t = np.linspace(0, 5, 100, dtype=np.float32)
x_grid, t_grid = np.meshgrid(x, t, indexing='ij')
    
# Analytical
# Returns the regression target (time_deriv) and the library matrix (theta)
# used in the fits below.
# NOTE(review): output shapes are not visible here - TODO confirm.
time_deriv, theta = theta_analytical(x_grid, t_grid, D, a)

In [3]:
# Least-squares baseline: solve theta @ xi ~= time_deriv, keep only the solution
# (element 0 of the lstsq result) and drop singleton dimensions.
xi_base = np.linalg.lstsq(theta, time_deriv, rcond=None)[0].squeeze()

In [4]:
# Inspect the least-squares baseline coefficients.
xi_base

array([-2.4918742e-17,  2.0816682e-16,  5.0000000e-01,  4.4582393e-16,
       -1.2127323e-16, -1.0972126e-16, -3.7296555e-17,  1.0139718e-16,
       -7.5894152e-17], dtype=float32)

# Implementing pruning layer

In [61]:
class Regression(nn.Linear):
    '''Linear layer whose weights can be pruned through a binary mask.

    Thin wrapper around ``nn.Linear``: the forward pass uses ``weight * mask``
    instead of ``weight``, so masked-out entries contribute nothing to the
    output. The bias (if any) is never pruned.

    Args:
        in_features: size of each input sample.
        out_features: size of each output sample.
        bias: if True, the layer has a learnable additive bias.

    Attributes:
        mask: int8 tensor with the same shape as ``weight``; 1 keeps a weight,
            0 prunes it. Starts as all ones.
    '''
    def __init__(self, in_features, out_features, bias=True):
        super().__init__(in_features, out_features, bias)
        # Registered as a buffer rather than a plain attribute so the mask
        # follows the module through .to()/.cuda() and is included in
        # state_dict(), while staying out of parameters() (not trained).
        self.register_buffer('mask', torch.ones_like(self.weight, dtype=torch.int8))

    def forward(self, input):
        '''Applies the masked linear map to ``input`` and returns the result.'''
        return F.linear(input, self.sparse_weight, self.bias)

    def apply_pruning(self, threshold=1e-3):
        '''Zeroes the mask wherever ``|weight| < threshold``.

        Pruning is cumulative: entries are only ever switched off, never
        restored, so repeated calls can only increase sparsity.

        Args:
            threshold: magnitude below which a weight is pruned.
        '''
        # The mask update is bookkeeping, not part of the differentiable graph.
        with torch.no_grad():
            self.mask[torch.abs(self.weight) < threshold] = 0

    @property
    def sparse_weight(self):
        '''Weight matrix with pruned entries zeroed (``weight * mask``).'''
        return self.weight * self.mask
    

In [48]:
# Standardise theta column-wise: subtract the per-column mean and divide by the
# per-column standard deviation (both taken over axis 0).
# NOTE(review): `a` rebinds the scalar a = 0.25 defined in the data cell; any
# later use of `a` sees this mean vector instead - consider a distinct name.
a = np.mean(theta, axis=0)
b = np.std(theta, axis=0)

# Column 0 is left unchanged (shift 0, scale 1); per the original comment it is
# the column of ones.
a[0] = 0.0 # for the ones.
b[0] = 1.0

theta_standard = (theta - a)/b

In [49]:
# Copy the standardised library and the target into float32 tensors for training.
X_train = torch.tensor(theta_standard, dtype=torch.float32)
y_train = torch.tensor(time_deriv, dtype=torch.float32)

In [50]:
# Stand-alone layer (one input per column of X_train, one output) used only to
# inspect the mask and weights in the next cells.
test_layer = Regression(X_train.shape[1], 1)

In [51]:
# A freshly constructed layer has an all-ones mask (nothing pruned yet).
test_layer.mask

tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1]], dtype=torch.int8)

In [52]:
# Raw (dense) trainable weights.
test_layer.weight

Parameter containing:
tensor([[ 0.3201, -0.2569, -0.1222,  0.1310,  0.2762,  0.2901,  0.2941,  0.0663,
         -0.2899]], requires_grad=True)

In [53]:
# Effective weights (weight * mask); equal to the raw weights while the mask is all ones.
test_layer.sparse_weight

tensor([[ 0.3201, -0.2569, -0.1222,  0.1310,  0.2762,  0.2901,  0.2941,  0.0663,
         -0.2899]], grad_fn=<MulBackward0>)

In [54]:
# Single masked linear layer without bias, built from the Regression class above.
# The bare name `Linear` is not defined in the visible cells, and the cells below
# call apply_pruning / sparse_weight, which only Regression provides.
model = nn.Sequential(Regression(X_train.shape[1], 1, bias=False))

In [55]:
# Adam with its default hyperparameters over all model parameters, and the
# number of steps for the training loop in the next cell.
optimizer = torch.optim.Adam(model.parameters())
iterations = 10000

In [56]:
# Train on the full data set each step with a mean-squared-error loss,
# printing the loss every 1000 steps.
for step in range(iterations):
    optimizer.zero_grad()
    prediction = model(X_train)
    loss = (prediction - y_train).pow(2).mean()
    loss.backward()
    optimizer.step()

    report = step % 1000 == 0
    if report:
        print(loss.item())

0.5376461148262024
0.001030363142490387
0.00020782685896847397
0.00012473341485019773
5.380609582061879e-05
1.3313541785464622e-05
1.3074671869617305e-06
2.7521378953565545e-08
4.3379941999655e-11
1.0284266410955849e-15


In [57]:
# Raw trained weights of the layer, before any pruning is applied.
model[0].weight

Parameter containing:
tensor([[-5.2047e-04,  2.4167e-10,  2.8208e-01,  1.3780e-09, -2.3580e-09,
         -2.4481e-08, -3.4805e-09,  3.7685e-09,  3.1437e-08]],
       requires_grad=True)

In [58]:
# Prune in place: zero the mask wherever |weight| < 1e-3 (the default threshold).
model[0].apply_pruning()

In [59]:
# Effective weights after pruning (weight * mask); pruned entries show as zero.
model[0].sparse_weight

tensor([[-0.0000, 0.0000, 0.2821, 0.0000, -0.0000, -0.0000, -0.0000, 0.0000, 0.0000]],
       grad_fn=<MulBackward0>)

In [62]:
# Retrain the pruned model with a fresh Adam optimizer; the mask set by the
# earlier apply_pruning call stays in effect. Loss is printed every 1000 steps.
iterations = 5000
optimizer = torch.optim.Adam(model.parameters())

for step in range(iterations):
    optimizer.zero_grad()
    prediction = model(X_train)
    loss = (prediction - y_train).pow(2).mean()
    loss.backward()
    optimizer.step()

    report = step % 1000 == 0
    if report:
        print(loss.item())

2.7088697152066743e-07
2.708864883516071e-07
2.7088691467724857e-07
2.708879947022069e-07
2.708864883516071e-07


KeyboardInterrupt: 

In [65]:
# Effective weights (weight * mask) after retraining with the mask in place.
model[0].sparse_weight

tensor([[-0.0000, 0.0000, 0.2821, 0.0000, -0.0000, -0.0000, -0.0000, 0.0000, 0.0000]],
       grad_fn=<MulBackward0>)

In [66]:
# Start again from a freshly initialised masked layer (no bias). Built from the
# Regression class above: the bare name `Linear` is not defined in the visible
# cells, and the training cell below calls apply_pruning / sparse_weight.
model = nn.Sequential(Regression(X_train.shape[1], 1, bias=False))

In [67]:
# Short training run without pruning: 2000 Adam steps on the full data set,
# printing the mean-squared-error loss every 1000 steps.
iterations = 2000
optimizer = torch.optim.Adam(model.parameters())

for step in range(iterations):
    optimizer.zero_grad()
    prediction = model(X_train)
    loss = (prediction - y_train).pow(2).mean()
    loss.backward()
    optimizer.step()

    report = step % 1000 == 0
    if report:
        print(loss.item())

0.17199362814426422
0.0008728724787943065


In [69]:
# Continue training with pruning applied after every optimizer step (default
# threshold). Every 100 steps, print the loss and the current effective weights.
iterations = 2000
optimizer = torch.optim.Adam(model.parameters())

for step in range(iterations):
    optimizer.zero_grad()
    prediction = model(X_train)
    loss = (prediction - y_train).pow(2).mean()
    loss.backward()
    optimizer.step()
    # Prune only after the backward pass and the weight update have finished.
    model[0].apply_pruning()

    report = step % 100 == 0
    if report:
        print(loss.item())
        print(model[0].sparse_weight)

3.2085437851492316e-05
tensor([[-0.0000, -0.0120,  0.2842,  0.0000,  0.0298, -0.0102, -0.0011, -0.0226,
          0.0067]], grad_fn=<MulBackward0>)
5.267392566565832e-07
tensor([[-0.0000, -0.0000,  0.2834,  0.0000, -0.0000, -0.0053,  0.0000,  0.0000,
          0.0042]], grad_fn=<MulBackward0>)
2.987150367061986e-07
tensor([[-0.0000, -0.0000,  0.2825,  0.0000, -0.0000, -0.0018,  0.0000,  0.0000,
          0.0014]], grad_fn=<MulBackward0>)
2.7125969381813775e-07
tensor([[-0.0000, -0.0000, 0.2821, 0.0000, -0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],
       grad_fn=<MulBackward0>)
2.708877104851126e-07
tensor([[-0.0000, -0.0000, 0.2821, 0.0000, -0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],
       grad_fn=<MulBackward0>)
2.7088697152066743e-07
tensor([[-0.0000, -0.0000, 0.2821, 0.0000, -0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],
       grad_fn=<MulBackward0>)
2.7088697152066743e-07
tensor([[-0.0000, -0.0000, 0.2821, 0.0000, -0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],
       grad_fn=<MulBackward0>)

KeyboardInterrupt: 

# Implementing pytorch pruning layer