In this notebook we implement a simpler library layer, a regression layer and the pruning with PyTorch.

In [1]:
# General imports
from itertools import combinations, product

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

#Plotting imports
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

# Remainder imports
from os import listdir, path, getcwd

# Setting cuda: when a GPU is available, make CUDA float tensors the default tensor type,
# so tensors created without an explicit device/dtype follow that default.
# NOTE(review): newer PyTorch releases deprecate set_default_tensor_type in favour of
# torch.set_default_device / torch.set_default_dtype — confirm against the installed version.
if torch.cuda.is_available():
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

# Settings for reproducibility: seed NumPy's and PyTorch's global RNGs and
# switch cuDNN to deterministic mode with benchmarking turned off.
np.random.seed(42)
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Defining output folder (the current working directory of the kernel)
output_folder = getcwd()

%load_ext autoreload
%autoreload 2

# DeepMoD stuff
from deepymod_torch.utilities import create_deriv_data
from deepymod_torch.network import Linear, Tanh
from deepymod_torch.output import progress


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# Implementing new layers

## Library layer

We implement a simpler library layer for use in the network. Let's not worry about DeepMoD for now and just keep it simple:

In [2]:
class Library(nn.Module):
    '''Base class for a library-as-layer module.

    Subclasses have to supply a `theta(input)` method that returns the pair
    `(time_deriv_list, theta)`; it is called both in `forward` and once at
    construction time to count the library terms.
    '''
    def __init__(self, input_dim, output_dim, diff_order):
        super().__init__()
        self.diff_order = diff_order
        # Number of columns of theta, inferred by probing the subclass' theta
        self.total_terms = self.terms(input_dim, output_dim, self.diff_order)

    def forward(self, input):
        '''Returns the untouched input together with the time derivatives and the library.'''
        time_derivs, library = self.theta(input)
        return input, time_derivs, library

    def terms(self, input_dim, output_dim, max_order):
        '''Counts the library terms by evaluating theta on a single fake sample.'''
        # One sample of ones, shaped like the (prediction, derivatives) pair theta expects
        fake_prediction = torch.ones((1, output_dim), dtype=torch.float32)
        fake_derivs = torch.ones((1, max_order, input_dim, output_dim), dtype=torch.float32)

        library = self.theta((fake_prediction, fake_derivs))[1]
        return library.shape[1]

In [3]:
class library_basic(Library):
    '''Implementation of library layer. Inherits from Library layer.

    The library is built from polynomials of the network output (up to
    `poly_order`) and the derivatives handed in next to it. `poly_order` is
    stored before the parent constructor runs because the parent already
    evaluates `theta` once during construction to count the library terms.
    '''
    def __init__(self, input_dim, output_dim, diff_order, poly_order):
        self.poly_order = poly_order
        super().__init__(input_dim, output_dim, diff_order)

    def theta(self, input):
        '''Calculates the library and time deriv from NN output.

        Args:
            input: tuple `(X, dX)`. X is indexed as (sample, output) and dX as
                (sample, derivative order, input coordinate, output).

        Returns:
            time_deriv_list: tuple with one tensor per output, taken from order
                index 0 and the first input coordinate of dX (treated as time).
            theta: 2D tensor with one row per sample and one column per library term.
        '''
        X, dX = input
        samples = X.shape[0]

        # Time derivatives
        dt = dX[:, 0, :1, :]
        time_deriv_list = torch.unbind(dt, dim=2)

        # Polynomial part: stack 1, X, X^2, ... along dim 1 by multiplying the previous power with X
        u = torch.ones_like(X)[:, None, :]
        for order in range(1, self.poly_order + 1):  # plain ints; no tensor is needed just to count
            u = torch.cat((u, u[:, order-1:order, :] * X[:, None, :]), dim=1)
        poly_list = torch.unbind(u, dim=2) #list with each entry corresponding to eq.

        # Derivative part: every input coordinate but the first, flattened per sample and
        # prefixed with a column of ones. new_ones keeps that column on the same device
        # and dtype as the derivative data, so the concatenation cannot mismatch.
        dx = dX[:, :, 1:, :]
        deriv_list = [torch.cat((eq.new_ones((samples, 1)), eq.reshape(samples, -1)), dim=1) for eq in torch.unbind(dx, dim=3)] #list with each entry corresponding to eq.

        # Combining to make  theta
        if len(poly_list) == 1:
            theta = torch.matmul(poly_list[0][:, :, None], deriv_list[0][:, None, :]).reshape(samples, -1) # If we have a single output, we simply calculate and flatten matrix product between polynomials and derivatives to get library
        else:
            # combinations / product come from itertools
            theta_uv = torch.cat([torch.matmul(u[:, :, None], v[:, None, :]).reshape(samples, -1) for u, v in combinations(poly_list, 2)], 1)  # calculate all unique combinations between polynomials
            theta_dudv = torch.cat([torch.matmul(du[:, :, None], dv[:, None, :]).reshape(samples, -1)[:, 1:] for du, dv in combinations(deriv_list, 2)], 1) # calculate all unique combinations of derivatives
            theta_udu = torch.cat([torch.matmul(u[:, 1:, None], du[:, None, 1:]).reshape(samples, -1) for u, du in product(poly_list, deriv_list)], 1)  # calculate all unique products of polynomials and derivatives
            theta = torch.cat([theta_uv, theta_dudv, theta_udu], dim=1)

        return time_deriv_list, theta

The regression layer is a simple linear layer, but we also need to forward the output so let's build a simple wrapper:

In [4]:
class Regression(nn.Linear):
    '''Bias-free linear layer applied to the library, written as a thin wrapper around nn.Linear.

    It consumes the `(X, time_deriv_list, theta)` triple, applies the weights to
    theta only, and forwards X and the first time derivative next to the result.
    '''
    def __init__(self, in_features, out_features):
        super().__init__(in_features, out_features, bias=False)

    def forward(self, input):
        '''Returns (X, first entry of time_deriv_list, theta multiplied by the weights).'''
        prediction, time_derivs, library = input
        # Only the first time derivative is passed on
        return (prediction, time_derivs[0], F.linear(library, self.weight))


## Building model

In [9]:
def build_network(input_dim, hidden_dim, layers, output_dim, library_function, library_args):
    '''Build deepmod model.

    Args:
        input_dim: number of input features of the first Linear layer.
        hidden_dim: width of every hidden layer.
        layers: number of hidden Linear/Tanh pairs added after the input layer.
        output_dim: number of outputs of the last Linear layer.
        library_function: class of the library layer; it is called as
            `library_function(input_dim, output_dim, **library_args)` and must
            expose `total_terms`.
        library_args: dict of extra keyword arguments for `library_function`.

    Returns:
        nn.Sequential of the feed-forward network, the library layer and a
        regression layer with a single output.
    '''
    network = [Linear(input_dim, hidden_dim), Tanh()]  # Input layer
    for _ in range(layers):  # Hidden layers; a plain counter, no tensor needed
        network.extend((Linear(hidden_dim, hidden_dim), Tanh()))
    network.append(Linear(hidden_dim, output_dim))  # Output layer

    library = library_function(input_dim, output_dim, **library_args)  # Library layer
    network.append(library)
    network.append(Regression(library.total_terms, 1))  # Regression layer, one output
    torch_network = nn.Sequential(*network)

    return torch_network

# Testing new model

In [13]:
# Loading data: the .npy file stores a pickled dict with the entries 't', 'x' and 'u'
data = np.load('../../tests/data/burgers.npy', allow_pickle=True).item()
X = np.column_stack((data['t'].flatten(), data['x'].flatten()))  # one (t, x) row per point
y = np.real(data['u']).reshape(-1, 1)  # real part of u as a single column
number_of_samples = 500

# Random subset of the points for training (order drawn from NumPy's global RNG)
idx = np.random.permutation(y.size)
X_train = torch.tensor(X[idx[:number_of_samples], :], dtype=torch.float32)
y_train = torch.tensor(y[idx[:number_of_samples], :], dtype=torch.float32)

## Running DeepMoD
config = {
    'input_dim': 2,
    'hidden_dim': 20,
    'layers': 5,
    'output_dim': 1,
    'library_function': library_basic,
    'library_args': {'poly_order': 2, 'diff_order': 2},
}

X_input = create_deriv_data(X_train, config['library_args']['diff_order'])

In [14]:
# Instantiate the network (feed-forward part, library layer, regression layer) described by config
model = build_network(**config)

In [15]:
%%timeit
# Time a single forward pass of the full model on the training input
model(X_input)

9.15 ms ± 2.5 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


Now it's time to train, without L1 regularization and pruning:

In [16]:
# Adam with its default hyperparameters over all parameters of the model
optimizer = torch.optim.Adam(model.parameters())
max_iterations = 10000  # the training loop runs iterations 0..max_iterations inclusive

In [20]:
# Header for the rows written by progress()
print('| Iteration | Progress | Time remaining |     Cost |      MSE |      Reg |       L1 |')
# iteration is a 0-d tensor here, and is handed to progress() as such.
# NOTE(review): progress() may rely on tensor methods of iteration — confirm before switching to range().
for iteration in torch.arange(0, max_iterations + 1):
    # The model returns the (X, dX) pair passed through the library layer, the first
    # time derivative and the regression output; prediction[0] is X.
    prediction, time_deriv, f = model(X_input)
    loss_mse = torch.mean((prediction[0] - y_train)**2)  # data-fit term
    loss_reg = torch.mean((time_deriv - f)**2)  # mismatch between time derivative and library regression
    loss = loss_mse + loss_reg
        
    # Writing
    if iteration % 100 == 0:
        # NOTE(review): the literal 0s are presumably the start time and the L1 value — confirm against progress()
        progress(iteration, 0, max_iterations, loss.item(), loss_mse.item(), loss_reg.item(), 0)
        
    # Standard update: clear old gradients, backpropagate, take an optimizer step
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()



| Iteration | Progress | Time remaining |     Cost |      MSE |      Reg |       L1 |
      10000    100.00%               0s   8.99e-07   5.34e-07   3.65e-07   0.00e+00 

Let's look at the weights of the regression layer:

In [21]:
# Weights of the last module of the Sequential, i.e. the regression layer (one coefficient per library term)
model[-1].weight

Parameter containing:
tensor([[-5.4734e-05, -7.3680e-04,  1.0283e-01, -5.2212e-03, -9.6069e-01,
         -1.7441e-02,  1.7855e-02, -1.1310e-01,  3.9254e-02]],
       requires_grad=True)