# Learning how to Solve a Linear Problem using Neural Networks in PyTorch

Import basic data types

In [1]:
import pandas as pd
import numpy  as np

By building our own GMRES function, we can take it appart and play with it's insides

In [2]:
from gmres import GMRES

Pyplot should display images inline

In [3]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


Work with paths...

In [4]:
from os.path import join

Import image IO libraries -- might not be needed, but I'll keep it in here or now

In [5]:
from skimage.io          import imread
import matplotlib.pyplot as     pp

Creates the validation set

In [6]:
from sklearn.model_selection import train_test_split

Evaluates the model

In [7]:
from sklearn.metrics import accuracy_score
from tqdm            import tqdm, trange

Pytorch Libraries

In [8]:
import torch
from torch.autograd import Variable
from torch.nn       import Linear, ReLU, CrossEntropyLoss, \
                           Sequential, Conv2d, MaxPool2d,  \
                           Module, Softmax, BatchNorm2d, Dropout
from torch.optim    import Adam, SGD

In [9]:
mat_to_a = lambda a    : np.squeeze(np.asarray(a))
matmul_a = lambda a, b : mat_to_a(np.dot(a, b))

In [10]:
A = np.matrix(
    [[1, 1], 
     [3, -4]]
)

In [11]:
xt = np.array([2,1])
b  = matmul_a(A, xt)
x0 = np.array([0, 0])

In [12]:
b

array([3, 2])

In [13]:
e = 0
nmax_iter = 5

In [14]:
x = GMRES(A, b, x0, e, nmax_iter)

In [15]:
x

[array([3, 2]), array([1.96153846, 1.30769231]), array([2., 1.])]

In [16]:
class TwoLayerNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        """
        In the constructor we instantiate two nn.Linear modules and assign them as
        member variables.
        """
        super(TwoLayerNet, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        """
        In the forward function we accept a Tensor of input data and we must return
        a Tensor of output data. We can use Modules defined in the constructor as
        well as arbitrary operators on Tensors.
        """
        h_relu = self.linear1(x).clamp(min=0)
        y_pred = self.linear2(h_relu)
        return y_pred

In [17]:
x=torch.empty(0, 2)
y=torch.randn(1, 2)

In [18]:
y[0, :]

tensor([-1.2058,  0.4605])

In [19]:
z = torch.cat((x, y), 0)

In [20]:
torch.cat((z, y), 0)

tensor([[-1.2058,  0.4605],
        [-1.2058,  0.4605]])

In [332]:
class NNPredictor(object):

    def __init__(self):
        # N is batch size; D_in is input dimension;
        # H is hidden dimension; D_out is output dimension.
        self.N, self.D_in, self.H, self.D_out = 1, 2, 100, 2

        # Construct our model by instantiating the class defined above
        self.model = TwoLayerNet(self.D_in, self.H, self.D_out)

        # Construct our loss function and an Optimizer. The call to model.parameters()
        # in the SGD constructor will contain the learnable parameters of the two
        # nn.Linear modules which are members of the model.
        self.criterion = torch.nn.MSELoss(reduction='sum')
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=1e-4)

        self.x = torch.empty(0, self.D_in)
        self.y = torch.empty(0, self.D_out)
        self._is_trained = False
        # Diagnostic data => remove in production
        self.loss_val = list()

    @property
    def is_trained(self):
        return self._is_trained

    @is_trained.setter
    def is_trained(self, value):
        self._is_trained = value

    def retrain(self):
        self.n_steps = 1000
        for t in range(self.n_steps):
            # Forward pass: Compute predicted y by passing x to the model
            y_pred = self.model(self.x)

            # Compute and print loss
            loss = self.criterion(y_pred, self.y)
            self.loss_val.append(loss.item())

            # Zero gradients, perform a backward pass, and update the weights.
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        self.is_trained = True
        
    def add(self, x, y):
        # TODO: don't use `torch.cat` in this incremental mode => will scale poorly
        # instead: use batched buffers
        self.x = torch.cat((self.x, torch.from_numpy(x).unsqueeze_(0).float()), 0)
        self.y = torch.cat((self.y, torch.from_numpy(y).unsqueeze_(0).float()), 0)
    
    def predict(self, x):
        return np.squeeze(
            self.model.forward(
                torch.from_numpy(x).unsqueeze_(0).float() # inputs need to be [[x_1, x_2, ...]] as floats
            ).detach().numpy() # outputs need to be numpy (non-grad => detach)
        ) # outputs need to be [y_1, y_2, ...]

In [333]:
import functools


def speedup_decorator(retrain_freq=10):

    def my_decorator(func):
        func.predictor = NNPredictor()
        func.counter = 0;
        func.retrain_freq = retrain_freq

        @functools.wraps(func)
        def speedup_wrapper(*args, **kwargs):

            A, b, x0, e, nmax_iter, *eargs = args

            if func.predictor.is_trained:
                pred_x0 = func.predictor.predict(b)
                print(pred_x0)
            else:
                pred_x0 = x0

            target  = func(A, b, pred_x0, e, nmax_iter, *eargs)

            res = target[-1]

            func.predictor.add(b, res)
            func.counter += 1
            print(func.counter)

            if func.counter%retrain_freq == 0:
                print("retraining")
                func.predictor.retrain()

            return target

        return speedup_wrapper
    return my_decorator

In [334]:
x = GMRES(A, b, x0, e, nmax_iter)

In [335]:
x

[array([3, 2]), array([1.96153846, 1.30769231]), array([2., 1.])]

In [336]:
@speedup_decorator(retrain_freq=2)
def MLGMRES(A, b, x0, e, nmax_iter):
    return GMRES(A, b, x0, e, nmax_iter)

In [343]:
b1 = np.array([3., 2.])
MLGMRES(A, b1, x0, e, nmax_iter)

[2.0010324 1.0002786]
4
retraining


[array([-0.00131094, -0.00198269]),
 array([2.00120868, 1.00054527]),
 array([2., 1.])]

In [344]:
b2 = np.array([3.1, 2.1])
MLGMRES(A, b2, x0, e, nmax_iter)

[2.0777142 1.0373486]
5


[array([-0.01506283,  0.0162519 ]),
 array([2.07995765, 1.03492808]),
 array([2.07142857, 1.02857143])]

In [345]:
b3 = np.array([3.2, 2.0])
MLGMRES(A, b3, x0, e, nmax_iter)

[2.0923827 1.0594704]
6
retraining


[array([ 0.04814692, -0.03926635]),
 array([2.086344  , 1.06439527]),
 array([2.11428571, 1.08571429])]

In [346]:
b4 = np.array([3.2, 1.9])
MLGMRES(A, b4, x0, e, nmax_iter)

[2.0680227 1.0604382]
7


[array([ 0.07153912, -0.06231556]),
 array([2.05863556, 1.06861504]),
 array([2.1, 1.1])]

In [347]:
b5 = np.array([2.2, 2.0])
MLGMRES(A, b5, x0, e, nmax_iter)

[1.692209  0.7989203]
8
retraining


[array([-0.29112928,  0.11905408]),
 array([1.70960084, 0.79180808]),
 array([1.54285714, 0.65714286])]

In [348]:
b6 = np.array([0.2, 2.0])
MLGMRES(A, b6, x0, e, nmax_iter)

[1.042531   0.46768808]
9


[array([-1.3102191 ,  0.74315929]),
 array([1.16234441, 0.39972965]),
 array([ 0.4, -0.2])]

In [349]:
b7 = np.array([3.2, 4.0])
MLGMRES(A, b7, x0, e, nmax_iter)

[2.654543  0.9779748]
10
retraining


[array([-0.4325177 , -0.05172968]),
 array([2.57378978, 0.96831659]),
 array([2.4, 0.8])]

In [350]:
b8 = np.array([3.3, 2.0])
MLGMRES(A, b8, x0, e, nmax_iter)

[2.1938999 1.1449088]
11


[array([-0.03880866, -0.00206447]),
 array([2.18864775, 1.14462939]),
 array([2.17142857, 1.12857143])]

In [351]:
b9 = np.array([5.2, 2.0])
MLGMRES(A, b9, x0, e, nmax_iter)

[3.3756711 1.9581655]
12
retraining


[array([-0.13383667, -0.29435134]),
 array([3.40482546, 2.02228555]),
 array([3.25714286, 1.94285714])]

In [352]:
b10 = np.array([4.2, 1.0])
MLGMRES(A, b10, x0, e, nmax_iter)

[2.5921319 1.5885458]
13


[array([ 0.01932235, -0.42221236]),
 array([2.58765065, 1.68646447]),
 array([2.54285714, 1.65714286])]

In [353]:
b10 = np.array([4.2, 1.0])
MLGMRES(A, b10, x0, e, nmax_iter)

[2.5921319 1.5885458]
14
retraining


[array([ 0.01932235, -0.42221236]),
 array([2.58765065, 1.68646447]),
 array([2.54285714, 1.65714286])]

In [354]:
b10 = np.array([4.2, 1.0])
MLGMRES(A, b10, x0, e, nmax_iter)

[2.54779   1.6316874]
15


[array([ 0.02052255, -0.11662054]),
 array([2.54326242, 1.65741595]),
 array([2.54285714, 1.65714286])]

In [118]:
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 100, 2, 100, 2

# Create random Tensors to hold inputs and outputs
x = torch.randn(N, D_in)   # equivalent to b  -- in the context of Ax = b
y = torch.empty(N, D_out)  # equivalent to x  ----------- ~~ ------------
for i in range(N):
    y[i, :] = torch.from_numpy(matmul_a(A, x[i, :]))

# Construct our model by instantiating the class defined above
model = TwoLayerNet(D_in, H, D_out)

# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters of the two
# nn.Linear modules which are members of the model.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

n_steps = 10000
print_f = n_steps/100
with tqdm(total=n_steps, file=sys.stdout, leave=True) as pbar:
    for t in range(n_steps):
        # Forward pass: Compute predicted y by passing x to the model
        y_pred = model(x)

        # Compute and print loss
        loss = criterion(y_pred, y)
        if t % print_f == 0:
            # print(t, loss.item())
            loss_val = loss.item()
            pbar.set_description(f"t={t:6d}, loss={loss_val:.5f}")

        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        pbar.update(1)

t=  9900, loss=0.05285: 100%|██████████| 10000/10000 [00:04<00:00, 2477.12it/s]


In [91]:
model.forward(x[1,:])

tensor([ 0.5003, -7.7463], grad_fn=<AddBackward0>)

In [92]:
y[1,:]

tensor([ 0.5044, -7.7441])

In [120]:
x[0,:]

tensor([-1.5171, -0.9901])