In [7]:
from models_new.nac import NAC
import torch.nn.functional as F
import torch
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from tqdm import tqdm_notebook as tqdm

from data_generator_helper import generate_synthetic_selection_dataset

from models_new.nac import NeuralAccumulatorCell
import torch
import torchvision.utils as vutils
import numpy as np
import torchvision.models as models
from torchvision import datasets
from tensorboardX import SummaryWriter
import datetime
import os

In [2]:
def reportLoss(loss, epoch):
    print("epoch {},  \t loss {}".format(epoch, loss))
    
def train(model, optimizer, x_train, y_train, epochs, batch_size):
    
    for epoch in range(epochs):
        
        #print('G is: ',model.G.data)
        #print('g is: ',torch.sigmoid(model.G))
        
        for batch in range(len(x_train) // batch_size):
            
            model.train()
            optimizer.zero_grad()

            x_batch_train = x_train[batch:(batch+batch_size),:]
            y_batch_train = y_train[batch:(batch+batch_size),:]

            out = model(x_batch_train)

            loss = F.mse_loss(out, y_batch_train)
            
            if loss != loss:
                break
                print("nan detected")
            
            loss.backward()
            optimizer.step()
        
        if loss != loss:
            break
        
        if epoch % 100 == 0: 
            #pass
            reportLoss(loss.data, epoch)
            
        
        #model.temperature = epoch * 0.001
            
    return test(model,x_train,y_train)
        
def test(model, x_test, y_test):
    
    model.eval()
    output_test = model(x_test)
    loss = F.mse_loss(output_test, y_test)
    
    return loss

In [3]:
import math
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F

from torch.nn.parameter import Parameter


class NeuralAccumulatorCell(nn.Module):
    """A Neural Accumulator (NAC) cell [1].
    Attributes:
        in_dim: size of the input sample.
        out_dim: size of the output sample.
    Sources:
        [1]: https://arxiv.org/abs/1808.00508
    """
    def __init__(self, in_dim, out_dim, ini):
        super().__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.initial = ini
        self.W_hat = Parameter(torch.Tensor(out_dim, in_dim))
        self.M_hat = Parameter(torch.Tensor(out_dim, in_dim))

        self.register_parameter('W_hat', self.W_hat)
        self.register_parameter('M_hat', self.M_hat)
        self.register_parameter('bias', None)

        self._reset_params(ini)

    def _reset_params(self,ini):
        if ini =='Kai_uni':
            init.kaiming_uniform_(self.W_hat)
            init.kaiming_uniform_(self.M_hat)

        if ini =='Xav_norm':
            init.xavier_normal_(self.W_hat)
            init.xavier_normal_(self.M_hat)

        if ini =='Kai_norm':
            init.kaiming_normal_(self.W_hat)
            init.kaiming_normal_(self.M_hat)

        if ini =='Zeros':
            init.zeros_(self.W_hat)
            init.zeros_(self.M_hat)

        if ini =='Ones':
            init.ones_(self.W_hat)
            init.ones_(self.M_hat)

    def forward(self, input):
        W = torch.tanh(self.W_hat) * torch.sigmoid(self.M_hat)
        return F.linear(input, W, self.bias)

    def extra_repr(self):
        return 'in_dim={}, out_dim={}'.format(
            self.in_dim, self.out_dim
        )

In [4]:
import math
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F

from torch.nn.parameter import Parameter
import numpy as np

class NeuralArithmeticLogicUnitCell(nn.Module):
    """A Neural Arithmetic Logic Unit (NALU) cell [1].

    Attributes:
        in_dim: size of the input sample.
        out_dim: size of the output sample.

    Sources:
        [1]: https://arxiv.org/abs/1808.00508
    """
    def __init__(self, in_dim, out_dim, ini):
        super().__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.eps = 1e-10

        self.G = Parameter(torch.Tensor(1, 2))
        #print(self.G)
        self.nac = NeuralAccumulatorCell(in_dim, out_dim, ini)
        self.register_parameter('bias', None)
        self.temperature = 0.005
        
        if ini =='Kai_uni':
            init.kaiming_uniform_(self.G, a=math.sqrt(5))

        if ini =='Xav_norm':
            init.xavier_normal_(self.G)

        if ini =='Kai_norm':
            init.kaiming_normal_(self.G)

        if ini =='Zeros':
            init.zeros_(self.G)

        if ini =='Ones':
            init.ones_(self.G)


    def forward(self, input):
        a = self.nac(input)

        #g = torch.sigmoid(F.linear(input, self.G, self.bias) * self.temperature)
        g = torch.sigmoid(self.G)
        #print(g)
        add_sub = g * a
        log_input = torch.log(torch.abs(input) + self.eps)
        m = torch.exp(self.nac(log_input))
        mul_div = (1 - g) * m
        y = add_sub + mul_div
        return y
    
    def extra_repr(self):
        return 'in_dim={}, out_dim={}'.format(
            self.in_dim, self.out_dim
        )


In [32]:
print(model)

NeuralArithmeticLogicUnitCell(
  in_dim=100, out_dim=2
  (nac): NeuralAccumulatorCell(in_dim=100, out_dim=2)
)


In [5]:
test_per_range = 5
sample_size = 100
set_size = 1000

in_dim = sample_size
hidden_dim = 1
out_dim = 2
num_layers = 1

lr = 0.01
epochs = 2000
batch_size = 3

#values = np.linspace(1,1000,10)
#inits = ['Kai_norm']
values = [[0,1],[0,10],[0,100],[-10,10],[-100,100],[-1000,1000]]

train_loss = np.zeros((len(values),test_per_range))
test_loss = np.zeros((len(values),test_per_range))

for j, val in tqdm(enumerate(values)):
    
    print("Testing range: ",val)
    
    test_min_val = val[0]
    test_max_val = val[1]
    
    min_value = 0
    max_value = 1
    
    for i in range(test_per_range):

        model = NeuralArithmeticLogicUnitCell(in_dim, out_dim, 'Kai_norm')
        optimizer = torch.optim.RMSprop(model.parameters(),lr=lr)

        x_train, y_train, boundaries = generate_synthetic_selection_dataset(min_value, max_value,
                                                                    sample_size, set_size, boundaries = None)

        x_test, y_test, _ = generate_synthetic_selection_dataset(test_min_val, test_max_val,
                                                                    sample_size, set_size, boundaries = boundaries)

        train_loss[j, i] = train(model, optimizer, x_train, y_train, epochs, batch_size)

        test_loss[j, i]  = test(model, x_test, y_test)
        
    
        

Testing range:  [0, 1]
epoch 0,  	 loss 1.1447168588638306
epoch 100,  	 loss 7.383202382804654e-10
epoch 200,  	 loss 2.3120112768726386e-10
epoch 300,  	 loss 1.4006218407303095e-10
epoch 400,  	 loss 9.936229616869241e-11
epoch 500,  	 loss 8.541670842854288e-11
epoch 600,  	 loss 8.067976148273814e-11
epoch 700,  	 loss 4.945377440890297e-11
epoch 800,  	 loss 3.607662432680847e-11
epoch 900,  	 loss 4.3352581940991897e-11
epoch 1000,  	 loss 2.6981675574355712e-11
epoch 1100,  	 loss 3.607662432680847e-11
epoch 1200,  	 loss 5.3091753215994686e-11
epoch 1300,  	 loss 4.2822041051993054e-11
epoch 1400,  	 loss 1.8796223258399358e-11
epoch 1500,  	 loss 5.203067490744395e-11
epoch 1600,  	 loss 3.490185918053612e-11
epoch 1700,  	 loss 3.13017771425006e-11
epoch 1800,  	 loss 1.48929757415317e-11
epoch 1900,  	 loss 1.8834120027455548e-11
epoch 0,  	 loss 103.58143615722656
epoch 100,  	 loss 2.3923983573913574
epoch 200,  	 loss 2.39180588722229
epoch 300,  	 loss 2.287548303604126

epoch 400,  	 loss 5.356544721668577e-11
epoch 500,  	 loss 2.999437850870201e-11
epoch 600,  	 loss 2.4224769759606524e-11
epoch 700,  	 loss 1.1008675741830398e-11
epoch 800,  	 loss 1.688249540165998e-11
epoch 900,  	 loss 1.8199367360360874e-11
epoch 1000,  	 loss 1.2022383089060895e-11
epoch 1100,  	 loss 1.251502634164181e-11
epoch 1200,  	 loss 1.550878002409828e-11
epoch 1300,  	 loss 8.545460346287559e-12
epoch 1400,  	 loss 1.02507629121118e-11
epoch 1500,  	 loss 1.4504546001770091e-11
epoch 1600,  	 loss 7.531752999057062e-12
epoch 1700,  	 loss 1.0326554715500702e-11
epoch 1800,  	 loss 9.379164112033322e-12
epoch 1900,  	 loss 4.149569576838985e-12
epoch 0,  	 loss 57.633888244628906
epoch 100,  	 loss 0.1943642795085907
epoch 200,  	 loss 0.1949814110994339
epoch 300,  	 loss 0.19138318300247192
epoch 400,  	 loss 0.18934206664562225
epoch 500,  	 loss 0.18777696788311005
epoch 600,  	 loss 0.18636342883110046
epoch 700,  	 loss 0.18512015044689178
epoch 800,  	 loss 0.1

epoch 800,  	 loss 6.702401638031006
epoch 900,  	 loss 6.70673131942749
epoch 1000,  	 loss 6.709596633911133
epoch 1100,  	 loss 6.710992813110352
epoch 1200,  	 loss 6.710563659667969
epoch 1300,  	 loss 6.709221363067627
epoch 1400,  	 loss 6.707681655883789
epoch 1500,  	 loss 6.706239223480225
epoch 1600,  	 loss 6.705009460449219
epoch 1700,  	 loss 6.7039313316345215
epoch 1800,  	 loss 6.702988624572754
epoch 1900,  	 loss 6.70228910446167
epoch 0,  	 loss 31.823637008666992
epoch 100,  	 loss 0.3324268162250519
epoch 200,  	 loss 0.2981541156768799
epoch 300,  	 loss 0.3590238094329834
epoch 400,  	 loss 0.3431975841522217
epoch 500,  	 loss 0.36042532324790955
epoch 600,  	 loss 0.36326462030410767
epoch 700,  	 loss 0.3631439805030823
epoch 800,  	 loss 0.3634462058544159
epoch 900,  	 loss 0.3662571907043457
epoch 1000,  	 loss 0.37223803997039795
epoch 1100,  	 loss 0.3741700053215027
epoch 1200,  	 loss 0.37460681796073914
epoch 1300,  	 loss 0.37502509355545044
epoch 14

In [7]:
np.savetxt("train_temperature.csv", np.mean(train_loss,axis=1), delimiter=',', fmt='%2.2f')
np.savetxt("test_temperature.csv", np.mean(test_loss,axis=1), delimiter=',', fmt='%2.2f')

In [6]:

np.mean(train_loss)
train_loss
for i in range(len(train_loss)):
    print(np.mean(train_loss[i]))

6.928090936337498e+17
nan
nan
nan
3.1619450646450665e+30
nan


In [7]:
for i in range(len(test_loss)):
    print(np.mean(test_loss[i]))

7.700181607818198e+16
nan
nan
nan
nan
nan


In [28]:
train_loss

array([[6.82913726e-11, 2.14984798e+00, 6.87983837e-11, 4.16536446e+01,
        5.86825371e+00],
       [7.04544601e+01, 3.17643238e-11, 3.85512629e-11, 9.79576975e-11,
        6.95157473e-11],
       [5.35277309e-11, 2.04450071e-01, 3.26391983e+01, 4.32088890e+01,
        1.05685558e-10],
       [3.89962576e-11, 6.36379433e+00, 2.31421566e-11, 3.70921269e-02,
        9.11201392e-11],
       [2.23760281e+01, 1.91656876e+01, 4.82981467e-11, 1.40770630e+02,
        5.58206467e-11],
       [5.58511536e-11, 3.77266829e-11, 4.83802096e-11, 9.59608257e-01,
        5.67008543e+00]])

In [29]:
test_loss

array([[7.99784891e-11, 2.77612400e+00, 8.50187698e-11, 4.69741631e+01,
        5.44384813e+00],
       [1.90444609e+04, 4.19184446e-01, 2.08014526e+17,            inf,
        4.74704408e+25],
       [           inf,            inf, 1.84690675e+06, 1.82400888e+06,
                   inf],
       [2.20811125e+06, 5.56982361e+02, 1.11066240e+09, 4.62045435e+14,
                   inf],
       [5.25089922e+04,            inf,            inf, 1.05991586e+05,
                   nan],
       [           inf,            inf,            inf,            inf,
                   inf]])

In [30]:
model.G

Parameter containing:
tensor([[ 16.4868, -27.8944]], requires_grad=True)

In [31]:
torch.sigmoid(model.G)

tensor([[1.0000e+00, 7.6842e-13]], grad_fn=<SigmoidBackward>)