In [7]:
import torch.nn.functional as F
import torch.nn as nn
from torch.nn.parameter import Parameter
import torch
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
#plt.style.use('seaborn')


In [102]:
def generate_synthetic_arithmetic_dataset(arithmetic_op, min_value, max_value, sample_size, set_size, boundaries = None):
    """
    generates a dataset of integers for the synthetics arithmetic task

    :param arithmetic_op: the type of operation to perform on the sum of the two sub sections can be either :
    ["add" , "subtract", "multiply", "divide", "root", "square"]
    :param min_value: the minimum possible value of the generated integers
    :param max_value: the maximum possible value of the generated integers
    :param sample_size: the number of integers per sample
    :param set_size: the number of samples in the dataset
    :param boundaries: [Optional] an iterable of 4 integer indices in the following format :
    [start of 1st section, end of 1st section, start of 2nd section, end of 2nd section]
    if None, the boundaries are randomly generated.
    :return: the training dataset input, the training true outputs, the boundaries of the sub sections used
    """
    scaled_input_values = np.random.uniform(min_value, max_value, (set_size, sample_size))

    if boundaries is None:
        boundaries = [np.random.randint(sample_size) for i in range(4)]
        boundaries[1] = np.random.randint(boundaries[0], sample_size)
        boundaries[3] = np.random.randint(boundaries[2], sample_size)
    else:
        if len(boundaries) != 4:
            raise ValueError("boundaries is expected to be a list of 4 elements but found {}".format(len(boundaries)))

    a = np.array([np.sum(sample[boundaries[0]:boundaries[1]]) for sample in scaled_input_values])
    b = np.array([np.sum(sample[boundaries[2]:boundaries[3]]) for sample in scaled_input_values])
    
    true_outputs = None
    if "add" in str.lower(arithmetic_op):
        true_outputs = a + b
    elif "sub" in str.lower(arithmetic_op):
        true_outputs = a - b
    elif "mult" in str.lower(arithmetic_op):
        true_outputs = a * b
    elif "div" in str.lower(arithmetic_op):
        true_outputs = a / b
    elif "square" == str.lower(arithmetic_op):
        true_outputs = a * a
    elif "root" in str.lower(arithmetic_op):
        true_outputs = np.sqrt(a)
    
    scaled_input_values = torch.tensor(scaled_input_values, dtype=torch.float32)
    true_outputs = torch.tensor(true_outputs, dtype=torch.float32).unsqueeze(1)
        
    return scaled_input_values, true_outputs, boundaries

In [27]:
def generate_recurrent_synthetic_arithmetic_dataset(arithmetic_op, min_value, max_value, sample_size, sequence_size, set_size, boundaries = None):
    """
    generates a dataset of integers for the synthetics arithmetic task

    :param arithmetic_op: the type of operation to perform on the sum of the two sub sections can be either :
    ["add" , "subtract", "multiply", "divide", "root", "square"]
    :param min_value: the minimum possible value of the generated integers
    :param max_value: the maximum possible value of the generated integers
    :param sample_size: the number of integers per sample
    :param set_size: the number of samples in the dataset
    :param boundaries: [Optional] an iterable of 4 integer indices in the following format :
    [start of 1st section, end of 1st section, start of 2nd section, end of 2nd section]
    if None, the boundaries are randomly generated.
    :return: the training dataset input, the training true outputs, the boundaries of the sub sections used
    """
    scaled_input_values = np.random.uniform(min_value, max_value, (set_size, sample_size, sequence_size))

    if boundaries is None:
        boundaries = [np.random.randint(sample_size) for i in range(4)]
        boundaries[1] = np.random.randint(boundaries[0], sample_size)
        boundaries[3] = np.random.randint(boundaries[2], sample_size)
    else:
        if len(boundaries) != 4:
            raise ValueError("boundaries is expected to be a list of 4 elements but found {}".format(len(boundaries)))

    a = np.array([np.sum(sample[boundaries[0]:boundaries[1],:]) for sample in scaled_input_values])
    b = np.array([np.sum(sample[boundaries[2]:boundaries[3],:]) for sample in scaled_input_values])
    
    true_outputs = None
    if "add" in str.lower(arithmetic_op):
        true_outputs = a + b
    elif "sub" in str.lower(arithmetic_op):
        true_outputs = a - b
    elif "mult" in str.lower(arithmetic_op):
        true_outputs = a * b
    elif "div" in str.lower(arithmetic_op):
        true_outputs = a / b
    elif "square" == str.lower(arithmetic_op):
        true_outputs = a * a
    elif "root" in str.lower(arithmetic_op):
        true_outputs = np.sqrt(a)
    
    scaled_input_values = torch.tensor(scaled_input_values, dtype=torch.float32)
    true_outputs = torch.tensor(true_outputs, dtype=torch.float32).unsqueeze(1)
        
    return scaled_input_values, true_outputs, boundaries

In [28]:
arithmetic_op  ='add'
min_value = 0
max_value = 2
sample_size = 50
sequence_size = 10
set_size = 10

# X = (set_size, sample_size, sequence_size)

# set_size: how many input data
# (sample_size, sequence_size) the size of each input data

X, y, boundaries = generate_recurrent_synthetic_arithmetic_dataset(
                                                arithmetic_op, min_value,
                                                max_value, sample_size,
                                                sequence_size, set_size)


np.shape(X),np.shape(y), boundaries



(torch.Size([10, 50, 10]), torch.Size([10, 1]), [42, 44, 21, 22])

In [103]:
class NeuralAccumulatorCell(nn.Module):
    """A Neural Accumulator (NAC) cell [1].

    Attributes:
        in_dim: size of the input sample.
        out_dim: size of the output sample.

    Sources:
        [1]: https://arxiv.org/abs/1808.00508
    """
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim

        self.W_hat = Parameter(torch.Tensor(out_dim, in_dim))
        self.M_hat = Parameter(torch.Tensor(out_dim, in_dim))

        self.register_parameter('W_hat', self.W_hat)
        self.register_parameter('M_hat', self.M_hat)
        self.register_parameter('bias', None)

        self._reset_params()

    def _reset_params(self):
        init.kaiming_uniform_(self.W_hat)
        init.kaiming_uniform_(self.M_hat)

    def forward(self, input):
        W = torch.tanh(self.W_hat) * torch.sigmoid(self.M_hat)
        return F.linear(input, W, self.bias)

    def extra_repr(self):
        return 'in_dim={}, out_dim={}'.format(
            self.in_dim, self.out_dim
        )


class NAC(nn.Module):
    """A stack of NAC layers.

    Attributes:
        num_layers: the number of NAC layers.
        in_dim: the size of the input sample.
        hidden_dim: the size of the hidden layers.
        out_dim: the size of the output.
    """
    def __init__(self, num_layers, in_dim, hidden_dim, out_dim):
        super().__init__()
        self.num_layers = num_layers
        self.in_dim = in_dim
        self.hidden_dim = hidden_dim
        self.out_dim = out_dim

        layers = []
        for i in range(num_layers):
            layers.append(
                NeuralAccumulatorCell(
                    hidden_dim if i > 0 else in_dim,
                    hidden_dim if i < num_layers - 1 else out_dim,
                )
            )
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        out = self.model(x)
        return out



In [104]:
class NeuralArithmeticLogicUnitCell(nn.Module):
    """A Neural Arithmetic Logic Unit (NALU) cell [1].

    Attributes:
        in_dim: size of the input sample.
        out_dim: size of the output sample.

    Sources:
        [1]: https://arxiv.org/abs/1808.00508
    """
    def __init__(self, in_dim, out_dim):
        super(NeuralArithmeticLogicUnitCell).__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.eps = 1e-10

        self.G = Parameter(torch.Tensor(out_dim, in_dim))
        self.nac = NeuralAccumulatorCell(in_dim, out_dim)
        self.register_parameter('bias', None)

        init.kaiming_uniform_(self.G,a=np.sqrt(5))

    def forward(self, input):
        a = self.nac(input)
        g = torch.sigmoid(F.linear(input, self.G, self.bias))
        add_sub = g * a
        log_input = torch.log(torch.abs(input) + self.eps)
        m = torch.exp(self.nac(log_input))
        mul_div = (1 - g) * m
        y = add_sub + mul_div
        return y

    def extra_repr(self):
        return 'in_dim={}, out_dim={}'.format(
            self.in_dim, self.out_dim
        )


class NALU(nn.Module):
    """A stack of NAC layers.
    Attributes:
        num_layers: the number of NAC layers.
        in_dim: the size of the input sample.
        hidden_dim: the size of the hidden layers.
        out_dim: the size of the output.
    """
    def __init__(self, num_layers, in_dim, hidden_dim, out_dim):
        super(NALU).__init__()
        self.num_layers = num_layers
        self.in_dim = in_dim
        self.hidden_dim = hidden_dim
        self.out_dim = out_dim

        layers = []
        for i in range(num_layers):
            layers.append(
                NeuralArithmeticLogicUnitCell(
                    hidden_dim if i > 0 else in_dim,
                    hidden_dim if i < num_layers - 1 else out_dim,
                )
            )
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        out = self.model(x)
        return out

In [105]:
class RNALU(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(RNALU, self).__init__()
        self.NALU_1 = NeuralArithmeticLogicUnitCell(input_dim, output_dim)
        self.NALU_2 = NeuralArithmeticLogicUnitCell(output_dim*2, output_dim)
        
    def forward(self, y0, x1):
        self.y1 = self.NALU_2(torch.cat([y0, self.NALU_1(x1)], dim=1))
        return y0, self.y1   

In [106]:
class RNN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(RNALU, self).__init__()
        self.W = Parameter(init.kaiming_uniform_(torch.Tensor(output_dim, output_dim)))
        self.V = Parameter(init.kaiming_uniform_(torch.Tensor(output_dim, output_dim)))
        self.register_parameter('bias', None)
        
    def forward(self, x0, x1):
        self.y0 = nn.tanh(F.linear(x0, self.W, self.bias))
        self.y1 = nn.tanh(F.linear(self.y0, self.V) + F.linear(x1, self.W, self.bias) )
        return self.y0, self.y1                       

In [107]:
class NALUNet(nn.Module):
    def __init__(self, hidden_dim):
        super(NALUNet, self).__init__()
        self.net = RNALU(2, hidden_dim)
        self.lin = nn.Linear(hidden_dim, 1)
        self.hidden_dim = hidden_dim
        
    def forward(self, x):
        self.prev = torch.zeros([x.size(0), self.hidden_dim], dtype=torch.float)              
        for i in range(x.size(1)):
            self.y0, self.y1 = self.net(self.prev ,x[:,i,:])
            self.prev = self.y1
        out = self.lin(self.y1)
        return out

In [15]:
class DataGenerator():
    
    ARITHMETIC_FUNCTIONS = {
        'add': lambda x, y: x + y,
        'sub': lambda x, y: x - y,
        'mul': lambda x, y: x * y,
        'div': lambda x, y: x / y,
        'squared': lambda x, y: torch.pow(x, 2),
        'root': lambda x, y: torch.sqrt(x),
    }

    def generate_data_recurrent(num_train, num_test, time_series_len, fn, support):
        a = torch.FloatTensor(num_train + num_test, time_series_len).uniform_(*support).unsqueeze_(1)
        b = torch.FloatTensor(num_train + num_test, time_series_len).uniform_(*support).unsqueeze_(1)

        X = torch.FloatTensor(num_train + num_test, time_series_len, 2)
        y = torch.FloatTensor(num_train + num_test, 1)
        
        for i in range(num_train + num_test):
            X[i,:,0] = a[i,:]
            X[i,:,1] = b[i,:]
            y[i,0] = fn(torch.sum(a[i,:]), torch.sum(b[i,:]))
            
        X = torch.FloatTensor(X)
        y = torch.FloatTensor(y).unsqueeze_(1)
        indices = list(range(num_train + num_test))
        np.random.shuffle(indices)
        X_train, y_train = X[indices[num_test:]], y[indices[num_test:]]
        X_test, y_test = X[indices[:num_test]], y[indices[:num_test]]
        return X_train, y_train, X_test, y_test




In [14]:
fn = DataGenerator.ARITHMETIC_FUNCTIONS['add']
RANGE_Inter = [1, 10]
RANGE_Extra = [1, 100]

#X_train, y_train, X_valid, y_valid = dg.generate_data(num_train=500, num_test=50, dim=100, num_sum=5, fn=fn, support=RANGE_Inter)
#_, _, X_test, y_test = dg.generate_data(num_train=0, num_test=50, dim=100, num_sum=5, fn=fn, support=RANGE_Extra)

X_train, y_train, X_valid, y_valid = DataGenerator.generate_data_recurrent(num_train=500, num_test=50, time_series_len = 10, fn=fn, support=RANGE_Inter)
_, _, X_test, y_test = DataGenerator.generate_data_recurrent(num_train=0, num_test=50, time_series_len = 10, fn=fn, support=RANGE_Extra)


np.shape(X_train)

torch.Size([550, 1, 10])
torch.Size([50, 1, 10])


torch.Size([500, 10, 2])

In [146]:
fn = DataGenerator.ARITHMETIC_FUNCTIONS['add']
RANGE_Inter = [1, 10]
RANGE_Extra = [1, 100]

#X_train, y_train, X_valid, y_valid = dg.generate_data(num_train=500, num_test=50, dim=100, num_sum=5, fn=fn, support=RANGE_Inter)
#_, _, X_test, y_test = dg.generate_data(num_train=0, num_test=50, dim=100, num_sum=5, fn=fn, support=RANGE_Extra)

tensor([[[5.9296, 4.6421],
         [8.0179, 1.0727]],

        [[1.1866, 4.2403],
         [7.5139, 6.7523]]])
tensor([[[19.6621]],

        [[19.6930]]])
tensor(19.6930)


In [110]:
# Defining the network to train on

## Has to be NAC/NALU
net = NALUNet(1)

#Defining criterion
criterion = nn.MSELoss()

# Defining optimizers
optimizer = optim.RMSprop(net_1.parameters(), lr = 1e-2)

AttributeError: cannot assign parameters before Module.__init__() call

In [None]:
# setting hyperparameters and gettings epoch sizes
num_epochs = int(1e4)
num_samples_train = X_train.shape[0]
num_samples_valid = X_valid.shape[0]
num_samples_test = X_test.shape[0]

# setting up lists for handling loss/accuracy
train_loss = [[], []]
valid_loss = [[], []]
test_loss = [[], []]

NAC_W1_a, NAC_W1_b = [], []
NAC_W2_a, NAC_W2_b = [], []

NALU_g1, NALU_g2 = [], []
NALU_g1_var, NALU_g2_var = [], []

Reg_W1_a, Reg_W1_b = [], []
Reg_W2_a, Reg_W2_b = [], []

get_slice = lambda i, size: range(i * size, (i + 1) * size)

for epoch in range(num_epochs):
    # Forward -> Backprob -> Update params
    
    ## Training Network 1
    net_1.train()
    output_1= net_1(X_train)
    loss_1 = criterion(output_1, y_train[:,:,0])
    optimizer_1.zero_grad()
    loss_1.backward()
    optimizer_1.step()
    
    ## Training Network 2
    net_2.train()
    output_2 = net_2(X_train)
    loss_2 = criterion(output_2[:,-1,:], y_train[:,:,0])
    optimizer_2.zero_grad()
    loss_2.backward()
    optimizer_2.step()
 
    # Evaluating
    
    net_1.eval()
    net_2.eval()
    
    ### Evaluate training

    train_preds_1 = net_1(X_train).data.numpy()
    train_preds_2 = net_2(X_train)[:,-1,:].data.numpy()
    train_targs = y_train.data.numpy()[:,:,0]
    
    ### Evaluate validation

    val_preds_1 = net_1(X_valid).data.numpy()
    val_preds_2 = net_2(X_valid)[:,-1,:].data.numpy()
    val_targs = y_valid.data.numpy()[:,:,0]
        
#     NALU_g1.append(np.mean(net_1.g1.data.numpy(),axis=0).tolist())
#     NALU_g2.append(np.mean(net_1.g2.data.numpy(),axis=0).tolist())
    
#     NALU_g1_var.append(np.std(net_1.g1.data.numpy(),axis=0).tolist())
#     NALU_g2_var.append(np.std(net_1.g2.data.numpy(),axis=0).tolist())  
    
    ### Evaluate test (outside of range)
   
    test_preds_1 = net_1(X_test).data.numpy()
    test_preds_2 = net_2(X_test)[:,-1,:].data.numpy()
    test_targs = y_test.data.numpy()[:,:,0]

    train_loss_cur_1 = np.mean(np.abs(train_preds_1 - train_targs))
    train_loss_cur_2 = np.mean(np.abs(train_preds_2 - train_targs))
    valid_loss_cur_1 = np.mean(np.abs(val_preds_1 - val_targs))
    valid_loss_cur_2 = np.mean(np.abs(val_preds_2 - val_targs))
    test_loss_cur_1 = np.mean(np.abs(test_preds_1 - test_targs))
    test_loss_cur_2 = np.mean(np.abs(test_preds_2 - test_targs))
    
    train_loss[0].append(train_loss_cur_1)
    train_loss[1].append(train_loss_cur_2)
    valid_loss[0].append(valid_loss_cur_1)
    valid_loss[1].append(valid_loss_cur_2)
    test_loss[0].append(test_loss_cur_1)
    test_loss[1].append(test_loss_cur_2)
    
    #Getting the weights
    #NAC_W1_a.append(net_1.W1[:,0].data.numpy().tolist())
    #NAC_W1_b.append(net_1.W1[:,1].data.numpy().tolist())
    #NAC_W2_a.append(net_1.W2[:,0].data.numpy().tolist())
    #NAC_W2_b.append(net_1.W2[:,1].data.numpy().tolist())
    
    #Reg_W1_a.append(net_2.W1[:,0].data.numpy().tolist())
    #Reg_W1_b.append(net_2.W1[:,1].data.numpy().tolist())
    #Reg_W2_a.append(net_2.W2[:,0].data.numpy().tolist())
    #Reg_W2_b.append(net_2.W2[:,1].data.numpy().tolist())
    
    if epoch % (num_epochs/10) == 0:
        print("Epoch %2i : Train losses (%f, %f), Test losses (%f, %f)" % (
                epoch+1, train_loss_cur_1, train_loss_cur_2, test_loss_cur_1, test_loss_cur_2))

In [None]:
# Plotting Training/Validation/Test Losses
epoch = np.arange(len(train_loss[0]))
f, (ax1, ax2) = plt.subplots(1,2, sharex = True, sharey = True, figsize=(15,5))

ax1.plot(epoch, train_loss[0], 'r', epoch, valid_loss[0], 'b')
ax2.plot(epoch, train_loss[1], 'r', epoch, valid_loss[1], 'b')

ax1.legend(['Train Loss','Validation Loss'])
ax2.legend(['Train Loss','Validation Loss'])

ax1.set_title('NALU Net'), ax2.set_title('Reg Net')
ax1.set_xlabel('Updates'), ax2.set_xlabel('Updates'), ax1.set_ylabel('Loss')

plt.figure(figsize=(15,5))

plt.plot(epoch, test_loss[0], 'r', epoch, test_loss[1], 'b')
plt.title('Extrapolation Loss')
plt.legend(['NALU Test Loss','Reg Test Loss'])
plt.xlabel('Updates'), plt.ylabel('Loss')

plt.show()

In [None]:
'''
FEATURES_NUM = 100
epochs = 100
batch_size = 10
learning_rate = 0.1
operator = "add"

in_dim = FEATURES_NUM
hidden_dim = 2
out_dim = 1
num_layers = 2

dim = in_dim # dimensition for generating data

model = NALU(num_layers, in_dim, hidden_dim, out_dim)
print(model)

epochs = 2000
#lrs = [0.001, 0.005, 0.001 ]
#bss = [10,20,50]
lrs = [0.05]
bss = [1]
i = 0
for learning_rate in lrs:
    for batch_size in bss:
        print("lrs_" + str(learning_rate) + "_bss_" + str(batch_size))
        #print(i)
        #i += 1
        #filename = "lrs_" + str(learning_rate) + "_bss_" + str(batch_size) + ".txt"
        #file = open(filename,"a")
        #boundaries = [0,10,5,14]
        X_train, y_train, boundaries = generate_synthetic_arithmetic_dataset(operator, 100, 110, FEATURES_NUM, 1000)
        X_test, y_test, _ = generate_synthetic_arithmetic_dataset(operator, 0, 10, FEATURES_NUM, 1000, boundaries)
        optimizer = torch.optim.RMSprop(model.parameters(),lr=learning_rate)
        print(boundaries)
        for epoch in range(epochs):

            for batch in range(len(X_train) // batch_size):
                model.train()
                optimizer.zero_grad()

                X_batch_train = X_train[batch:(batch+batch_size),:]
                y_batch_train = y_train[batch:(batch+batch_size),:]

                out = model(X_batch_train)
                if np.sum(np.isnan(out.detach().numpy())) > 0:
                    print(epoch,batch)
                loss = F.mse_loss(out, y_batch_train)

                loss.backward()
                optimizer.step()


            if epoch % 10 == 0:
                model.eval()

                X_batch_test = X_test[batch:(batch+batch_size),:]
                y_batch_test = y_test[batch:(batch+batch_size),:]

                output_test = model(X_batch_test)

                acc = np.sum(np.isclose(output_test.detach().numpy(), y_batch_test.detach().numpy(), atol=.1, rtol=0)) / len(y_batch_test)
                print("epoch \t", epoch, "\t loss \t",  loss.detach().numpy(), "\t acc \t", acc)
                #file.write("epoch \t" + str(epoch) + "\t loss \t" + str(loss.detach().numpy()) + "\t acc \t" + str(acc)+ "\n")
        
        
        
        

'''