In [1]:
import torch


# Generate dataset
def generate_data(num_samples=1000):
    a = torch.rand(num_samples, 1) * 10
    b = torch.rand(num_samples, 1) * 10

    add = a + b
    sub = a - b
    mul = a * b
    div = a / b

    inputs = torch.cat([a, b], dim=1)
    outputs = torch.cat([add, sub, mul, div], dim=1)

    return inputs, outputs


X, y = generate_data()

In [5]:
import torch.nn as nn
import torch.nn.functional as F


class NALU(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(NALU, self).__init__()

        self.G = nn.Parameter(torch.Tensor(input_dim, output_dim))
        self.W = nn.Parameter(torch.Tensor(input_dim, output_dim))
        self.M_hat = nn.Parameter(torch.Tensor(input_dim, output_dim))
        self.N_hat = nn.Parameter(torch.Tensor(input_dim, output_dim))

        self.register_parameter("G", self.G)
        self.register_parameter("W", self.W)
        self.register_parameter("M_hat", self.M_hat)
        self.register_parameter("N_hat", self.N_hat)
        self.reset_parameters()

    def reset_parameters(self):
        nn.init.xavier_uniform_(self.W)
        nn.init.xavier_uniform_(self.M_hat)
        nn.init.xavier_uniform_(self.N_hat)
        nn.init.zeros_(self.G)

    # def forward(self, input):
    #     W = torch.tanh(self.W) * torch.sigmoid(self.M_hat)
    #     G = torch.sigmoid(F.linear(input, self.G))
    #     m = torch.exp(F.linear(torch.log(torch.abs(input) + 1e-10), W))
    #     a = F.linear(input, W)
    #     y = G * a + (1 - G) * m
    #     return y
    def forward(self, input):
        W = torch.tanh(self.W) * torch.sigmoid(self.M_hat)
        G = torch.sigmoid(input @ self.G)  # Using @ for matrix multiplication
        m = torch.exp(input @ W)
        a = input @ W
        y = G * a + (1 - G) * m
        return y

In [13]:
class SimpleNALUNetwork(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(SimpleNALUNetwork, self).__init__()
        self.nalu1 = NALU(input_dim, hidden_dim)
        self.nalu2 = NALU(hidden_dim, output_dim)

    def forward(self, x):
        out = self.nalu1(x)
        out = self.nalu2(out)
        return out


# model = SimpleNALUNetwork(2, 10, 4)
model = SimpleNALUNetwork(2, 50, 4)

In [14]:
train_X.shape

torch.Size([800, 2])

In [36]:
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.0001)
loss_func = nn.MSELoss()

# Split data into training and testing
train_size = int(0.8 * len(X))
test_size = len(X) - train_size

train_X, test_X = torch.split(X, [train_size, test_size])
train_y, test_y = torch.split(y, [train_size, test_size])

# Training loop
num_epochs = 10000
for epoch in range(num_epochs):
    outputs = model(train_X)
    loss = loss_func(outputs, train_y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 1000 == 0:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")

Epoch 1000/10000, Loss: 0.04463537782430649
Epoch 2000/10000, Loss: 0.04071627929806709
Epoch 3000/10000, Loss: 0.03880450129508972
Epoch 4000/10000, Loss: 0.036106228828430176
Epoch 5000/10000, Loss: 0.033632658421993256
Epoch 6000/10000, Loss: 0.03211185336112976
Epoch 7000/10000, Loss: 0.03128601238131523
Epoch 8000/10000, Loss: 0.029169518500566483
Epoch 9000/10000, Loss: 0.028140854090452194
Epoch 10000/10000, Loss: 0.028249789029359818


In [21]:
def evaluate_arbitrary_numbers(model, A, B):
    # Create input tensor from A and B
    input_tensor = torch.Tensor([[A, B]])

    # Calculate the expected outputs
    add = A + B
    sub = A - B
    mul = A * B
    div = A / B if B != 0 else float("inf")  # Handle division by zero

    true_output_tensor = torch.Tensor([[add, sub, mul, div]])

    # Get the model's predictions
    model.eval()
    with torch.no_grad():
        predicted_output = model(input_tensor)

    # Print the results
    print("Input Numbers: ")
    print("A: {:.2f}, B: {:.2f}".format(A, B))
    print("\nOperations:")

    operations = ["Addition", "Subtraction", "Multiplication", "Division"]
    for op, true_val, pred_val in zip(
        operations, true_output_tensor[0], predicted_output[0]
    ):
        print(f"{op}:\n\tExpected: {true_val:.2f}\n\tPredicted: {pred_val:.2f}\n")


# Example usage:
evaluate_arbitrary_numbers(model, 6.0, 3.0)

Input Numbers: 
A: 6.00, B: 3.00

Operations:
Addition:
	Expected: 9.00
	Predicted: 8.96

Subtraction:
	Expected: 3.00
	Predicted: 2.82

Multiplication:
	Expected: 18.00
	Predicted: 17.71

Division:
	Expected: 2.00
	Predicted: 2.33



In [25]:
import torch.nn.functional as F


class SimpleFeedForwardNN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(SimpleFeedForwardNN, self).__init__()

        # Define the layers
        self.fc1 = torch.nn.Linear(input_dim, hidden_dim)
        self.fc2 = torch.nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = F.relu(self.fc1(x))  # Using ReLU activation function for hidden layer
        x = self.fc2(x)
        return x


# Let's instantiate and train the model:

input_dim = 2
hidden_dim = 100  # You can adjust this value
output_dim = 4

# Instantiate the model
ffnn_model = SimpleFeedForwardNN(input_dim, hidden_dim, output_dim)

In [46]:
class EmbedFeedForwardNN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, emb_dim=20):
        super(SimpleFeedForwardNN, self).__init__()
        emb = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, emb_dim),
        )

        # Define the layers
        self.fc1 = torch.nn.Linear(emb_dim, hidden_dim)
        self.fc2 = torch.nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = F.relu(self.fc1(x))  # Using ReLU activation function for hidden layer
        x = self.fc2(x)
        return x


# Let's instantiate and train the model:

input_dim = 2
hidden_dim = 100  # You can adjust this value
output_dim = 4

# Instantiate the model
efnn_model = EmbedFeedForwardNN(input_dim, hidden_dim, output_dim)

TypeError: super(type, obj): obj must be an instance or subtype of type

In [26]:
# Loss and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.RMSprop(ffnn_model.parameters(), lr=0.001)

# Training loop, just as before
num_epochs = 100000
for epoch in range(num_epochs):
    # Forward pass
    outputs = ffnn_model(X)
    loss = criterion(outputs, y)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10000 == 0:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")

Epoch 10000/100000, Loss: 4.013541221618652
Epoch 20000/100000, Loss: 0.25719088315963745
Epoch 30000/100000, Loss: 0.12887084484100342
Epoch 40000/100000, Loss: 0.08110211789608002
Epoch 50000/100000, Loss: 0.0392223596572876
Epoch 60000/100000, Loss: 0.05254456028342247
Epoch 70000/100000, Loss: 0.051144082099199295
Epoch 80000/100000, Loss: 0.1144065409898758
Epoch 90000/100000, Loss: 0.059087157249450684
Epoch 100000/100000, Loss: 0.03847454860806465


In [37]:
a, b = 200.0, 12.0
evaluate_arbitrary_numbers(ffnn_model, a, b)
evaluate_arbitrary_numbers(model, a, b)

Input Numbers: 
A: 200.00, B: 12.00

Operations:
Addition:
	Expected: 212.00
	Predicted: 204.94

Subtraction:
	Expected: 188.00
	Predicted: 182.11

Multiplication:
	Expected: 2400.00
	Predicted: 1180.43

Division:
	Expected: 16.67
	Predicted: 78.56

Input Numbers: 
A: 200.00, B: 12.00

Operations:
Addition:
	Expected: 212.00
	Predicted: inf

Subtraction:
	Expected: 188.00
	Predicted: inf

Multiplication:
	Expected: 2400.00
	Predicted: 0.00

Division:
	Expected: 16.67
	Predicted: 0.00



In [31]:
X.shape

torch.Size([1000, 2])

In [32]:
max(X[:, 0])

tensor(9.9979)

In [33]:
X

tensor([[3.2766, 4.5116],
        [9.7308, 1.5936],
        [5.1066, 3.4027],
        ...,
        [3.9201, 9.9845],
        [5.4350, 1.6716],
        [0.8284, 0.6871]])

In [40]:
seq = nn.Sequential(nn.Linear(2, 100), nn.ReLU(), nn.Linear(100, 2))

In [42]:
seq(X).shape

torch.Size([1000, 2])

In [44]:
X[0:5, :]

tensor([[3.2766, 4.5116],
        [9.7308, 1.5936],
        [5.1066, 3.4027],
        [9.5233, 7.5495],
        [7.3589, 1.2958]])

In [45]:
seq(X[0:5, :])

tensor([[-0.6209, -2.2437],
        [-2.0155, -2.2886],
        [-1.0121, -2.2064],
        [-1.7563, -4.4800],
        [-1.5151, -1.8035]], grad_fn=<AddmmBackward0>)