In [1]:
import torch
import numpy as np
from torch import optim
import torch.nn.functional as F

In [5]:
class ACT_reservoir:
    """Linear recurrent network with a halting unit and a step-count penalty.

    The state is a column vector of length ``total_dim`` laid out as
    ``[bias | inputs | hidden | outputs | halting]``. One computation step is
    ``x <- W @ x`` with a single dense, trainable ``total_dim x total_dim``
    matrix ``W``. Stepping stops as soon as the halting unit exceeds
    ``halt_threshold`` or after ``max_iter`` steps, whichever comes first.

    Training uses Adam and backpropagates through all executed steps.

    NOTE: the ponder term in ``train`` is ``ponder_cost * steps`` where
    ``steps`` is a plain Python int. It raises the reported loss but carries
    no gradient, so by itself it does not push ``W`` towards halting earlier.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, max_iter=20, learning_rate=0.01,
                 ponder_cost=0.01, halt_threshold=1.0):
        """Build the weight matrix, index ranges and optimizer.

        Args:
            input_dim: number of input units.
            hidden_dim: number of hidden units.
            output_dim: number of output units.
            max_iter: upper bound on the number of ``W @ x`` steps per call.
            learning_rate: Adam learning rate.
            ponder_cost: multiplier applied to the step count in the loss.
            halt_threshold: stepping continues while the halting unit is
                less than or equal to this value (default 1.0).
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        self.ponder_cost = ponder_cost
        self.halt_threshold = halt_threshold

        # One bias unit in front, one halting unit at the end.
        self.total_dim = 1 + input_dim + hidden_dim + output_dim + 1

        # Weights drawn uniformly from [-2, 2); made a leaf tensor that tracks gradients.
        self.W = (torch.rand((self.total_dim, self.total_dim)) * 4 - 2).requires_grad_(True)

        # Index ranges for the different parts of the state vector.
        self.input_indices = range(1, input_dim + 1)
        self.output_indices = range(input_dim + hidden_dim + 1, input_dim + hidden_dim + output_dim + 1)
        self.halting_index = self.total_dim - 1  # Last element is the halting unit

        self.optimizer = optim.Adam([self.W], lr=learning_rate)

    def predict(self, xi):
        """Run the recurrence on one input and return ``(outputs, steps)``.

        Args:
            xi: ``input_dim`` values (list, NumPy array, tensor, or a bare
                scalar when ``input_dim == 1``).

        Returns:
            A tuple ``(outputs, steps)``: ``outputs`` is the
            ``(output_dim, 1)`` slice of the final state (still attached to
            the autograd graph so ``train`` can backpropagate through it) and
            ``steps`` is the number of ``W @ x`` steps executed (a Python int).

        Raises:
            RuntimeError: if ``xi`` does not contain exactly ``input_dim``
                values (raised by ``reshape``).
        """
        # as_tensor treats a Python number as data; the legacy torch.Tensor(n)
        # constructor would treat an int as a size and return uninitialized memory.
        inputs = torch.as_tensor(xi, dtype=self.W.dtype).reshape(self.input_dim, 1)

        # The zero state is a grad-tracking leaf; the in-place writes below are
        # done on a clone because a leaf that requires grad cannot be modified in place.
        x = torch.zeros((self.total_dim, 1), dtype=self.W.dtype, requires_grad=True)
        state = x.clone()
        state[0] = 1  # Bias unit
        state[self.input_indices] = inputs  # Input part
        self.x = state

        t = 0
        while t < self.max_iter and self.x[self.halting_index].item() <= self.halt_threshold:
            self.x = self.W @ self.x  # the core of the network
            # To add a non-linearity, apply e.g. F.relu to self.x here.
            self.x.retain_grad()  # Keep the gradient of this intermediate state after backward()
            t += 1
        return self.x[self.output_indices], t

    def train(self, xi, target):
        """Take one Adam step on a single ``(xi, target)`` pair.

        Args:
            xi: input values, as accepted by ``predict``.
            target: desired outputs (list, NumPy array, tensor or scalar);
                flattened to a column and broadcast against the prediction.

        Returns:
            The scalar loss as a Python float: mean squared error plus
            ``ponder_cost * steps``.
        """
        self.optimizer.zero_grad()  # Clear old gradients
        predicted_output, steps = self.predict(xi)

        # A scalar target such as 6 must become the value 6.0, not a
        # 6-element uninitialized tensor (which torch.Tensor(6) would create).
        target_col = torch.as_tensor(target, dtype=predicted_output.dtype).reshape(-1, 1)
        error = predicted_output - target_col

        # MSE plus the (constant, non-differentiable) ponder penalty.
        loss = torch.mean(error ** 2) + self.ponder_cost * steps
        loss.backward()  # Backpropagate through every executed step

        self.optimizer.step()  # Update the weights

        return loss.item()

    def summary(self):
        """Print the parameter count and the configured dimensions."""
        print("Total Parameters:", self.total_dim ** 2)
        print("Input Dimension:", self.input_dim)
        print("Hidden Dimension:", self.hidden_dim)
        print("Output Dimension:", self.output_dim)
        print("Max Iterations:", self.max_iter)

In [6]:
# Seed PyTorch's global RNG so the random initial weights (and therefore the
# loss curve below) are the same on every Restart & Run All.
torch.manual_seed(0)

# Dataset of integer products: every ordered pair (i, j) with 1 <= i, j <= 20
# and its product i * j, built directly as float32 tensors.
pairs = [(i, j) for i in range(1, 21) for j in range(1, 21)]
inputs = torch.tensor(pairs, dtype=torch.float32)   # shape (400, 2)
targets = inputs.prod(dim=1, keepdim=True)          # shape (400, 1)

# Create a model instance
model = ACT_reservoir(2, 5, 1, learning_rate=0.0001, ponder_cost=0.05)

# Train the model one sample at a time
epochs = 1000
log_every = 10  # report the mean per-sample loss every `log_every` epochs
for epoch in range(epochs):
    total_loss = 0
    for xi, target in zip(inputs, targets):
        loss = model.train(xi.numpy(), target.numpy())
        total_loss += loss

    if epoch % log_every == 0:
        print(f"Epoch {epoch}, Loss: {total_loss/len(inputs)}")

Epoch 0, Loss: 21879.714569593743
Epoch 10, Loss: 19922.51134843439
Epoch 20, Loss: 18366.42053228013
Epoch 30, Loss: 16899.67541829373
Epoch 40, Loss: 15523.320453680064
Epoch 50, Loss: 14236.006366672535
Epoch 60, Loss: 13036.220109713515
Epoch 70, Loss: 11922.23675569729
Epoch 80, Loss: 10892.17760000402
Epoch 90, Loss: 9943.987055785507
Epoch 100, Loss: 9075.547996951593
Epoch 110, Loss: 8284.077886840701
Epoch 120, Loss: 7566.939908095701
Epoch 130, Loss: 6921.105356479697
Epoch 140, Loss: 6343.268455157476
Epoch 150, Loss: 5829.9090175028705
Epoch 160, Loss: 5377.189117958471
Epoch 170, Loss: 4981.060699903779
Epoch 180, Loss: 4637.260159416636
Epoch 190, Loss: 4341.359433137421
Epoch 200, Loss: 4088.8223329504394
Epoch 210, Loss: 3875.1325167902
Epoch 220, Loss: 3695.799106949288
Epoch 230, Loss: 3546.5291930224093
Epoch 240, Loss: 3423.1991494665667
Epoch 250, Loss: 3322.019689152492
Epoch 260, Loss: 3239.5063349727634
Epoch 270, Loss: 3172.5470114914046
Epoch 280, Loss: 3118.4

In [18]:
# Probe the trained model on a single input pair. These values lie far outside
# the 1..20 range used for training, so this is an extrapolation check.
sample_input = torch.Tensor([100000, 10])
predicted_output, steps = model.predict(sample_input.numpy())
predicted_product = predicted_output.item()

# Build the label from the actual input so the message cannot drift out of
# sync with `sample_input` (it previously hard-coded "15 and 5").
first_factor, second_factor = sample_input.tolist()
print(f"Predicted product of {first_factor:g} and {second_factor:g}: {predicted_product}")
print(f"N_steps: {steps}")

Predicted product of 15 and 5: 561333.625
N_steps: 1
