
# Santiago Valencia García - A00395902

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Hyperparameters shared by the model definition and the copy-task training loop
INPUT_SIZE = 1     # Features per time step (each step is a single 0/1 value)
MEMORY_SIZE = 128  # Number of memory locations (rows of the memory matrix)
MEMORY_DIM = 20    # Dimensionality of each memory slot (columns of the memory matrix)
CONTROLLER_HIDDEN_SIZE = 100  # Controller LSTM hidden units; also the input width of the head's layers
SEQ_LEN = 10       # Number of time steps in each generated copy-task sequence


### Command Unit

The controller serves as the primary processing unit for the NTM, producing signals that manage memory operations. Here, it consists of an LSTM layer followed by a fully connected (fc) layer. The LSTM layer interprets patterns in the input sequence, while the fc layer condenses the output for efficient use by the rest of the NTM. Through control vectors created from current inputs and previous memory outputs, the controller directs the NTM’s actions at each time step.

In [None]:
# Controller network
class Controller(nn.Module):
    """LSTM followed by a linear projection of its last time step.

    Args:
        input_size: Number of features per time step fed to the LSTM.
        hidden_size: Number of hidden units in the LSTM.
        output_size: Width of the vector returned by ``forward``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # batch_first=True: the LSTM indexes its input as (batch, time, feature)
        self.rnn = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the LSTM over ``x`` and project the output of the final step.

        Only the last position along the time axis is used; the LSTM's
        returned hidden/cell state is discarded.
        """
        step_outputs, _final_state = self.rnn(x)
        last_step = step_outputs[:, -1, :]
        return self.fc(last_step)

### Memory

The memory in an NTM provides extended data retention, supporting the model’s capacity to store information independently from the controller’s short-term memory. Structured as a `memory_size x memory_dim` tensor, each row represents an individual memory cell. The `Memory` class offers read and write methods: reading retrieves data through address-based weights, while writing uses erase and add vectors to update specific cells. This structure is differentiable, enabling the NTM to learn effective memory access and update processes throughout training.

In [None]:
# Memory module
class Memory(nn.Module):
    """External memory matrix with soft (weighted) read and write access.

    The memory is a single ``(memory_size, memory_dim)`` tensor that persists
    across calls; it is state, not a trainable parameter.

    Args:
        memory_size: Number of memory locations (rows).
        memory_dim: Width of each memory location (columns).
    """

    def __init__(self, memory_size, memory_dim):
        super().__init__()
        # A buffer (rather than a plain attribute) follows ``.to(device)`` and
        # dtype casts applied to the parent module. ``persistent=False`` keeps
        # it out of ``state_dict()`` so existing checkpoints stay loadable.
        self.register_buffer(
            "memory",
            torch.randn(memory_size, memory_dim) * 0.01,
            persistent=False,
        )

    def read(self, address):
        """Return the address-weighted sum of the memory rows.

        Args:
            address: Weights over locations, shaped ``(memory_size,)`` or
                ``(batch, memory_size)``.

        Returns:
            Tensor shaped ``(memory_dim,)`` or ``(batch, memory_dim)``.
        """
        return torch.matmul(address, self.memory)

    def write(self, address, erase_vector, add_vector):
        """Apply an erase-then-add update to the memory.

        For one write the update is ``M * (1 - w e^T) + w a^T``. When the
        arguments carry a leading batch dimension, every row is treated as a
        separate write to the same shared memory: the per-row retention
        factors are multiplied together and the per-row additions are summed.
        The memory always keeps its ``(memory_size, memory_dim)`` shape.

        Args:
            address: Weights over locations, ``(memory_size,)`` or
                ``(batch, memory_size)``.
            erase_vector: Erase strengths, ``(memory_dim,)`` or
                ``(batch, memory_dim)``.
            add_vector: Content to add, ``(memory_dim,)`` or
                ``(batch, memory_dim)``.
        """
        num_slots, slot_dim = self.memory.shape
        weights = address.reshape(-1, num_slots, 1)      # (batch, slots, 1)
        erase = erase_vector.reshape(-1, 1, slot_dim)    # (batch, 1, dim)
        add = add_vector.reshape(-1, 1, slot_dim)        # (batch, 1, dim)

        # Reduce over the batch axis so the result stays two-dimensional
        retain = (1 - weights * erase).prod(dim=0)       # (slots, dim)
        addition = (weights * add).sum(dim=0)            # (slots, dim)
        self.memory = self.memory * retain + addition

## Memory Access Unit

The Memory Access Unit (implemented below as the `Head` class) bridges the Controller and the Memory, deciding which memory locations to access and how to modify them. It creates an address vector to target specific memory locations, alongside erase and add vectors that control how those locations are altered. Here, linear layers convert the control vector into these three elements: address, erase, and add. In its `forward` function, the Memory Access Unit first applies the memory update and then reads from memory using the same address weights, empowering the NTM to execute advanced memory operations.

In [None]:
# Read-Write head
class Head(nn.Module):
    """Combined read/write head that turns a control vector into memory access.

    Three linear layers map the control vector to address logits, an erase
    vector and an add vector.

    Args:
        memory_size: Number of memory locations the head addresses.
        memory_dim: Width of each memory location.
        controller_size: Width of the control vector fed to ``forward``.
            Defaults to the module-level ``CONTROLLER_HIDDEN_SIZE`` so
            existing two-argument calls behave as before.
    """

    def __init__(self, memory_size, memory_dim, controller_size=None):
        super().__init__()
        if controller_size is None:
            # Backward-compatible default: the width used by the notebook
            controller_size = CONTROLLER_HIDDEN_SIZE
        self.memory_size = memory_size
        self.memory_dim = memory_dim
        self.addressing = nn.Linear(controller_size, memory_size)
        self.erase = nn.Linear(controller_size, memory_dim)
        self.add = nn.Linear(controller_size, memory_dim)

    def forward(self, control_vector, memory):
        """Write to ``memory`` and then read from it with the same weights.

        Args:
            control_vector: Controller output whose last dimension matches
                the ``controller_size`` the head was built with.
            memory: Object exposing ``write(address, erase, add)`` and
                ``read(address)``.

        Returns:
            Whatever ``memory.read`` returns for the computed address weights.
        """
        # Softmax makes the address weights sum to 1 over the last dimension
        address_weights = torch.softmax(self.addressing(control_vector), dim=-1)
        # Sigmoid bounds erase strengths to (0, 1); tanh bounds additions to (-1, 1)
        erase_vector = torch.sigmoid(self.erase(control_vector))
        add_vector = torch.tanh(self.add(control_vector))
        # The write happens first, so the read sees the freshly written content
        memory.write(address_weights, erase_vector, add_vector)
        read_data = memory.read(address_weights)
        return read_data

### Final Output Module

The Final Output Module generates the NTM’s ultimate output by combining processed data from the Controller and the Memory’s retrieved information. This layer, represented by a linear transformation (`output_layer`) within the `NeuralTuringMachine` class, accepts the concatenated Controller output and Memory read vector. It then produces predictions aligned with the target sequence for tasks like copying. The Final Output Module transforms the Controller's refined processing and Memory’s contextual insights into specific, task-related outputs.

In [None]:
# NTM Model Implementation
class NeuralTuringMachine(nn.Module):
    """Controller + external memory + read/write head + linear output layer.

    Args:
        input_dim: Number of features per input time step; also the width of
            each output step.
        mem_size: Number of memory locations.
        mem_feature_dim: Width of each memory location (and of the read vector).
        hidden_units: Controller hidden size and control-vector width. The
            head is built with its default control width, so this should equal
            ``CONTROLLER_HIDDEN_SIZE``.
    """

    def __init__(self, input_dim, mem_size, mem_feature_dim, hidden_units):
        super().__init__()
        # Kept so forward() can size the initial read vector without a global
        self.mem_feature_dim = mem_feature_dim
        # Initialize the components of the NTM: controller, memory, head, and output layer
        self.controller_net = Controller(input_dim + mem_feature_dim, hidden_units, hidden_units)
        self.memory_module = Memory(mem_size, mem_feature_dim)
        self.memory_head = Head(mem_size, mem_feature_dim)
        self.output_layer = nn.Linear(hidden_units + mem_feature_dim, input_dim)

    def forward(self, inputs):
        """Process a batch of sequences one time step at a time.

        Args:
            inputs: Tensor indexed as ``(batch, time, feature)``.

        Returns:
            Tensor of shape ``(batch, time, input_dim)`` holding one prediction
            per time step.
        """
        batch, seq_length, _ = inputs.size()

        # The memory persists between calls. Dropping its autograd history
        # here keeps the graph from growing across calls and prevents a later
        # backward() from reaching a graph that was already freed, while still
        # letting gradients flow through reads and writes inside this call.
        self.memory_module.memory = self.memory_module.memory.detach()

        # Initial read vector: zeros on the same device/dtype as the inputs
        memory_state = inputs.new_zeros(batch, self.mem_feature_dim)
        results = []

        # Process each time step in the sequence
        for timestep in range(seq_length):
            controller_input = torch.cat([inputs[:, timestep, :], memory_state], dim=-1)
            # The controller expects a time axis, so feed a length-1 sequence
            control_vector = self.controller_net(controller_input.unsqueeze(1))
            # Not detached: the head's layers only receive gradients through this read
            memory_state = self.memory_head(control_vector, self.memory_module)
            result = self.output_layer(torch.cat([control_vector, memory_state], dim=-1))
            results.append(result)

        # Stack the results for each timestep into a single tensor
        return torch.stack(results, dim=1)


## Training Process

The `train_ntm()` function manages the training of the NTM, running through multiple epochs to learn the copy task. It initializes the model, uses MSE loss, and applies the Adam optimizer. In each epoch, a freshly generated input sequence is passed through the model, the loss against the target is computed, and the weights are updated using backpropagation. Progress is logged every 10 epochs to track learning.

In [None]:
# Generate input and target sequences for the copying task
def generate_copy_task_data(seq_len, batch_size=1):
    """Build one random batch for the copy task.

    Args:
        seq_len: Number of time steps per sequence.
        batch_size: Number of sequences in the batch.

    Returns:
        ``(input_seq, target_seq)``: two float tensors of shape
        ``(batch_size, seq_len, INPUT_SIZE)`` holding the same 0/1 values;
        the target is an independent copy of the input.
    """
    shape = (batch_size, seq_len, INPUT_SIZE)
    # Upper bound is exclusive, so every entry is drawn from {0, 1}
    random_bits = torch.randint(low=0, high=2, size=shape)
    input_seq = random_bits.to(torch.float32)
    return input_seq, input_seq.clone()

In [None]:
# Training loop for Neural Turing Machine
def train_ntm():
    """Train a NeuralTuringMachine on the copy task and report progress.

    A new random sequence is generated for every epoch. Every 10th epoch the
    loss, the input, the target and the model output are printed.

    Returns:
        ``(ntm, input_seq, target_seq)``: the trained model together with the
        input and target sequences used in the final epoch.
    """
    # Initialize NTM model, loss function and optimizer
    # (the model class defined in this file is NeuralTuringMachine)
    ntm = NeuralTuringMachine(INPUT_SIZE, MEMORY_SIZE, MEMORY_DIM, CONTROLLER_HIDDEN_SIZE)
    criterion = nn.MSELoss()  # Mean Squared Error loss
    optimizer = optim.Adam(ntm.parameters(), lr=0.001)  # Adam optimizer

    epochs = 100  # Total number of training epochs
    for epoch in range(epochs):
        # Generate input and target sequences for the current task
        input_seq, target_seq = generate_copy_task_data(SEQ_LEN)

        # Clear gradients left over from the previous epoch
        optimizer.zero_grad()

        # Perform a forward pass through the model
        output_seq = ntm(input_seq)

        # Calculate the loss for the current prediction
        loss = criterion(output_seq, target_seq)

        # Backpropagate the error to compute parameter gradients
        loss.backward()

        # Update model parameters using the optimizer
        optimizer.step()

        # Print training progress every 10 epochs
        if epoch % 10 == 0:
            print(f'Epoch {epoch}, Loss: {loss.item()}')
            print("Input Sequence:")
            print(input_seq.squeeze().numpy())
            print("Expected Output Sequence:")
            print(target_seq.squeeze().numpy())
            print("Model Output Sequence:")
            print(output_seq.detach().squeeze().numpy())
            print("=" * 50)

    print("Training completed.")
    return ntm, input_seq, target_seq


# Execute the training process and evaluate the model's performance
ntm, input_seq, target_seq = train_ntm()

# Get the model's output for the final input sequence. Running under no_grad
# avoids building an autograd graph that would be discarded immediately; the
# result carries no gradient history, as with the previous .detach() call.
with torch.no_grad():
    output_seq = ntm(input_seq)

# Function to display the final results
def display_final_results(input_seq, target_seq, output_seq):
    """Print the input, the expected output and the model output.

    Each tensor is squeezed and converted to a NumPy array before printing.
    A final unlabeled line shows the model output rounded to the nearest
    integer with the sign removed.
    """

    def _show(heading, tensor, detach=False):
        # Heading first, then the values, matching the original print order
        print(heading)
        if detach:
            tensor = tensor.detach()
        print(tensor.squeeze().numpy())

    _show("\nFinal Input Sequence:", input_seq)
    _show("\nFinal Expected Output Sequence:", target_seq)
    _show("\nFinal Model Output Sequence:", output_seq, detach=True)

    # Rounding can yield -0.0; taking the absolute value prints it as 0.
    rounded = torch.round(output_seq).abs()
    print(rounded.detach().squeeze().numpy())

# Display the results: the sequence from the last training epoch, its target,
# and the trained model's output for that same sequence
display_final_results(input_seq, target_seq, output_seq)


Epoch 0, Loss: 0.42799314856529236
Input sequence:
[0. 1. 1. 0. 0. 1. 1. 0. 0. 0.]
Expected Output sequence:
[0. 1. 1. 0. 0. 1. 1. 0. 0. 0.]
NTM Output sequence:
[-0.03352125 -0.03415407 -0.03394266 -0.03287514 -0.03267232 -0.0333279
 -0.03311973 -0.03205422 -0.03185463 -0.0316558 ]
Epoch 10, Loss: 0.3283453583717346
Input sequence:
[0. 1. 1. 1. 0. 0. 0. 0. 1. 1.]
Expected Output sequence:
[0. 1. 1. 1. 0. 0. 0. 0. 1. 1.]
NTM Output sequence:
[0.1534404  0.2049212  0.20513059 0.20533809 0.16183257 0.16204457
 0.1622567  0.16246797 0.20639877 0.20660222]
Epoch 20, Loss: 0.20362956821918488
Input sequence:
[1. 0. 1. 0. 1. 0. 0. 1. 1. 1.]
Expected Output sequence:
[1. 0. 1. 0. 1. 0. 0. 1. 1. 1.]
NTM Output sequence:
[0.48428524 0.41273454 0.53281295 0.4132245  0.5333064  0.41370544
 0.41394863 0.5340569  0.53429496 0.53451824]
Epoch 30, Loss: 0.2358575165271759
Input sequence:
[0. 0. 1. 0. 0. 1. 1. 0. 0. 0.]
Expected Output sequence:
[0. 0. 1. 0. 0. 1. 1. 0. 0. 0.]
NTM Output sequence:
[0.