**Part 2**

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random

In [None]:
# Define the neural network
class LogicNet(nn.Module):
    """Small feed-forward network for two-input logic gates.

    Architecture: Linear(2 -> 4) -> ReLU -> Linear(4 -> 1) -> Sigmoid, so the
    output always lies strictly between 0 and 1.
    """

    def __init__(self):
        super().__init__()
        # fc1 is created before fc2 so that seeded parameter initialisation
        # draws random numbers in the same order for both layers.
        self.fc1 = nn.Linear(in_features=2, out_features=4)  # input -> hidden
        self.fc2 = nn.Linear(in_features=4, out_features=1)  # hidden -> output

    def forward(self, x):
        """Run the forward pass on ``x`` (last dimension of size 2)."""
        hidden = torch.relu(self.fc1(x))
        return torch.sigmoid(self.fc2(hidden))


In [None]:
# Define the environment
class LogicGateEnv:
    """Environment wrapping the truth table of a two-input logic gate.

    ``data`` holds the four input pairs in the order (0,0), (0,1), (1,0),
    (1,1) as a float32 tensor of shape (4, 2); ``targets`` holds the matching
    gate outputs as a float32 tensor of shape (4, 1).
    """

    # Gate outputs for the inputs (0,0), (0,1), (1,0), (1,1), in that order.
    _TRUTH_TABLES = {
        "AND": (0, 0, 0, 1),
        "OR": (0, 1, 1, 1),
        "XOR": (0, 1, 1, 0),
        "XNOR": (1, 0, 0, 1),
    }

    def __init__(self, gate="AND"):
        """Build the environment for ``gate``.

        Raises:
            ValueError: If ``gate`` is not one of the supported gate names.
        """
        self.gate = gate
        self.data = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
        self.targets = self.get_targets(gate)

    def get_targets(self, gate):
        """Return the (4, 1) float32 target tensor for ``gate``.

        Raises:
            ValueError: If ``gate`` is not a supported gate name. Failing here
                avoids silently returning ``None`` and crashing later when the
                targets are indexed.
        """
        try:
            outputs = self._TRUTH_TABLES[gate]
        except (KeyError, TypeError):
            supported = ", ".join(sorted(self._TRUTH_TABLES))
            raise ValueError(
                f"Unsupported gate {gate!r}; expected one of: {supported}"
            ) from None
        return torch.tensor([[value] for value in outputs], dtype=torch.float32)

    def step(self, input_idx, prediction):
        """Score a single prediction against the truth table.

        Args:
            input_idx: Row index (0-3) of the input pair that was presented.
            prediction: Single-element tensor holding the model output.

        Returns:
            ``1.0`` if the thresholded prediction equals the target,
            otherwise ``-1.0``.

        Raises:
            ValueError: If the prediction is NaN.
        """
        correct = self.targets[input_idx].item()
        value = prediction.item()
        if value != value:  # NaN is the only float that is not equal to itself
            raise ValueError("prediction is NaN and cannot be thresholded")
        # Explicit threshold: a prediction >= 0.5 counts as 1. The built-in
        # round() rounds half to even, so round(0.5) == 0 would misclassify
        # an output of exactly 0.5.
        predicted_label = 1.0 if value >= 0.5 else 0.0
        return 1.0 if predicted_label == correct else -1.0

In [None]:
# Training loop
def train_logic_gate(gate="XOR", epochs=1000, learning_rate=0.01, batch_size=10):
    """Train a ``LogicNet`` on random samples from a logic gate's truth table.

    Every epoch draws ``batch_size`` truth-table rows (with replacement), runs
    one batched forward pass, scores each prediction with ``env.step`` (+1.0
    for a correct prediction, -1.0 for a wrong one) and takes one Adam step on
    the MSE loss. Progress, the training accuracy and a final evaluation on
    all four inputs are printed; nothing is returned.

    Args:
        gate: Gate name forwarded to ``LogicGateEnv``.
        epochs: Number of optimisation steps; must be >= 1.
        learning_rate: Learning rate for the Adam optimiser.
        batch_size: Number of samples drawn per epoch; must be >= 1.

    Raises:
        ValueError: If ``epochs`` or ``batch_size`` is smaller than 1.
    """
    if epochs < 1 or batch_size < 1:
        raise ValueError(
            f"epochs and batch_size must both be >= 1, got epochs={epochs}, batch_size={batch_size}"
        )

    print(f"Training {gate} gate with {epochs} epochs, {learning_rate} learning rate, and {batch_size} batch size.")
    env = LogicGateEnv(gate)
    net = LogicNet()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    loss_fn = nn.MSELoss()
    # Count of correct predictions seen during training (a plain int).
    num_correct = 0

    for epoch in range(epochs):
        # Sample truth-table rows with replacement.
        indices = [random.randint(0, 3) for _ in range(batch_size)]
        index_tensor = torch.tensor(indices, dtype=torch.long)
        inputs_batch_tensor = env.data[index_tensor]      # (batch_size, 2)
        targets_batch_tensor = env.targets[index_tensor]  # (batch_size, 1)

        # One batched forward pass instead of batch_size separate ones.
        prediction_batch_tensor = net(inputs_batch_tensor)  # (batch_size, 1)

        # Score each prediction; detach because rewards take no part in autograd.
        rewards_batch = [
            env.step(idx, pred)
            for idx, pred in zip(indices, prediction_batch_tensor.detach())
        ]
        rewards_batch_tensor = torch.tensor(rewards_batch, dtype=torch.float32)

        # Rewards are +1/-1, so summing them gives (correct - wrong), not the
        # number of correct predictions. Count the positive rewards instead.
        num_correct += sum(1 for r in rewards_batch if r > 0)

        # --- Advantage calculation ---
        # Shape (batch_size, 1), matching the targets for element-wise ops.
        rewards_batch_t = rewards_batch_tensor.unsqueeze(1)
        mean_reward = rewards_batch_tensor.mean()
        # Population std (unbiased=False) is 0 for a single-sample batch,
        # whereas the sample std is NaN there and would poison the loss and
        # the weights. The epsilon prevents division by zero when all rewards
        # in the batch are identical.
        std_reward = rewards_batch_tensor.std(unbiased=False) + 1e-8
        advantages_of_batch = (rewards_batch_t - mean_reward) / std_reward
        mean_advantage_of_batch = advantages_of_batch.mean()

        # NOTE(review): standardised advantages have zero mean by construction,
        # so (1 - mean_advantage_of_batch) is ~1 and this is effectively plain
        # MSE. If reward-dependent weighting was intended, the scaling needs a
        # different formulation (e.g. per-sample weights) - confirm the intent.
        loss = loss_fn(prediction_batch_tensor, targets_batch_tensor) * (1 - mean_advantage_of_batch)

        # Back propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Periodic progress report; the batch mean reward is logged rather
        # than the reward of whichever sample happened to be drawn last.
        if epoch % 1000 == 0:
            print(f"Epoch {epoch}: Loss = {loss.item()}, Mean reward = {mean_reward.item()}")

    total_predictions = epochs * batch_size
    print("Training completed.\n")
    print(f"Number of correct predictions: {num_correct}/{total_predictions}")
    print(f"Accuracy: {num_correct / total_predictions}")

    print("\nTesting model:")
    # Evaluation needs no gradients.
    with torch.no_grad():
        for i in range(4):
            pred = net(env.data[i]).item()
            # Threshold at 0.5 (>= 0.5 -> 1); round() would map exactly 0.5 to 0.
            print(f"Input: {env.data[i].tolist()}, Prediction: {int(pred >= 0.5)}, Actual: {env.targets[i].item()}")


In [None]:

# Kick off training on the XNOR gate, using the default hyperparameters
train_logic_gate(gate="XNOR")

Training XNOR gate with 1000 epochs, 0.01 learning rate, and 10 batch size.
Epoch 0: Loss = 0.22494681179523468, Reward = 1.0
Training completed.

Number of correct predictions: 9588.0/10000
Accuracy: 0.9588000178337097

Testing model:
Input: [0.0, 0.0], Prediction: 1, Actual: 1.0
Input: [0.0, 1.0], Prediction: 0, Actual: 0.0
Input: [1.0, 0.0], Prediction: 0, Actual: 0.0
Input: [1.0, 1.0], Prediction: 1, Actual: 1.0
