**Part 1**

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import random

In [2]:
# Define the neural network
class LogicNet(nn.Module):
    def __init__(self):
        super(LogicNet, self).__init__()
        self.fc1 = nn.Linear(2, 4)  # Input layer -> Hidden Layer
        self.fc2 = nn.Linear(4, 1)  # Hidden Layer -> Output Layer

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x


In [3]:
# Define the environment
class LogicGateEnv:
    def __init__(self, gate="AND"):
        self.gate = gate
        self.data = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
        self.targets = self.get_targets(gate)
        self.num_correct = 0.0

    def get_targets(self, gate):
        if gate == "AND":
            return torch.tensor([[0], [0], [0], [1]], dtype=torch.float32)
        elif gate == "OR":
            return torch.tensor([[0], [1], [1], [1]], dtype=torch.float32)
        elif gate == "XOR":
            return torch.tensor([[0], [1], [1], [0]], dtype=torch.float32)

    def step(self, input_idx, prediction):
        correct = self.targets[input_idx].item()
        reward = 1.0 if round(prediction.item()) == correct else -1.0
        self.num_correct += reward
        return reward

In [6]:
# Training loop with reward-based backpropagation
def train_logic_gate(gate="XOR", epochs=1000, learning_rate=0.01):
    print(f"Training {gate} gate with {epochs} epochs, {learning_rate} learning rate")
    env = LogicGateEnv(gate)
    net = LogicNet()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    loss_fn = nn.MSELoss()

    for epoch in range(epochs):
        idx = random.randint(0, 3)
        inputs = env.data[idx]
        target = env.targets[idx]

        # Agent Action
        prediction = net(inputs)

        # Compute reward based on action in the environment
        reward = env.step(idx, prediction)

        # Modify loss using the reward
        loss = loss_fn(prediction, target) * (1 - reward)  # Scaling by reward (negative to encourage better choices)

        # Backpropagation with modified loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Epoch count
        if epoch % 1000 == 0:
            print(f"Epoch {epoch}: Loss = {loss.item()}, Reward = {reward}")

    print("Training completed. Testing model:")
    for i in range(4):
        pred = net(env.data[i]).item()
        print(f"Input: {env.data[i].tolist()}, Prediction: {round(pred)}, Actual: {env.targets[i].item()}")
    print("accuracy :", env.num_correct / epochs)


In [7]:
# Run training
train_logic_gate("XOR")

Training XOR gate with 1000 epochs, 0.01 learning rate
Epoch 0: Loss = 0.72088623046875, Reward = -1.0
Training completed. Testing model:
Input: [0.0, 0.0], Prediction: 0, Actual: 0.0
Input: [0.0, 1.0], Prediction: 1, Actual: 1.0
Input: [1.0, 0.0], Prediction: 1, Actual: 1.0
Input: [1.0, 1.0], Prediction: 0, Actual: 0.0
accuracy : 0.908
