In [1]:
import numpy as np
import tensorflow as tf
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# One seed for every framework so "Restart Kernel -> Run All" starts from the same state.
SEED = 42

# Seeds Python's `random`, NumPy and TensorFlow in a single call. Plain
# `tf.random.set_seed` leaves Python's `random` untouched, which newer Keras
# versions may consult when picking default initializer seeds.
tf.keras.utils.set_random_seed(SEED)

# Trailing semicolon keeps the returned torch Generator repr out of the cell output.
torch.manual_seed(SEED);

<torch._C.Generator at 0x7c1b730a9e90>

In [3]:
# XOR truth table: every combination of two binary inputs, first input varying slowest.
X = np.array([[a, b] for a in (0., 1.) for b in (0., 1.)], dtype=np.float32)
# The label is 1 exactly when the two inputs differ; stored as a (4, 1) float32 column.
y = np.logical_xor(X[:, 0], X[:, 1]).astype(np.float32).reshape(-1, 1)

In [4]:
# Constant tensors built from the XOR inputs and labels, used by the
# hand-written TensorFlow training loop.
X_tf, y_tf = (tf.constant(arr) for arr in (X, y))

**Keras Implementation:**

In [7]:
# Running winner of the Keras grid search: best accuracy so far and the model that reached it.
best_acc_keras, best_model_keras = 0, None

In [9]:
# Hyper-parameter grid swept by each of the grid-search loops.
LEARNING_RATES = [
    0.001,
    0.005,
    0.02,
    0.08,
    0.3,
]
NEURONS_LIST = [2, 4, 8]   # hidden-layer widths
EPOCHS_LIST = [500, 2000]  # number of training epochs per run

In [10]:
# Keras grid search: train one small ReLU -> sigmoid network per
# (neurons, lr, epochs) combination and keep the first model that reaches the
# highest training accuracy.
for neurons in NEURONS_LIST:
    for lr in LEARNING_RATES:
        for epochs in EPOCHS_LIST:
            model = tf.keras.Sequential([
                # Explicit Input object instead of the deprecated `input_shape=`
                # argument on the first Dense layer (avoids a warning per model).
                tf.keras.Input(shape=(2,)),
                tf.keras.layers.Dense(neurons, activation='relu'),
                tf.keras.layers.Dense(1, activation='sigmoid')
            ])

            model.compile(
                optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                loss='binary_crossentropy',
                metrics=['accuracy']
            )

            # batch_size=4 covers all four XOR rows, so every epoch is one full-batch step.
            model.fit(X, y, epochs=epochs, verbose=0, batch_size=4)

            # Evaluated on the training data itself: XOR has no held-out set.
            loss, acc = model.evaluate(X, y, verbose=0)
            print(f"neurons={neurons:2d}   lr={lr:6.3f}   epochs={epochs:4d}   → acc={acc:.4f}   loss={loss:.5f}")

            # Strict '>' keeps the earliest configuration when accuracies tie.
            if acc > best_acc_keras:
                best_acc_keras = acc
                best_model_keras = model

neurons= 2   lr= 0.001   epochs= 500   → acc=0.5000   loss=0.69315
neurons= 2   lr= 0.001   epochs=2000   → acc=0.7500   loss=0.50884
neurons= 2   lr= 0.005   epochs= 500   → acc=0.7500   loss=0.48437
neurons= 2   lr= 0.005   epochs=2000   → acc=0.7500   loss=0.34735
neurons= 2   lr= 0.020   epochs= 500   → acc=1.0000   loss=0.00796
neurons= 2   lr= 0.020   epochs=2000   → acc=0.5000   loss=0.69315
neurons= 2   lr= 0.080   epochs= 500   → acc=0.7500   loss=0.47740
neurons= 2   lr= 0.080   epochs=2000   → acc=0.7500   loss=0.47739
neurons= 2   lr= 0.300   epochs= 500   → acc=0.7500   loss=0.47739
neurons= 2   lr= 0.300   epochs=2000   → acc=1.0000   loss=0.00001
neurons= 4   lr= 0.001   epochs= 500   → acc=1.0000   loss=0.48316
neurons= 4   lr= 0.001   epochs=2000   → acc=1.0000   loss=0.20237
neurons= 4   lr= 0.005   epochs= 500   → acc=0.7500   loss=0.48160
neurons= 4   lr= 0.005   epochs=2000   → acc=0.7500   loss=0.34729
neurons= 4   lr= 0.020   epochs= 500   → acc=0.7500   loss=0.4

In [11]:
# Print the rounded 0/1 predictions of the best Keras model. The guard covers
# the case where no model was ever selected (best_model_keras still None),
# which would otherwise raise AttributeError on `.predict`.
if best_model_keras is not None:
    print("\nBest Keras prediction:")
    print(np.round(best_model_keras.predict(X, verbose=0)).astype(int).flatten())
    print()
else:
    print("\nNo successful model found in Keras experiments")


Best Keras prediction:
[0 1 1 0]



**PyTorch Implementation:**

In [12]:
# Torch tensors wrapping the NumPy XOR arrays (from_numpy shares memory, no copy).
X_t, y_t = (torch.from_numpy(arr) for arr in (X, y))

In [13]:
class XORNet(nn.Module):
    """Two-layer perceptron: 2 inputs -> `neurons` ReLU units -> 1 sigmoid output."""

    def __init__(self, neurons):
        """Build the hidden layer (2 -> neurons) and the output layer (neurons -> 1)."""
        super().__init__()
        hidden_width = neurons
        self.fc1 = nn.Linear(in_features=2, out_features=hidden_width)
        self.fc2 = nn.Linear(in_features=hidden_width, out_features=1)

    def forward(self, x):
        """Return the sigmoid activation of the output layer for input `x`."""
        hidden = self.fc1(x).relu()
        logits = self.fc2(hidden)
        return logits.sigmoid()

In [14]:
# Running winner of the PyTorch grid search: best accuracy so far and the model that reached it.
best_acc_pt, best_model_pt = 0, None

In [15]:
# PyTorch grid search: train one XORNet per (neurons, lr, epochs) combination
# and keep the first model that reaches the highest training accuracy.

# BCELoss holds no per-run state, so a single instance serves every run.
criterion = nn.BCELoss()

for neurons in NEURONS_LIST:
    for lr in LEARNING_RATES:
        for epochs in EPOCHS_LIST:
            model = XORNet(neurons)
            optimizer = optim.Adam(model.parameters(), lr=lr)

            # Full-batch training: all four XOR rows are used in every step.
            for _ in range(epochs):
                optimizer.zero_grad()
                out = model(X_t)
                loss = criterion(out, y_t)
                loss.backward()
                optimizer.step()

            # One forward pass is enough: its output feeds both the accuracy
            # and the final loss.
            with torch.no_grad():
                probs = model(X_t)
                pred = torch.round(probs)
                acc = (pred == y_t).float().mean().item()
                final_loss = criterion(probs, y_t).item()

            print(f"neurons={neurons:2d}   lr={lr:6.3f}   epochs={epochs:4d}   → acc={acc:.4f}   loss={final_loss:.5f}")

            # The small tolerance keeps float noise from replacing an equally
            # good model found earlier.
            if acc > best_acc_pt + 1e-5:
                best_acc_pt = acc
                best_model_pt = model

neurons= 2   lr= 0.001   epochs= 500   → acc=0.7500   loss=0.57173
neurons= 2   lr= 0.001   epochs=2000   → acc=1.0000   loss=0.16923
neurons= 2   lr= 0.005   epochs= 500   → acc=0.7500   loss=0.49131
neurons= 2   lr= 0.005   epochs=2000   → acc=0.5000   loss=0.69315
neurons= 2   lr= 0.020   epochs= 500   → acc=0.7500   loss=0.47818
neurons= 2   lr= 0.020   epochs=2000   → acc=0.5000   loss=0.69315
neurons= 2   lr= 0.080   epochs= 500   → acc=0.7500   loss=0.47742
neurons= 2   lr= 0.080   epochs=2000   → acc=0.7500   loss=0.47739
neurons= 2   lr= 0.300   epochs= 500   → acc=0.7500   loss=0.47739
neurons= 2   lr= 0.300   epochs=2000   → acc=0.5000   loss=0.69330
neurons= 4   lr= 0.001   epochs= 500   → acc=0.5000   loss=0.62205
neurons= 4   lr= 0.001   epochs=2000   → acc=0.7500   loss=0.49410
neurons= 4   lr= 0.005   epochs= 500   → acc=1.0000   loss=0.05409
neurons= 4   lr= 0.005   epochs=2000   → acc=1.0000   loss=0.00311
neurons= 4   lr= 0.020   epochs= 500   → acc=1.0000   loss=0.0

In [16]:
# Print the rounded 0/1 predictions of the best PyTorch model. The guard covers
# the case where no model was ever selected (best_model_pt still None), which
# would otherwise raise TypeError when calling None.
if best_model_pt is not None:
    # Inference only, so skip building an autograd graph.
    with torch.no_grad():
        best_pred_pt = torch.round(best_model_pt(X_t)).int().flatten().numpy()
    print("\nBest PyTorch model prediction:")
    print(best_pred_pt)
    print()
else:
    print("\nNo successful model found in PyTorch experiments")


Best PyTorch model prediction:
[0 1 1 0]



**TensorFlow Implementation (low-level, `tf.GradientTape`):**

In [17]:
# Running winner of the low-level TensorFlow grid search: best accuracy so far
# and the (W1, b1, W2, b2) variables that reached it.
best_acc_tf, best_vars = 0, None

In [18]:
# Low-level TensorFlow grid search: hand-written parameters, forward pass and
# GradientTape training loop for every (neurons, lr, epochs) combination.
for neurons in NEURONS_LIST:
    for lr in LEARNING_RATES:
        for epochs in EPOCHS_LIST:

            # Fresh parameters for this run: random-normal weights, zero biases.
            W1 = tf.Variable(tf.random.normal([2, neurons], stddev=0.8))
            b1 = tf.Variable(tf.zeros([neurons]))
            W2 = tf.Variable(tf.random.normal([neurons, 1], stddev=0.8))
            b2 = tf.Variable(tf.zeros([1]))
            trainable = [W1, b1, W2, b2]

            optimizer = tf.optimizers.Adam(learning_rate=lr)

            def forward(x):
                """ReLU hidden layer followed by a sigmoid output, using the current W1/b1/W2/b2."""
                hidden = tf.nn.relu(tf.matmul(x, W1) + b1)
                logits = tf.matmul(hidden, W2) + b2
                return tf.sigmoid(logits)

            for _ in range(epochs):
                with tf.GradientTape() as tape:
                    y_pred = forward(X_tf)
                    per_sample = tf.keras.losses.binary_crossentropy(y_tf, y_pred, from_logits=False)
                    loss = tf.reduce_mean(per_sample)

                # Differentiate outside the tape context, then apply one Adam step.
                grads = tape.gradient(loss, trainable)
                optimizer.apply_gradients(zip(grads, trainable))

            # Accuracy on the training rows: fraction of rounded outputs equal to the labels.
            pred = tf.round(forward(X_tf))
            hits = tf.cast(tf.equal(pred, y_tf), tf.float32)
            acc = tf.reduce_mean(hits).numpy()

            print(f"neurons={neurons:2d}   lr={lr:6.3f}   epochs={epochs:4d}   → acc={acc:.4f}")

            # Strict '>' keeps the earliest configuration when accuracies tie.
            if acc > best_acc_tf:
                best_acc_tf = acc
                best_vars = (W1, b1, W2, b2)

neurons= 2   lr= 0.001   epochs= 500   → acc=0.5000
neurons= 2   lr= 0.001   epochs=2000   → acc=0.7500
neurons= 2   lr= 0.005   epochs= 500   → acc=1.0000
neurons= 2   lr= 0.005   epochs=2000   → acc=0.7500
neurons= 2   lr= 0.020   epochs= 500   → acc=0.7500
neurons= 2   lr= 0.020   epochs=2000   → acc=1.0000
neurons= 2   lr= 0.080   epochs= 500   → acc=1.0000
neurons= 2   lr= 0.080   epochs=2000   → acc=0.7500
neurons= 2   lr= 0.300   epochs= 500   → acc=0.7500
neurons= 2   lr= 0.300   epochs=2000   → acc=0.7500
neurons= 4   lr= 0.001   epochs= 500   → acc=1.0000
neurons= 4   lr= 0.001   epochs=2000   → acc=1.0000
neurons= 4   lr= 0.005   epochs= 500   → acc=1.0000
neurons= 4   lr= 0.005   epochs=2000   → acc=0.7500
neurons= 4   lr= 0.020   epochs= 500   → acc=0.7500
neurons= 4   lr= 0.020   epochs=2000   → acc=1.0000
neurons= 4   lr= 0.080   epochs= 500   → acc=0.7500
neurons= 4   lr= 0.080   epochs=2000   → acc=1.0000
neurons= 4   lr= 0.300   epochs= 500   → acc=0.7500
neurons= 4  

In [19]:
# Re-run the forward pass with the best stored variables and print the rounded
# 0/1 predictions, or report that the grid search never selected a model.
if best_vars is None:
    print("\nNo successful model found in TF experiments")
else:
    W1, b1, W2, b2 = best_vars
    hidden = tf.nn.relu(tf.matmul(X_tf, W1) + b1)
    probs = tf.sigmoid(tf.matmul(hidden, W2) + b2)
    final_pred = tf.round(probs)
    print("\nBest TF Low-Level model prediction:")
    print(final_pred.numpy().flatten().astype(int))


Best TF Low-Level model prediction:
[0 1 1 0]
