In [1]:
import tensorflow as tf
import numpy as np

tf.random.set_seed(42)
np.random.seed(42)

In [2]:
# ==========================================
# 1. Generate Synthetic Dataset
# ==========================================

N = 1000

X0 = np.random.randn(N//2, 2) + np.array([-2, -2])
X1 = np.random.randn(N//2, 2) + np.array([2, 2])

X = np.vstack([X0, X1]).astype(np.float32)
y = np.vstack([
    np.zeros((N//2, 1)),
    np.ones((N//2, 1))
]).astype(np.float32)

# Shuffle
idx = np.random.permutation(N)
X = X[idx]
y = y[idx]


In [3]:
# Train / Test Split
train_size = int(0.8 * N)

X_train = tf.constant(X[:train_size])
y_train = tf.constant(y[:train_size])

X_test = tf.constant(X[train_size:])
y_test = tf.constant(y[train_size:])

# 1. Using the with tf.GradientTape() as tape

In [4]:
# ==========================================
# 2. Define Model Class
# ==========================================

class SimpleMLP(tf.Module):
    
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        
        # Layer 1 parameters
        self.W1 = tf.Variable(
            tf.random.normal([input_dim, hidden_dim], stddev=0.1)
        )
        self.b1 = tf.Variable(tf.zeros([hidden_dim]))
        
        # Layer 2 parameters
        self.W2 = tf.Variable(
            tf.random.normal([hidden_dim, output_dim], stddev=0.1)
        )
        self.b2 = tf.Variable(tf.zeros([output_dim]))
    
    # Forward pass
    def __call__(self, X):
        h = tf.nn.relu(tf.matmul(X, self.W1) + self.b1)
        logits = tf.matmul(h, self.W2) + self.b2
        return logits
    
    # Loss function
    def compute_loss(self, X, y):
        logits = self(X)
        loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits)
        )
        return loss
    
    # One training step
    def train_step(self, X, y, learning_rate):
        with tf.GradientTape() as tape:
            loss = self.compute_loss(X, y)
        
        grads = tape.gradient(loss, self.trainable_variables)
        
        # Backpropagation
        for var, grad in zip(self.trainable_variables, grads):
            var.assign_sub(learning_rate * grad)
        
        return loss
    
    # Accuracy
    def accuracy(self, X, y):
        logits = self(X)
        probs = tf.sigmoid(logits)
        preds = tf.cast(probs > 0.5, tf.float32)
        return tf.reduce_mean(
            tf.cast(tf.equal(preds, y), tf.float32)
        )

In [5]:
# ==========================================
# 3. Train Model
# ==========================================

model = SimpleMLP(input_dim=2, hidden_dim=16, output_dim=1)

epochs = 200
lr = 0.01

In [6]:
for epoch in range(epochs):
    loss = model.train_step(X_train, y_train, lr)
    
    if epoch % 20 == 0:
        print(f"Epoch {epoch}, Loss: {loss.numpy():.4f}")

Epoch 0, Loss: 0.6427
Epoch 20, Loss: 0.6124
Epoch 40, Loss: 0.5812
Epoch 60, Loss: 0.5490
Epoch 80, Loss: 0.5158
Epoch 100, Loss: 0.4821
Epoch 120, Loss: 0.4481
Epoch 140, Loss: 0.4143
Epoch 160, Loss: 0.3808
Epoch 180, Loss: 0.3480


In [7]:
# ==========================================
# 4. Evaluation
# ==========================================

train_acc = model.accuracy(X_train, y_train)
test_acc = model.accuracy(X_test, y_test)

print("\nFinal Results:")
print("Train Accuracy:", train_acc.numpy())
print("Test Accuracy:", test_acc.numpy())



Final Results:
Train Accuracy: 0.99875
Test Accuracy: 1.0


# 2. With Optimizers

In [14]:
class NewSimpleMLP(tf.Module):
    
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        
        self.W1 = tf.Variable(tf.random.normal([input_dim, hidden_dim], stddev=0.1))
        self.b1 = tf.Variable(tf.zeros([hidden_dim]))
        
        self.W2 = tf.Variable(tf.random.normal([hidden_dim, output_dim], stddev=0.1))
        self.b2 = tf.Variable(tf.zeros([output_dim]))
    
    def __call__(self, X):
        h = tf.nn.relu(tf.matmul(X, self.W1) + self.b1)
        logits = tf.matmul(h, self.W2) + self.b2
        return logits
    
    def compute_loss(self, X, y):
        logits = self(X)
        return tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits)
        )
    
    def accuracy(self, X, y):
        logits = self(X)
        probs = tf.sigmoid(logits)
        preds = tf.cast(probs > 0.5, tf.float32)
        return tf.reduce_mean(tf.cast(tf.equal(preds, y), tf.float32))
    


In [16]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
# optimizer = tf.keras.optimizers.SGD(0.01)
# optimizer = tf.keras.optimizers.RMSprop(0.001)
# optimizer = tf.keras.optimizers.Adagrad(0.01)

In [17]:
# ==========================================
# 2. Training Step (Modern TF2)
# ==========================================

@tf.function
def train_step(model, optimizer, X, y):
    with tf.GradientTape() as tape:
        loss = model.compute_loss(X, y)

    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    return loss

In [18]:
# Create model and optimizer
model = NewSimpleMLP(2, 16, 1)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# Training loop
for epoch in range(200):
    loss = train_step(model, optimizer, X, y)
    
    if epoch % 20 == 0:
        print(f"Epoch {epoch}, Loss: {loss.numpy():.4f}")

print("Final Accuracy:", model.accuracy(X, y).numpy())

Epoch 0, Loss: 0.7011
Epoch 20, Loss: 0.6608
Epoch 40, Loss: 0.6149
Epoch 60, Loss: 0.5581
Epoch 80, Loss: 0.4893
Epoch 100, Loss: 0.4126
Epoch 120, Loss: 0.3367
Epoch 140, Loss: 0.2693
Epoch 160, Loss: 0.2141
Epoch 180, Loss: 0.1711
Final Accuracy: 0.998
