In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist

# Load the MNIST train/test splits, then cast both image arrays to float32 and
# divide by 255.0. The label arrays are used exactly as loaded.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = (
  images.astype('float32') / 255.0
  for images in (x_train, x_test)
)


In [None]:
def custom_relu(x):
  """Forward pass of ReLU: element-wise maximum of `x` and 0.0.

  Args:
    x: Input accepted by `tf.maximum` (a float tensor in this notebook).

  Returns:
    The result of `tf.maximum(x, 0.0)`.
  """
  zero_floor = 0.0
  rectified = tf.maximum(x, zero_floor)
  return rectified

def custom_relu_grad(x):
  """Local derivative of ReLU with respect to its input.

  Args:
    x: The value that was fed to the forward pass.

  Returns:
    A ones-like of `x` where `x > 0` and a zeros-like of `x` everywhere
    else, so the slope at exactly 0 is taken to be 0.
  """
  is_positive = x > 0
  slope_on = tf.ones_like(x)
  slope_off = tf.zeros_like(x)
  return tf.where(is_positive, slope_on, slope_off)

@tf.custom_gradient
def custom_relu_op(x):
  """ReLU activation whose backward pass is supplied by hand.

  Wrapped with `tf.custom_gradient`, so the function returns the forward
  value together with a callable that maps the upstream gradient to the
  gradient with respect to `x`.

  Args:
    x: Pre-activation input.

  Returns:
    A `(y, grad_fn)` pair: `y` is `custom_relu(x)` and `grad_fn(dy)` returns
    `custom_relu_grad(x) * dy`.
  """
  def backward(upstream):
    # Chain rule: scale the incoming gradient by the local ReLU slope.
    local_slope = custom_relu_grad(x)
    return local_slope * upstream

  return custom_relu(x), backward


In [None]:
# Two classifiers that differ only in the hidden-layer activation, so any
# accuracy gap can be attributed to the activation implementation.
INIT_SEED = 42  # arbitrary; fixed so both models are built from the same RNG state


def build_classifier(hidden_activation):
  """Build the Flatten -> Dense(128) -> Dense(10, softmax) model for 28x28 inputs.

  Args:
    hidden_activation: Activation for the 128-unit hidden layer; either a
      Keras activation name (e.g. 'relu') or a callable.

  Returns:
    An uncompiled `tf.keras.Sequential` model.
  """
  # Reset the Python, NumPy and TensorFlow seeds before every build so that the
  # models being compared are initialised from the same random state rather
  # than from whatever state the previous build left behind.
  # Requires TF >= 2.7 (`tf.keras.utils.set_random_seed`).
  tf.keras.utils.set_random_seed(INIT_SEED)
  return tf.keras.Sequential([
    # An explicit Input replaces `input_shape=` on the first layer, which
    # Keras 3 flags as deprecated for Sequential models.
    tf.keras.Input(shape=(28, 28)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation=hidden_activation),
    tf.keras.layers.Dense(10, activation='softmax')
  ])


# Model A (Default ReLU)
model_a = build_classifier('relu')

# Model B (Custom ReLU)
model_b = build_classifier(custom_relu_op)


In [None]:
# Train both models with identical settings, then report held-out accuracy.
TRAIN_SEED = 42  # arbitrary; fixed so the two training runs are comparable
EPOCHS = 5

model_a.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_b.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Reset the Python, NumPy and TensorFlow seeds before each fit so both runs
# start from the same RNG state (e.g. for per-epoch shuffling) and a re-run of
# this cell is repeatable. Requires TF >= 2.7.
tf.keras.utils.set_random_seed(TRAIN_SEED)
model_a.fit(x_train, y_train, epochs=EPOCHS)
tf.keras.utils.set_random_seed(TRAIN_SEED)
model_b.fit(x_train, y_train, epochs=EPOCHS)

# evaluate() returns [loss, accuracy] because 'accuracy' is the only metric.
test_loss_a, test_acc_a = model_a.evaluate(x_test, y_test)
test_loss_b, test_acc_b = model_b.evaluate(x_test, y_test)

print("Model A (Default ReLU): Test Accuracy:", test_acc_a)
print("Model B (Custom ReLU): Test Accuracy:", test_acc_b)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model A (Default ReLU): Test Accuracy: 0.9753000140190125
Model B (Custom ReLU): Test Accuracy: 0.9786999821662903
