## **DYING RELU PROBLEM**
- *comparing several fixed learning rates*


### Layer-wise Dying ReLU Check on MNIST (0.1 LR)


In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.utils import to_categorical

In [2]:
# Fetch the MNIST train/test splits
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Cast pixels to float32, divide by 255, and flatten every image into a row of 784 values
x_train = (x_train.astype("float32") / 255.0).reshape(-1, 784)
x_test = (x_test.astype("float32") / 255.0).reshape(-1, 784)

# One-hot encode the labels over 10 classes (matches the 10-unit softmax output layer)
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

In [3]:
## Reproducibility
# Weight initialisation and batch shuffling are random, so the dead-neuron percentages
# change from run to run unless the RNGs are seeded. set_random_seed seeds the Python,
# NumPy and TensorFlow generators in one call; it must run before any layer is built.
tf.keras.utils.set_random_seed(42)

## Model Layers
# Fully connected classifier: 784 inputs -> 128 -> 64 -> 32 ReLU units -> 10-way softmax
inputs = Input(shape=(784,))
x = Dense(128,activation='relu',name = 'dense_1')(inputs)
x = Dense(64,activation='relu', name = 'dense_2')(x)
x = Dense(32, activation='relu', name = 'dense_3')(x)
outputs = Dense(10, activation='softmax', name = 'output')(x)

# Functional-API model for the 0.1 learning-rate experiment
model_1 = Model(inputs,outputs)

In [4]:
# Adam with learning rate 0.1 -- the setting examined in this section
optimizer_1 = tf.keras.optimizers.Adam(learning_rate=0.1)

# Labels are one-hot encoded, hence categorical cross-entropy; accuracy is tracked per epoch
model_1.compile(
    optimizer=optimizer_1,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [5]:
# Train for 5 epochs in mini-batches of 128; the MNIST test split serves as validation data
history_1 = model_1.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=5,
    verbose=1,
    validation_data=(x_test, y_test),
)

Epoch 1/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5141 - loss: 1.4670 - val_accuracy: 0.6313 - val_loss: 1.1877
Epoch 2/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4776 - loss: 1.3616 - val_accuracy: 0.5157 - val_loss: 1.2196
Epoch 3/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4795 - loss: 1.3563 - val_accuracy: 0.3321 - val_loss: 1.8514
Epoch 4/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4091 - loss: 1.4821 - val_accuracy: 0.3195 - val_loss: 1.5652
Epoch 5/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.3549 - loss: 1.5728 - val_accuracy: 0.3809 - val_loss: 1.5714


In [6]:
# Function to compute fraction of dead neurons in a given layer
def dead_neurons_fraction(model, layer_index, sample_data):
    """Return the fraction of units in one layer that never activate on ``sample_data``.

    A unit is counted as dead when its output equals 0 for every sample passed in.

    Args:
        model: Keras model whose ``input`` and ``layers`` are used to build a probe model.
        layer_index: Position in ``model.layers`` of the layer to inspect.
        sample_data: Inputs forwarded to ``predict`` (assumed to be shaped like the
            model's training inputs -- TODO confirm against caller).

    Returns:
        Mean of the per-unit "all outputs are zero" flags, i.e. a value in [0, 1].
    """
    target_layer = model.layers[layer_index]
    # Probe model: same input as `model`, but it stops at the layer being inspected
    probe = Model(inputs=model.input, outputs=target_layer.output)
    layer_outputs = probe.predict(sample_data, verbose=0)
    # Reduce over axis 0 (the samples) so each unit gets a single dead/alive flag
    unit_is_dead = (layer_outputs == 0).all(axis=0)
    return unit_is_dead.mean()

In [7]:
# Indices of the layers in model_1 whose activation function is named 'relu'.
# The nested getattr() defaults cover layers without an `activation` attribute as well as
# activations that are None or carry no `__name__`, so the scan cannot raise AttributeError.
relu_layers_1 = [
    i for i, layer in enumerate(model_1.layers)
    if getattr(getattr(layer, 'activation', None), '__name__', None) == 'relu'
]

In [8]:
# For every ReLU layer, report the share of units that output 0 on all of the
# first 1000 training samples
print("\nLayer-wise Dead ReLU Analysis:")
for idx in relu_layers_1:
    dead_frac = dead_neurons_fraction(
        model=model_1, layer_index=idx, sample_data=x_train[:1000]
    )
    print(f"Layer {model_1.layers[idx].name}: {dead_frac * 100:.2f}% neurons are dead")

# Validation accuracy recorded after the last training epoch
val_accuracy_per_epoch = history_1.history["val_accuracy"]
final_acc = val_accuracy_per_epoch[-1]
print(f"\nFinal Validation Accuracy: {final_acc * 100:.2f}%")


Layer-wise Dead ReLU Analysis:
Layer dense_1: 96.88% neurons are dead
Layer dense_2: 90.62% neurons are dead
Layer dense_3: 87.50% neurons are dead

Final Validation Accuracy: 38.09%


### Layer-wise Dying ReLU Check on MNIST (0.01 LR)

In [9]:
# Adam with learning rate 0.01 (the first experiment used 0.1)
optimizer_2 = tf.keras.optimizers.Adam(
    learning_rate=0.01,
)

In [10]:
## Model Layers
# Same stack as model_1: 784 inputs -> 128 -> 64 -> 32 ReLU units -> 10-way softmax
inputs = Input(shape=(784,))
x = Dense(units=128, activation="relu", name="dense_1")(inputs)
x = Dense(units=64, activation="relu", name="dense_2")(x)
x = Dense(units=32, activation="relu", name="dense_3")(x)
outputs = Dense(units=10, activation="softmax", name="output")(x)

# Functional-API model for the 0.01 learning-rate experiment
model_2 = Model(inputs=inputs, outputs=outputs)

In [11]:
# Same loss and metric as model_1; only the optimizer's learning rate differs
model_2.compile(
    optimizer=optimizer_2,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [12]:
# Train for 5 epochs in mini-batches of 128, holding out 20% of the training data for validation
history_2 = model_2.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=5,
    validation_split=0.2,
)

Epoch 1/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9076 - loss: 0.2988 - val_accuracy: 0.9532 - val_loss: 0.1671
Epoch 2/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9567 - loss: 0.1430 - val_accuracy: 0.9601 - val_loss: 0.1359
Epoch 3/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9666 - loss: 0.1126 - val_accuracy: 0.9621 - val_loss: 0.1308
Epoch 4/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9717 - loss: 0.0929 - val_accuracy: 0.9678 - val_loss: 0.1173
Epoch 5/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9758 - loss: 0.0841 - val_accuracy: 0.9707 - val_loss: 0.1122


In [13]:
# Indices of the layers in model_2 whose activation function is named 'relu'.
# The nested getattr() defaults cover layers without an `activation` attribute as well as
# activations that are None or carry no `__name__`, so the scan cannot raise AttributeError.
relu_layers_2 = [
    i for i, layer in enumerate(model_2.layers)
    if getattr(getattr(layer, 'activation', None), '__name__', None) == 'relu'
]

print("\nLayer-wise Dead ReLU Analysis:")
for idx in relu_layers_2:
    # Share of this layer's units that output 0 on all of the first 1000 training samples
    dead_frac = dead_neurons_fraction(model_2, idx, x_train[:1000])
    print(f"Layer {model_2.layers[idx].name}: {dead_frac * 100:.2f}% neurons are dead")

# Validation accuracy recorded after the last training epoch
final_acc = history_2.history["val_accuracy"][-1]
print(f"\nFinal Validation Accuracy: {final_acc * 100:.2f}%")


Layer-wise Dead ReLU Analysis:
Layer dense_1: 28.12% neurons are dead
Layer dense_2: 15.62% neurons are dead
Layer dense_3: 18.75% neurons are dead

Final Validation Accuracy: 97.07%


In [16]:
# Learning rates to compare; every run uses the same architecture and training settings
learning_rates = [0.5,0.1,0.01,0.001]

for lr in learning_rates:

    # Keras keeps global state for the models built in a session. Clearing it at the top of
    # each iteration stops that state from piling up while models are created in a loop.
    tf.keras.backend.clear_session()

    #define layers
    inputs = Input(shape=(784,))
    x = Dense(128,activation='relu',name = 'dense_1')(inputs)
    x = Dense(64,activation='relu', name = 'dense_2')(x)
    x = Dense(32, activation='relu', name = 'dense_3')(x)
    outputs = Dense(10, activation='softmax', name = 'output')(x)

    #define model
    _model = Model(inputs,outputs)

    #define optimizer -- a fresh Adam instance per run, with the learning rate under test
    _optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

    #compile the model
    _model.compile(loss = 'categorical_crossentropy', metrics = ['accuracy'], optimizer= _optimizer)

    #Train the model (20% of the training data is held out for validation)
    _history = _model.fit(x_train,y_train,validation_split = 0.2, epochs = 5, batch_size = 256)

    # Indices of the layers whose activation function is named 'relu'. The nested getattr()
    # defaults cover layers without an `activation` attribute as well as activations that are
    # None or carry no `__name__`, so the scan cannot raise AttributeError.
    _relu_layers = [
        i for i, layer in enumerate(_model.layers)
        if getattr(getattr(layer, 'activation', None), '__name__', None) == 'relu'
    ]

    print(f"\nLayer-wise Dead ReLU Analysis for model with Learning Rate: {lr}")
    for idx in _relu_layers:
        # Share of this layer's units that output 0 on all of the first 1000 training samples
        dead_frac = dead_neurons_fraction(_model, idx, x_train[:1000])
        print(f"Layer {_model.layers[idx].name}: {dead_frac * 100:.2f}% neurons are dead")

    # Validation accuracy recorded after the last training epoch
    final_acc = _history.history["val_accuracy"][-1]
    print(f"\nFinal Validation Accuracy: {final_acc * 100:.2f}%")
    

Epoch 1/5
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.1059 - loss: 110.2047 - val_accuracy: 0.0997 - val_loss: 2.3082
Epoch 2/5
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.1051 - loss: 2.3124 - val_accuracy: 0.1081 - val_loss: 2.3196
Epoch 3/5
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.1025 - loss: 2.3121 - val_accuracy: 0.0914 - val_loss: 2.3223
Epoch 4/5
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.1031 - loss: 2.3140 - val_accuracy: 0.1060 - val_loss: 2.3253
Epoch 5/5
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.1039 - loss: 2.3151 - val_accuracy: 0.1060 - val_loss: 2.3127

Layer-wise Dead ReLU Analysis for model with Learning Rate: 0.5
Layer dense_1: 98.44% neurons are dead
Layer dense_2: 96.88% neurons are dead
Layer dense_3: 100.00% neurons are dead

Final Val