<a href="https://colab.research.google.com/github/Redcoder815/Deep_Learning_TensorFlow/blob/main/07DropOut.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf

In [2]:
def dropout_layer(X, dropout):
    """Apply inverted dropout to ``X``.

    Each element is zeroed independently with probability ``dropout``; the
    surviving elements are divided by ``1 - dropout`` so the expected value of
    every element is unchanged.

    Args:
        X: Floating-point tensor, or anything ``tf.convert_to_tensor`` accepts
            that yields a floating-point tensor.
        dropout: Drop probability; must satisfy ``0 <= dropout <= 1``.

    Returns:
        A tensor with the same shape and dtype as ``X``. When ``dropout`` is 1
        the result is all zeros.

    Raises:
        AssertionError: If ``dropout`` is outside ``[0, 1]``.
    """
    assert 0 <= dropout <= 1
    X = tf.convert_to_tensor(X)
    # Everything is dropped; returning early also avoids dividing by zero below.
    if dropout == 1:
        return tf.zeros_like(X)
    # uniform() samples from [0, 1), so an element is kept with
    # probability 1 - dropout.
    keep_mask = tf.random.uniform(
        shape=tf.shape(X), minval=0, maxval=1) < 1 - dropout
    # Cast the mask to X's own dtype instead of hard-coding float32, so
    # float16/float64 inputs do not fail with a dtype mismatch on the multiply.
    return tf.cast(keep_mask, dtype=X.dtype) * X / (1.0 - dropout)

In [3]:
# Try dropout_layer on a 2x8 tensor holding 0..15 with three drop
# probabilities: keep everything, drop about half, drop everything.
X = tf.reshape(tf.range(16, dtype=tf.float32), (2, 8))
for label, drop_prob in (('dropout_p = 0:', 0),
                         ('dropout_p = 0.5:', 0.5),
                         ('dropout_p = 1:', 1)):
    print(label, dropout_layer(X, drop_prob))

dropout_p = 0: tf.Tensor(
[[ 0.  1.  2.  3.  4.  5.  6.  7.]
 [ 8.  9. 10. 11. 12. 13. 14. 15.]], shape=(2, 8), dtype=float32)
dropout_p = 0.5: tf.Tensor(
[[ 0.  2.  4.  0.  0.  0. 12.  0.]
 [ 0. 18. 20. 22.  0.  0. 28.  0.]], shape=(2, 8), dtype=float32)
dropout_p = 1: tf.Tensor(
[[0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]], shape=(2, 8), dtype=float32)


In [4]:
class DropoutMLPScratch(tf.keras.Model):
    """Two-hidden-layer MLP that applies the hand-written ``dropout_layer``.

    Inputs are flattened to ``(batch, features)``, passed through two ReLU
    ``Dense`` layers and a final linear ``Dense`` layer that produces raw
    scores (no softmax). Dropout is applied after each hidden layer, and only
    when ``training`` is truthy.

    Args:
        num_outputs: Number of units in the output layer.
        num_hiddens_1: Number of units in the first hidden layer.
        num_hiddens_2: Number of units in the second hidden layer.
        dropout_1: Drop probability applied after the first hidden layer.
        dropout_2: Drop probability applied after the second hidden layer.
    """

    def __init__(self, num_outputs, num_hiddens_1, num_hiddens_2,
                 dropout_1, dropout_2):
        super().__init__()
        self.lin1 = tf.keras.layers.Dense(num_hiddens_1, activation='relu')
        self.lin2 = tf.keras.layers.Dense(num_hiddens_2, activation='relu')
        self.lin3 = tf.keras.layers.Dense(num_outputs)
        self.dropout_1 = dropout_1
        self.dropout_2 = dropout_2

    def build(self, input_shape):
        """Create the weights of the three Dense layers.

        Args:
            input_shape: Shape of the model input, batch axis first, e.g.
                ``(None, 784)`` or ``(None, 28, 28)``.

        Raises:
            ValueError: If any non-batch dimension is unknown, because the
                flattened feature count cannot then be computed.
        """
        batch_dim = input_shape[0]

        # call() flattens every non-batch axis before lin1, so lin1 must be
        # sized for the product of those axes, not just the last one.
        # For an already-flat (None, 784) input this is still 784.
        num_features = 1
        for dim in input_shape[1:]:
            if dim is None:
                raise ValueError(
                    'All non-batch input dimensions must be known to build '
                    f'DropoutMLPScratch, got input_shape={tuple(input_shape)}')
            num_features *= int(dim)
        self.lin1.build((batch_dim, num_features))

        # Each later layer consumes the previous layer's output units.
        self.lin2.build((batch_dim, self.lin1.units))
        self.lin3.build((batch_dim, self.lin2.units))

        super().build(input_shape)  # Mark the model as built

    def call(self, X, training=False):
        """Run the forward pass.

        Args:
            X: Input batch; all non-batch axes are flattened into one.
            training: When truthy, dropout is applied after each hidden layer.

        Returns:
            The output of the final Dense layer, one row per example.
        """
        H1 = self.lin1(tf.reshape(X, (tf.shape(X)[0], -1)))
        if training:
            H1 = dropout_layer(H1, self.dropout_1)
        H2 = self.lin2(H1)
        if training:
            H2 = dropout_layer(H2, self.dropout_2)
        return self.lin3(H2)

In [5]:
# Hyperparameters used by the training cells further down
batch_size, lr, num_epochs = 256, 0.1, 10


def _flatten_and_scale(images):
    """Reshape images to (num_samples, 784) float32 rows and divide by 255."""
    flat = tf.reshape(images, (-1, 784))
    return tf.cast(flat, tf.float32) / 255.0


(X_train, y_train), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

# Features: flattened and normalized; labels: int64
X_train, X_test = _flatten_and_scale(X_train), _flatten_and_scale(X_test)
y_train, y_test = tf.cast(y_train, tf.int64), tf.cast(y_test, tf.int64)

# Input pipelines: the training set is shuffled then batched, the
# held-out set is only batched.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(buffer_size=1024)
    .batch(batch_size)
)
val_dataset = (
    tf.data.Dataset.from_tensor_slices((X_test, y_test))
    .batch(batch_size)
)

In [6]:
# from_logits=True: the loss is given raw scores, matching the models' final
# Dense layer, which has no activation.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# SGD with the learning rate `lr` defined in the data-preparation cell.
optimizer = tf.keras.optimizers.SGD(learning_rate=lr)

In [7]:
# Constructor arguments shared by both model variants in this notebook.
hparams = dict(
    num_outputs=10,
    num_hiddens_1=256,
    num_hiddens_2=256,
    dropout_1=0.5,
    dropout_2=0.5,
)
model = DropoutMLPScratch(**hparams)

In [8]:
# Attach the optimizer, the loss and an accuracy metric to the from-scratch model.
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

In [9]:
# Train for num_epochs epochs on train_dataset, with val_dataset passed as
# validation data; the value returned by fit() is kept in `history`.
history = model.fit(train_dataset, epochs=num_epochs, validation_data=val_dataset)

Epoch 1/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - accuracy: 0.5434 - loss: 1.2813 - val_accuracy: 0.7844 - val_loss: 0.5994
Epoch 2/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 18ms/step - accuracy: 0.7667 - loss: 0.6560 - val_accuracy: 0.8167 - val_loss: 0.5138
Epoch 3/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.8013 - loss: 0.5635 - val_accuracy: 0.8256 - val_loss: 0.4754
Epoch 4/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 19ms/step - accuracy: 0.8166 - loss: 0.5211 - val_accuracy: 0.8360 - val_loss: 0.4528
Epoch 5/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.8235 - loss: 0.4969 - val_accuracy: 0.8327 - val_loss: 0.4514
Epoch 6/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.8328 - loss: 0.4735 - val_accuracy: 0.8408 - val_loss: 0.4371
Epoch 7/10
[1m235/235

In [10]:
# Score the trained from-scratch model on the held-out set (progress output suppressed).
eval_results = model.evaluate(val_dataset, verbose=0)
val_loss, val_acc = eval_results
print(f"Final validation accuracy: {val_acc:.4f}")

Final validation accuracy: 0.8578


## Concise implementation using `tf.keras.Sequential` and the built-in `Dropout` layer

In [11]:
class DropoutMLP(tf.keras.Model):
    """MLP with two hidden layers built from stock Keras layers.

    The network is a ``Sequential`` stack: Flatten, Dense+ReLU, Dropout,
    Dense+ReLU, Dropout, and a final linear Dense layer that produces raw
    scores (no softmax).

    Args:
        num_outputs: Number of units in the output layer.
        num_hiddens_1: Number of units in the first hidden layer.
        num_hiddens_2: Number of units in the second hidden layer.
        dropout_1: Rate of the Dropout layer after the first hidden layer.
        dropout_2: Rate of the Dropout layer after the second hidden layer.
    """

    def __init__(self, num_outputs, num_hiddens_1, num_hiddens_2,
                 dropout_1, dropout_2):
        super().__init__()
        self.net = tf.keras.models.Sequential([
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(num_hiddens_1, activation=tf.nn.relu),
            tf.keras.layers.Dropout(dropout_1),
            tf.keras.layers.Dense(num_hiddens_2, activation=tf.nn.relu),
            tf.keras.layers.Dropout(dropout_2),
            tf.keras.layers.Dense(num_outputs)])

    def call(self, X, training=False):
        """Run the forward pass.

        Args:
            X: Input batch.
            training: Forwarded to the inner ``Sequential`` so its Dropout
                layers are active during training and disabled otherwise.

        Returns:
            The output of the final Dense layer, one row per example.
        """
        # The training flag must be declared here and passed on explicitly.
        # Without it the wrapped Dropout layers are not told that the model
        # is training, and they behave as identity during fit().
        return self.net(X, training=training)

In [12]:
# New loss and optimizer objects for the second model; this rebinds the
# `loss_fn` and `optimizer` names used for the from-scratch model.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.SGD(learning_rate=lr)

In [13]:
# Built from the same `hparams` dict as the from-scratch model.
model_sequential = DropoutMLP(**hparams)

In [14]:
# Attach the optimizer, the loss and an accuracy metric to the Sequential-based model.
model_sequential.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

In [15]:
# Same datasets and epoch count as the from-scratch run.
# NOTE: this rebinds `history`, so the first model's fit() result is no longer
# reachable under that name.
history = model_sequential.fit(train_dataset, epochs=num_epochs, validation_data=val_dataset)

Epoch 1/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.6593 - loss: 1.0143 - val_accuracy: 0.7957 - val_loss: 0.5984
Epoch 2/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.8214 - loss: 0.5095 - val_accuracy: 0.8375 - val_loss: 0.4635
Epoch 3/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.8415 - loss: 0.4473 - val_accuracy: 0.8231 - val_loss: 0.4862
Epoch 4/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.8541 - loss: 0.4094 - val_accuracy: 0.8371 - val_loss: 0.4470
Epoch 5/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - accuracy: 0.8606 - loss: 0.3872 - val_accuracy: 0.8407 - val_loss: 0.4411
Epoch 6/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.8652 - loss: 0.3724 - val_accuracy: 0.7985 - val_loss: 0.5808
Epoch 7/10
[1m235/235

In [16]:
# Score the trained Sequential-based model on the held-out set (progress output suppressed).
eval_results = model_sequential.evaluate(val_dataset, verbose=0)
val_loss, val_acc = eval_results
print(f"Final validation accuracy: {val_acc:.4f}")

Final validation accuracy: 0.8645
