# **Regularization in Deep Learning**
L1 and L2 Regularization   
Dropout   
Data Augmentation   
Early stopping   
K-fold validation    

# **L1 Regularization**

In [1]:

import tensorflow as tf
from tensorflow.keras import layers, models, regularizers

In [2]:
def create_model(l1_strength=0.01):
  model = models.Sequential()
# L1 regularization
  model.add(layers.Dense(64, activation='relu',
                         kernel_regularizer=regularizers.l1(l1_strength),
  input_shape=(784,))) # input size of 784 ( MNIST images)

# L1 regularization
  model.add(layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l1(l1_strength)))
# Output layer (without regularization)
  model.add(layers.Dense(10, activation='softmax')) # For classification with 10 classes
# Compile the model with a loss function and optimizer
  model.compile(optimizer='adam',

  loss='sparse_categorical_crossentropy', # Example loss for classification
  metrics=['accuracy'])

  return model

In [3]:
# Create a model with L1 = 0.01 regularization strength
model = create_model(l1_strength=0.01)
# Print the model summary to check the architecture
model.summary()
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train, X_test = X_train / 255.0, X_test / 255.0 # Normalize images to [0, 1]
X_train = X_train.reshape(-1, 784) # Flatten 28x28 images into vectors
X_test = X_test.reshape(-1, 784)
model.fit(X_train, y_train, epochs=10, batch_size=32)
# Evaluate the model on test data
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc}')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.7048 - loss: 4.2570
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.8481 - loss: 1.1654
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - accuracy: 0.8599 - loss: 1.0630
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8665 - loss: 1.0050
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8684 - loss: 0.9628
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8736 - loss: 0.9355
Epoch 7/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - 

# **L2 Regularization**

In [4]:
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers

#Example of creating a simple neural network model with L1 and L2 regularizartion

def create_model(l2_strength=0.01):
  model = models.Sequential()
  #L1 reguylarization
  model.add(layers.Dense(64, activation='relu', input_shape=(784,), kernel_regularizer=regularizers.l1(l2_strength))) #inputsize of 748 (MNIST images)

  # L1 regularization
  model.add(layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l1(l2_strength)))

  #Output layer (Without Regularization)
  model.add(layers.Dense(10, activation='softmax')) # For classification with 10 classes

  #Compile the model with a loss function and optimizer
  model.compile(optimizer='adam',
  loss='sparse_categorical_crossentropy', # Example loss for classification
  metrics=['accuracy'])

  return model

#Create a model with L1 = 0.01 regularization strength
model = create_model(l2_strength=0.01)

#Print the model summary to check the architecture
model.summary()

(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train, X_test = X_train / 255.0, X_test / 25 #Normalize images to [0,1]

X_train = X_train.reshape(-1, 784) # Flatten 28x28 images into vectors
X_test = X_test.reshape(-1, 784)

model.fit(X_train, y_train, epochs=10, batch_size=32)

#Evaluate the model on test data
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc}')

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.7107 - loss: 4.2863
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - accuracy: 0.8488 - loss: 1.1405
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8598 - loss: 1.0527
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8639 - loss: 1.0002
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8662 - loss: 0.9707
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8702 - loss: 0.9381
Epoch 7/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8709 - loss: 0.9199
Epoch 8/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.8704 - loss: 0.9081
Epoch 9/10
[1m1875/187

# **Dropout**

In [5]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Function to create a model with Dropout regularization
def create_model(dropout_rate=0.5):
  model = models.Sequential()

  # Input layer
  model.add(layers.Dense(64, activation='relu', input_shape=(784,))) # Example input size of 784 (e.g., MNIST images)

  # Dropout layer after the first Dense layer
  model.add(layers.Dropout(dropout_rate))

  # Hidden layer
  model.add(layers.Dense(64, activation='relu'))

  # Dropout layer after the second Dense layer
  model.add(layers.Dropout(dropout_rate))

  # Output layer (softmax for classification)
  model.add(layers.Dense(10, activation='softmax')) # Example for classification with 10 classes

  # Compile the model with a loss function and optimizer
  model.compile(optimizer='adam',

  loss='sparse_categorical_crossentropy', # Loss function for classification
  metrics=['accuracy'])

  return model

# Create a model with Dropout regularization (50% dropout rate)
model = create_model(dropout_rate=0.5)

# Print the model summary to check the architecture
model.summary()

(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train, X_test = X_train / 255.0, X_test / 255.0 # Normalize images to [0, 1]
X_train = X_train.reshape(-1, 784) # Flatten 28x28 images into vectors
X_test = X_test.reshape(-1, 784)
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate the model on test data
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc}')

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.6541 - loss: 1.0459
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8760 - loss: 0.4284
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.8943 - loss: 0.3663
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9048 - loss: 0.3353
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9084 - loss: 0.3219
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9120 - loss: 0.3018
Epoch 7/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9164 - loss: 0.2947
Epoch 8/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9194 - loss: 0.2831
Epoch 9/10
[1m1875/187

# **Early Stopping Regularization**

In [6]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping

# Function to create a simple neural network model
def create_model():
  model = models.Sequential()
  # Input layer
  model.add(layers.Dense(64, activation='relu', input_shape=(784,))) # Example input size (e.g., MNIST images)
  # Hidden layer
  model.add(layers.Dense(64, activation='relu'))
  # Output layer (softmax for classification)
  model.add(layers.Dense(10, activation='softmax')) # Example for classification with 10 classes
  # Compile the model with a loss function and optimizer
  model.compile(optimizer='adam',

  loss='sparse_categorical_crossentropy', # Loss function for classification
  metrics=['accuracy'])

  return model
# Create a model
model = create_model()

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', # Monitor validation loss

patience=3, # Number of epochs to wait for improvement
restore_best_weights=True, # Restore the best weights after stopping
verbose=1) # Print messages when early stopping is triggered

X_train, X_test = X_train / 255.0, X_test / 255.0 # Normalize images to [0, 1]
X_train = X_train.reshape(-1, 784) # Flatten 28x28 images into vectors
# Train the model with early stopping
# Using a validation set to monitor the validation loss
model.fit(X_train, y_train, epochs=50, batch_size=32,
validation_data=(X_test, y_test),
callbacks=[early_stopping])
# After training, you can evaluate the model on test data
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc}')

Epoch 1/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.5265 - loss: 1.4508 - val_accuracy: 0.8485 - val_loss: 0.5244
Epoch 2/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.8599 - loss: 0.4828 - val_accuracy: 0.8851 - val_loss: 0.3924
Epoch 3/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.8869 - loss: 0.3917 - val_accuracy: 0.9007 - val_loss: 0.3425
Epoch 4/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8998 - loss: 0.3509 - val_accuracy: 0.9064 - val_loss: 0.3152
Epoch 5/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9090 - loss: 0.3125 - val_accuracy: 0.9139 - val_loss: 0.2937
Epoch 6/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9177 - loss: 0.2882 - val_accuracy: 0.9225 - val_loss: 0.2694
Epoch 7/50
[1m

# **Elastic Net**

In [7]:
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers

# Function to create a model with Elastic Net (L1 + L2) regularization
def create_model(l1_strength=0.01, l2_strength=0.01):
    model = models.Sequential()
    # Input layer with Elastic Net regularization
    model.add(layers.Dense(64, activation='relu',
        kernel_regularizer=regularizers.l1_l2(l1=l1_strength, l2=l2_strength),
        input_shape=(784,)))  # Input shape set to 784 for flattened MNIST images

    # Hidden layer with Elastic Net regularization
    model.add(layers.Dense(64, activation='relu',
        kernel_regularizer=regularizers.l1_l2(l1=l1_strength, l2=l2_strength)))

    # Output layer (softmax for classification)
    model.add(layers.Dense(10, activation='softmax'))  # For 10 classes

    # Compile the model with a loss function and optimizer
    model.compile(optimizer='adam',
        loss='sparse_categorical_crossentropy',  # Loss function for classification
        metrics=['accuracy'])
    return model

# Create a model with Elastic Net regularization (L1 = 0.01, L2 = 0.01)
model = create_model(l1_strength=0.01, l2_strength=0.01)

# Print the model summary to check the architecture
model.summary()

# Load and preprocess the data
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train, X_test = X_train / 255.0, X_test / 255.0  # Normalize images to [0, 1]

# Flatten images into vectors of size 784 (28*28)
X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate the model on test data
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc}')

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.6812 - loss: 4.4455
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - accuracy: 0.8348 - loss: 1.2368
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8448 - loss: 1.1145
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8512 - loss: 1.0541
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.8529 - loss: 1.0189
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.8571 - loss: 0.9811
Epoch 7/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.8600 - loss: 0.9536
Epoch 8/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.8588 - loss: 0.9477
Epoch 9/10
[1m1875/187