# Regularization Techniques - Part 1

#1. L1 and L2 Regularization

L1 and L2 regularization are techniques used to prevent overfitting by adding a penalty term to the loss function based on the magnitudes of model weights.

L1 Regularization: Adds a penalty term proportional to the sum of the absolute values of the weights to the loss function. This pushes many weights to exactly zero, which encourages sparsity in the model.

L2 Regularization: Adds a penalty term proportional to the sum of the squared weights to the loss function. This encourages the weights to stay small without forcing them to exactly zero.

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.regularizers import l1, l2
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Load MNIST, flatten every image to a 784-element vector and scale pixels to [0, 1]
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# -1 lets NumPy infer the sample count instead of hard-coding 60000 / 10000
train_images = train_images.reshape((-1, 784)).astype('float32') / 255
test_images = test_images.reshape((-1, 784)).astype('float32') / 255
# One-hot encode the digit labels. The class count is passed explicitly so both
# splits always get 10 columns instead of a width inferred from the largest label.
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)

# Build a 784 -> 128 -> 64 -> 10 classifier layer by layer; the two hidden
# layers carry an L1 penalty (factor 0.001) on their kernel weights
model_l1 = Sequential()
model_l1.add(Dense(128, activation='relu', kernel_regularizer=l1(0.001), input_shape=(784,)))
model_l1.add(Dense(64, activation='relu', kernel_regularizer=l1(0.001)))
model_l1.add(Dense(10, activation='softmax'))

model_l1.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit for 20 epochs, holding out 20% of the training data for validation
history_l1 = model_l1.fit(
    train_images,
    train_labels,
    batch_size=64,
    epochs=20,
    validation_split=0.2,
)

# Same 784 -> 128 -> 64 -> 10 architecture, but the hidden layers use an
# L2 penalty (factor 0.001) on their kernel weights
model_l2 = Sequential()
model_l2.add(Dense(128, activation='relu', kernel_regularizer=l2(0.001), input_shape=(784,)))
model_l2.add(Dense(64, activation='relu', kernel_regularizer=l2(0.001)))
model_l2.add(Dense(10, activation='softmax'))

model_l2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit for 20 epochs, holding out 20% of the training data for validation
history_l2 = model_l2.fit(
    train_images,
    train_labels,
    batch_size=64,
    epochs=20,
    validation_split=0.2,
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


#2. Dropout Regularization

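Dropout is a technique used during training to randomly deactivate (set to zero) a fraction of the neurons in a layer at each training step, which helps prevent overfitting. It is applied only during training; at inference time all neurons are kept.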

In [None]:
from tensorflow.keras.layers import Dropout

# Build the classifier layer by layer; a Dropout layer with rate 0.25 follows
# each hidden Dense layer
model_dropout = Sequential()
model_dropout.add(Dense(128, activation='relu', input_shape=(784,)))
model_dropout.add(Dropout(0.25))
model_dropout.add(Dense(64, activation='relu'))
model_dropout.add(Dropout(0.25))
model_dropout.add(Dense(10, activation='softmax'))

model_dropout.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit for 20 epochs, holding out 20% of the training data for validation
history_dropout = model_dropout.fit(
    train_images,
    train_labels,
    batch_size=64,
    epochs=20,
    validation_split=0.2,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


#3. Batch Normalization

Batch Normalization normalizes the activations of a layer over each mini-batch to stabilize and speed up training. It can also act as a regularizer.

In [None]:
from tensorflow.keras.layers import BatchNormalization

# Build the classifier layer by layer; a BatchNormalization layer follows
# each hidden Dense layer (i.e. it is applied after the ReLU activation)
model_bn = Sequential()
model_bn.add(Dense(128, activation='relu', input_shape=(784,)))
model_bn.add(BatchNormalization())
model_bn.add(Dense(64, activation='relu'))
model_bn.add(BatchNormalization())
model_bn.add(Dense(10, activation='softmax'))

model_bn.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit for 20 epochs, holding out 20% of the training data for validation
history_bn = model_bn.fit(
    train_images,
    train_labels,
    batch_size=64,
    epochs=20,
    validation_split=0.2,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


#4. Early Stopping

Early Stopping monitors a specified metric (e.g., validation loss) and stops training once the metric has not improved for a given number of consecutive epochs (the "patience").

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Define and compile a plain fully connected model (no explicit regularizer);
# early stopping is the only technique demonstrated in this cell
model = Sequential([
    Dense(128, activation='relu', input_shape=(784,)),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Stop once val_loss has failed to improve for 3 consecutive epochs.
# restore_best_weights=True rolls the model back to the weights of its best
# epoch; without it the model would keep the weights from the final epoch,
# which is `patience` epochs past the best one.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train for at most 50 epochs; the callback may end training earlier
history = model.fit(train_images, train_labels, epochs=50, batch_size=64,
                    validation_split=0.2, callbacks=[early_stopping])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50


#5. Data Augmentation

Data augmentation involves generating new training samples by randomly perturbing existing data (e.g., rotating, scaling, shifting), which helps in creating a more robust model.

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load MNIST, add a trailing channel axis so each image is 28x28x1 (the input
# shape the CNN below declares) and scale pixels to [0, 1].
# Labels are left as integer class ids.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# -1 lets NumPy infer the sample count instead of hard-coding 60000 / 10000
train_images = train_images.reshape((-1, 28, 28, 1)).astype('float32') / 255
test_images = test_images.reshape((-1, 28, 28, 1)).astype('float32') / 255

# Generator that applies a random geometric perturbation to every image it yields
datagen = ImageDataGenerator(
    fill_mode='nearest',      # pixels exposed by a transform copy the nearest valid pixel
    rotation_range=10,        # random rotation, range given in degrees
    shear_range=0.2,          # random shear intensity
    zoom_range=0.2,           # random zoom in the range [0.8, 1.2]
    width_shift_range=0.1,    # random horizontal shift, as a fraction of image width
    height_shift_range=0.1,   # random vertical shift, as a fraction of image height
)

# Small CNN built layer by layer: one 3x3 convolution with 32 filters, 2x2 max
# pooling, then a 128-unit dense layer and a 10-way softmax output
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

# The sparse loss variant is used because the labels are integer class ids,
# not one-hot vectors
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on augmented batches that the generator produces on the fly.
# No datagen.fit() call is made: it only computes dataset statistics for
# featurewise_center, featurewise_std_normalization or zca_whitening, none of
# which this generator enables, so calling it would just copy the training
# array for nothing. Add it back if one of those options is ever turned on.
# NOTE: the validation metrics reported here are computed on the test set.
model.fit(datagen.flow(train_images, train_labels, batch_size=64), epochs=20, validation_data=(test_images, test_labels))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7896f6c3c1c0>

#6. Label Smoothing

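Label smoothing replaces the hard 0s and 1s in the target labels with softened values such as 0.1 and 0.9, respectively. With smoothing factor ε and K classes, each target y becomes y·(1 − ε) + ε/K, so the code below (ε = 0.1, K = 10) turns 0 into 0.01 and 1 into 0.91. This prevents the model from becoming too confident and encourages it to generalize better.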

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Load MNIST, flatten every image to a 784-element vector and scale pixels to [0, 1]
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# -1 lets NumPy infer the sample count instead of hard-coding 60000 / 10000
train_images = train_images.reshape((-1, 784)).astype('float32') / 255
test_images = test_images.reshape((-1, 784)).astype('float32') / 255
# Despite the name, these are plain (hard) one-hot labels; the smoothing itself
# is applied inside the loss function (custom_loss) at training time
train_labels_smooth = to_categorical(train_labels, num_classes=10, dtype='float32')

# Define and compile a model with label smoothing
def custom_loss(y_true, y_pred):
    """Categorical cross-entropy with label smoothing (smoothing factor 0.1).

    The one-hot targets are softened to ``y_true * (1 - epsilon) + epsilon / K``
    before the cross-entropy is computed, where ``K`` is the number of classes.
    The smoothing is delegated to Keras' built-in ``label_smoothing`` argument,
    which applies that formula and takes ``K`` from the last dimension of
    ``y_true`` rather than from a hard-coded class count.

    Args:
        y_true: One-hot encoded target labels.
        y_pred: Predicted class probabilities (the model's softmax output).

    Returns:
        The cross-entropy between the smoothed targets and ``y_pred``, as
        returned by ``tf.keras.losses.categorical_crossentropy``.
    """
    epsilon = 0.1
    return tf.keras.losses.categorical_crossentropy(
        y_true, y_pred, label_smoothing=epsilon
    )

# 784 -> 128 -> 64 -> 10 fully connected classifier, built layer by layer
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(128, activation='relu', input_shape=(784,)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

# custom_loss is the label-smoothing cross-entropy defined above
model.compile(
    optimizer='adam',
    loss=custom_loss,
    metrics=['accuracy'],
)

# Train with the label-smoothing loss; validation runs on the one-hot encoded
# test set and goes through the same loss function
model.fit(
    train_images,
    train_labels_smooth,
    batch_size=64,
    epochs=20,
    validation_data=(test_images, to_categorical(test_labels)),
)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x78970dfde110>