<a href="https://colab.research.google.com/github/Sanjay030303/Full-Stack-Data-Science-2023/blob/main/DL_ASSIGNMENT_15.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### 1. Deep Learning

#### a. Build a DNN with five hidden layers of 100 neurons each, He initialization, and the ELU activation function.
```python
import tensorflow as tf
from tensorflow.keras import layers, models

def create_dnn_model(input_shape, num_classes):
    """Build an uncompiled classifier with five 100-unit ELU hidden layers.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled `Sequential` model; the caller is expected to compile it.
    """
    model = models.Sequential()
    model.add(layers.InputLayer(input_shape=input_shape))

    for _ in range(5):
        # Name the initializer instead of sharing one HeNormal object: recent
        # Keras versions warn that a reused, unseeded initializer instance
        # returns identical values for layers of the same shape.
        model.add(layers.Dense(100, activation='elu', kernel_initializer='he_normal'))

    model.add(layers.Dense(num_classes, activation='softmax'))
    return model

# One flattened 28x28 image per sample; five output classes.
num_classes = 5
input_shape = (28 * 28,)
model = create_dnn_model(input_shape=input_shape, num_classes=num_classes)
```

#### b. Train the model on MNIST (digits 0 to 4) using Adam optimization and early stopping.
```python
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Boolean masks selecting the digits 0 to 4
mask_train = y_train < 5
mask_test = y_test < 5

# Divide pixel values by 255, keep the selected digits, and flatten each image.
x_train = (x_train / 255.0)[mask_train].reshape(-1, 28 * 28)
x_test = (x_test / 255.0)[mask_test].reshape(-1, 28 * 28)

# One-hot encode the labels of the selected digits.
y_train = to_categorical(y_train[mask_train], num_classes)
y_test = to_categorical(y_test[mask_test], num_classes)

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Keep the best model on disk and stop after 10 epochs without improvement.
checkpoint_cb = ModelCheckpoint('dnn_model.h5', save_best_only=True)
early_stopping_cb = EarlyStopping(patience=10, restore_best_weights=True)

history = model.fit(
    x_train,
    y_train,
    epochs=100,
    validation_split=0.2,
    callbacks=[checkpoint_cb, early_stopping_cb],
)
```

#### c. Tune the hyperparameters using cross-validation.
```python
from sklearn.model_selection import KFold
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

def build_model():
    """Return a freshly built and compiled DNN for the scikit-learn wrapper.

    `create_dnn_model` returns an uncompiled model, and the wrapper fits
    whatever this function returns, so compilation has to happen here. The
    accuracy metric is included because the wrapper's `score` reports it.
    """
    cv_model = create_dnn_model(input_shape, num_classes)
    cv_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return cv_model

# Values tried for each hyperparameter.
param_grid = dict(batch_size=[16, 32, 64], epochs=[50, 100])

# The epochs/batch_size given here are defaults; the grid above overrides them.
model_cv = KerasClassifier(
    build_fn=build_model,
    epochs=100,
    batch_size=32,
    verbose=0,
)

# 3-fold cross-validated search over the grid.
grid_search = GridSearchCV(estimator=model_cv, param_grid=param_grid, cv=3)
grid_result = grid_search.fit(x_train, y_train)

best_params, best_score = grid_result.best_params_, grid_result.best_score_
print(f"Best Params: {best_params}, Best Score: {best_score}")
```

#### d. Add Batch Normalization and compare the learning curves.
```python
def create_dnn_model_with_bn(input_shape, num_classes):
    """Build the five-hidden-layer DNN with Batch Normalization before each ELU.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled `Sequential` model.
    """
    model = models.Sequential()
    model.add(layers.InputLayer(input_shape=input_shape))

    for _ in range(5):
        # Name the initializer instead of sharing one HeNormal object: recent
        # Keras versions warn that a reused, unseeded initializer instance
        # returns identical values for layers of the same shape.
        model.add(layers.Dense(100, kernel_initializer='he_normal'))
        # Normalize the pre-activations, then apply the non-linearity.
        model.add(layers.BatchNormalization())
        model.add(layers.Activation('elu'))

    model.add(layers.Dense(num_classes, activation='softmax'))
    return model

model_bn = create_dnn_model_with_bn(input_shape, num_classes)
model_bn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Use callbacks dedicated to this run. The baseline's checkpoint callback
# writes to 'dnn_model.h5', which the transfer-learning section later loads
# as the plain pretrained network, so this model must not be saved there.
checkpoint_bn_cb = ModelCheckpoint('dnn_model_bn.h5', save_best_only=True)
early_stopping_bn_cb = EarlyStopping(patience=10, restore_best_weights=True)

history_bn = model_bn.fit(
    x_train,
    y_train,
    epochs=100,
    validation_split=0.2,
    callbacks=[checkpoint_bn_cb, early_stopping_bn_cb],
)
```

#### e. Add dropout to every layer and try again.
```python
def create_dnn_model_with_dropout(input_shape, num_classes, dropout_rate=0.5):
    """Build the five-hidden-layer DNN with dropout after every hidden layer.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        num_classes: Number of units in the softmax output layer.
        dropout_rate: Rate passed to each `Dropout` layer.

    Returns:
        An uncompiled `Sequential` model.
    """
    model = models.Sequential()
    model.add(layers.InputLayer(input_shape=input_shape))

    for _ in range(5):
        # Name the initializer instead of sharing one HeNormal object: recent
        # Keras versions warn that a reused, unseeded initializer instance
        # returns identical values for layers of the same shape.
        model.add(layers.Dense(100, kernel_initializer='he_normal'))
        model.add(layers.Activation('elu'))
        model.add(layers.Dropout(dropout_rate))

    model.add(layers.Dense(num_classes, activation='softmax'))
    return model

model_dropout = create_dnn_model_with_dropout(input_shape, num_classes)
model_dropout.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Use callbacks dedicated to this run. The baseline's checkpoint callback
# writes to 'dnn_model.h5', which the transfer-learning section later loads
# as the plain pretrained network, so this model must not be saved there.
checkpoint_dropout_cb = ModelCheckpoint('dnn_model_dropout.h5', save_best_only=True)
early_stopping_dropout_cb = EarlyStopping(patience=10, restore_best_weights=True)

history_dropout = model_dropout.fit(
    x_train,
    y_train,
    epochs=100,
    validation_split=0.2,
    callbacks=[checkpoint_dropout_cb, early_stopping_dropout_cb],
)
```

### 2. Transfer Learning

#### a. Create a new DNN that reuses all the pretrained hidden layers of the previous model, freezes them, and replaces the softmax output layer with a new one.
```python
# Load the checkpointed model and take every layer except its last one.
pretrained_model = tf.keras.models.load_model('dnn_model.h5')
pretrained_layers = pretrained_model.layers[:-1]

# Stack the reused layers and freeze each of them.
new_model = models.Sequential(pretrained_layers)
for layer in new_model.layers:
    layer.trainable = False

# Fresh 5-way softmax output on top of the frozen stack.
new_output_layer = layers.Dense(5, activation='softmax')
new_model.add(new_output_layer)
```

#### b. Train this new DNN on digits 5 to 9 using only 100 images per digit.
```python
import numpy as np

# `x_train`/`y_train` were narrowed to the digits 0-4 and one-hot encoded
# earlier, so the digits 5-9 have to come from the raw MNIST training split.
(x_train_full, y_train_full), _ = mnist.load_data()

# Take the first 100 examples of each digit 5-9. Sorting the indices restores
# the dataset's original interleaved order, so the tail that
# `validation_split` holds out is not made of a single digit.
images_per_digit = 100
idx_5to9 = np.sort(np.concatenate([
    np.flatnonzero(y_train_full == digit)[:images_per_digit]
    for digit in range(5, 10)
]))

# Same preprocessing as the original task: flatten and divide by 255.
x_train_5to9 = x_train_full[idx_5to9].reshape(-1, 28 * 28) / 255.0
# Shift labels 5-9 down to 0-4 to match the 5-unit softmax, then one-hot encode.
y_train_5to9 = to_categorical(y_train_full[idx_5to9] - 5, 5)

new_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Dedicated callbacks: the baseline checkpoint callback targets
# 'dnn_model.h5', the file the pretrained layers were loaded from.
checkpoint_transfer_cb = ModelCheckpoint('dnn_model_5to9.h5', save_best_only=True)
early_stopping_transfer_cb = EarlyStopping(patience=10, restore_best_weights=True)

history_transfer = new_model.fit(
    x_train_5to9,
    y_train_5to9,
    epochs=100,
    validation_split=0.2,
    callbacks=[checkpoint_transfer_cb, early_stopping_transfer_cb],
)
```

#### c. Cache the frozen layers and train the model again.
```python
# Freeze the reused hidden layers and wrap them in their own model so their
# output can be computed once instead of on every training step.
for layer in pretrained_layers:
    layer.trainable = False
frozen_base = models.Sequential(pretrained_layers)


def cached_layer_output(x):
    """Return the output of the full frozen hidden-layer stack for inputs `x`."""
    return frozen_base.predict(x, verbose=0)


x_train_cached = cached_layer_output(x_train_5to9)

# Only the new output layer has trainable weights, so it is trained directly
# on the cached features rather than on the raw images.
cached_output_layer = layers.Dense(5, activation='softmax')
cached_head = models.Sequential([
    layers.InputLayer(input_shape=x_train_cached.shape[1:]),
    cached_output_layer,
])
cached_head.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Dedicated callbacks: the baseline checkpoint callback targets
# 'dnn_model.h5', the file the pretrained layers were loaded from.
checkpoint_cache_cb = ModelCheckpoint('dnn_head_5to9_cached.h5', save_best_only=True)
early_stopping_cache_cb = EarlyStopping(patience=10, restore_best_weights=True)

history_transfer_cache = cached_head.fit(
    x_train_cached,
    y_train_5to9,
    epochs=100,
    validation_split=0.2,
    callbacks=[checkpoint_cache_cb, early_stopping_cache_cb],
)

# Full model that accepts raw images: the frozen layers followed by the
# output layer that was just trained on the cached features.
new_model_with_cache = models.Sequential(pretrained_layers + [cached_output_layer])
new_model_with_cache.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
```

#### d. Reuse just four hidden layers instead of five.
```python
# Drop the last reused hidden layer, keeping the four below it, and freeze them.
new_model_4_layers = models.Sequential(pretrained_layers[:-1])
for layer in new_model_4_layers.layers:
    layer.trainable = False

new_model_4_layers.add(layers.Dense(5, activation='softmax'))
new_model_4_layers.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Dedicated callbacks: the baseline checkpoint callback targets
# 'dnn_model.h5', the file the pretrained layers were loaded from.
checkpoint_4_layers_cb = ModelCheckpoint('dnn_model_5to9_4_layers.h5', save_best_only=True)
early_stopping_4_layers_cb = EarlyStopping(patience=10, restore_best_weights=True)

history_transfer_4_layers = new_model_4_layers.fit(
    x_train_5to9,
    y_train_5to9,
    epochs=100,
    validation_split=0.2,
    callbacks=[checkpoint_4_layers_cb, early_stopping_4_layers_cb],
)
```

#### e. Unfreeze the top two hidden layers and continue training.
```python
# `new_model.layers[-1]` is the new softmax output layer, so the top two
# hidden layers are the two entries just before it.
for layer in new_model.layers[-3:-1]:
    layer.trainable = True

# Recompile so the changed `trainable` flags take effect.
new_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Dedicated callbacks: the baseline checkpoint callback targets
# 'dnn_model.h5', the file the pretrained layers were loaded from.
checkpoint_finetune_cb = ModelCheckpoint('dnn_model_5to9_finetuned.h5', save_best_only=True)
early_stopping_finetune_cb = EarlyStopping(patience=10, restore_best_weights=True)

history_finetune = new_model.fit(
    x_train_5to9,
    y_train_5to9,
    epochs=100,
    validation_split=0.2,
    callbacks=[checkpoint_finetune_cb, early_stopping_finetune_cb],
)
```

### 3. Pretraining on an Auxiliary Task

#### a. Build two DNNs without the output layer, then concatenate their outputs and add a new hidden layer and an output layer.
```python
def create_dnn_without_output(input_shape):
    """Build five 100-unit ELU hidden layers with no output layer on top.

    Args:
        input_shape: Shape of one input sample, without the batch axis.

    Returns:
        An uncompiled `Sequential` model whose output is the last hidden layer.
    """
    model = models.Sequential()
    model.add(layers.InputLayer(input_shape=input_shape))

    for _ in range(5):
        # Name the initializer instead of sharing one HeNormal object: recent
        # Keras versions warn that a reused, unseeded initializer instance
        # returns identical values for layers of the same shape.
        model.add(layers.Dense(100, activation='elu', kernel_initializer='he_normal'))
    return model

input_shape = (28 * 28,)
dnn_a = create_dnn_without_output(input_shape)
dnn_b = create_dnn_without_output(input_shape)

# Join the two towers' last hidden layers into one feature vector.
combined_input = layers.concatenate([dnn_a.output, dnn_b.output])
# `he_init` is only ever assigned as a local inside the builder functions, so
# it is not available here; name the initializer directly instead.
hidden_layer = layers.Dense(10, activation='elu', kernel_initializer='he_normal')(combined_input)
# Single sigmoid unit: the model makes one binary decision per image pair.
output_layer = layers.Dense(1, activation='sigmoid')(hidden_layer)

comparison_model = models.Model(inputs=[dnn_a.input, dnn_b.input], outputs=output_layer)
```

#### b. Create a function to generate training batches of pairs of images.
```python
import numpy as np

def generate_pairs(x, y, num_pairs):
    """Build a set of image pairs labelled same-class (0) or different-class (1).

    For each of `num_pairs // 2` rounds and for every class, one same-class
    pair and one different-class pair are drawn at random, so the result holds
    `2 * n_classes * (num_pairs // 2)` pairs in total.

    Args:
        x: Array of samples, indexed along its first axis.
        y: Class labels for `x`, either a 1-D array of class ids or a 2-D
            one-hot array (converted with `argmax`).
        num_pairs: Controls the number of rounds, as described above.

    Returns:
        `([left, right], labels)` where `left[i]` and `right[i]` form pair `i`
        and `labels[i]` is 0 when both come from the same class, 1 otherwise.

    Raises:
        ValueError: If fewer than two classes are present, or a class has
            fewer than two samples (raised by `np.random.choice`).
    """
    x = np.asarray(x)
    y = np.asarray(y)
    if y.ndim > 1:
        # One-hot rows -> integer class ids.
        y = y.argmax(axis=1)

    # Sample indices grouped by class, in sorted class order.
    class_idx = [np.flatnonzero(y == label) for label in np.unique(y)]
    n_classes = len(class_idx)
    if n_classes < 2:
        raise ValueError("generate_pairs needs at least two classes")

    left_idx, right_idx, labels = [], [], []
    for _ in range(num_pairs // 2):
        for digit in range(n_classes):
            # Same-class pair: two distinct samples of this class.
            idx_a, idx_b = np.random.choice(class_idx[digit], 2, replace=False)
            left_idx.append(idx_a)
            right_idx.append(idx_b)
            labels.append(0)

            # Different-class pair: a non-zero offset guarantees another class.
            digit_b = (digit + np.random.randint(1, n_classes)) % n_classes
            idx_b = np.random.choice(class_idx[digit_b])
            left_idx.append(idx_a)
            right_idx.append(idx_b)
            labels.append(1)

    left_idx = np.array(left_idx, dtype=np.intp)
    right_idx = np.array(right_idx, dtype=np.intp)
    return [x[left_idx], x[right_idx]], np.array(labels, dtype=int)
           


```

#### c. Train the DNN on the training set of image pairs.
```python
def generate_pairs_batch(x, y, batch_size):
    """Yield endless random batches of same-class / different-class image pairs.

    Each batch runs `batch_size // 2` rounds; every round adds, for each
    class, one same-class pair (label 0) and one different-class pair
    (label 1). A batch therefore holds `2 * n_classes * (batch_size // 2)`
    pairs.

    Args:
        x: Array of samples, indexed along its first axis.
        y: Class labels for `x`, either a 1-D array of class ids or a 2-D
            one-hot array (converted with `argmax`).
        batch_size: Controls the number of rounds per batch, as described above.

    Yields:
        `([left, right], labels)` where `left[i]` and `right[i]` form pair `i`.

    Raises:
        ValueError: On the first `next()` if fewer than two classes are
            present or `batch_size` is below 2 (which would give empty
            batches); also from `np.random.choice` if a class has fewer than
            two samples.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    if y.ndim > 1:
        # One-hot rows -> integer class ids.
        y = y.argmax(axis=1)

    # The class index does not change between batches, so build it once.
    class_idx = [np.flatnonzero(y == label) for label in np.unique(y)]
    n_classes = len(class_idx)
    if n_classes < 2:
        raise ValueError("generate_pairs_batch needs at least two classes")
    if batch_size < 2:
        raise ValueError("batch_size must be at least 2")

    while True:
        left_idx, right_idx, labels = [], [], []

        for _ in range(batch_size // 2):
            for digit in range(n_classes):
                # Same-class pair: two distinct samples of this class.
                idx_a, idx_b = np.random.choice(class_idx[digit], 2, replace=False)
                left_idx.append(idx_a)
                right_idx.append(idx_b)
                labels.append(0)

                # Different-class pair: a non-zero offset guarantees another class.
                digit_b = (digit + np.random.randint(1, n_classes)) % n_classes
                idx_b = np.random.choice(class_idx[digit_b])
                left_idx.append(idx_a)
                right_idx.append(idx_b)
                labels.append(1)

        left = x[np.array(left_idx, dtype=np.intp)]
        right = x[np.array(right_idx, dtype=np.intp)]
        yield [left, right], np.array(labels, dtype=int)

comparison_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# The pair generator compares class ids, but `y_train`/`y_test` were one-hot
# encoded earlier; convert them back to integer digit labels first.
y_train_digits = np.argmax(y_train, axis=1) if np.ndim(y_train) > 1 else np.asarray(y_train)
y_test_digits = np.argmax(y_test, axis=1) if np.ndim(y_test) > 1 else np.asarray(y_test)

train_pairs_generator = generate_pairs_batch(x_train, y_train_digits, batch_size=32)

# Pair the first block of test images with the next block of equal size (up
# to 2500 each) and label each pair like the generator does: 0 when both
# images show the same digit, 1 otherwise.
n_val = min(2500, len(x_test) // 2)
x_val_a, x_val_b = x_test[:n_val], x_test[n_val:2 * n_val]
y_val_pairs = (y_test_digits[:n_val] != y_test_digits[n_val:2 * n_val]).astype('float32')
# A 2-tuple (inputs, targets); a third element would be read as sample weights.
validation_pairs = ([x_val_a, x_val_b], y_val_pairs)

history_comparison = comparison_model.fit(
    train_pairs_generator,
    steps_per_epoch=len(x_train) // 32,
    epochs=100,
    validation_data=validation_pairs,
)
```

#### d. Create a new DNN by reusing and freezing the hidden layers of DNN A and add a softmax output layer on top with 10 neurons.
```python
# `dnn_a` was built without an output layer, so every one of its layers is a
# hidden layer to reuse; slicing off the last one would drop a hidden layer.
pretrained_layers_a = dnn_a.layers

new_model_from_a = models.Sequential(pretrained_layers_a)
for layer in new_model_from_a.layers:
    layer.trainable = False

new_model_from_a.add(layers.Dense(10, activation='softmax'))

new_model_from_a.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# A 10-way softmax with a sparse loss needs integer labels for all ten digits,
# whereas `x_train`/`y_train` hold only the digits 0-4 with one-hot labels.
# Reload the raw training split and apply the same flatten / divide-by-255 step.
(x_train_all, y_train_all), _ = mnist.load_data()
x_train_all = x_train_all.reshape(-1, 28 * 28) / 255.0

# Dedicated callbacks so this run does not write into the baseline's
# 'dnn_model.h5' checkpoint file.
checkpoint_from_a_cb = ModelCheckpoint('dnn_model_from_a.h5', save_best_only=True)
early_stopping_from_a_cb = EarlyStopping(patience=10, restore_best_weights=True)

history_new_model_from_a = new_model_from_a.fit(
    x_train_all,
    y_train_all,
    epochs=100,
    validation_split=0.2,
    callbacks=[checkpoint_from_a_cb, early_stopping_from_a_cb],
)
```

These steps conclude the exercise on pretraining on an auxiliary task using DNNs for comparing MNIST digit images and then transferring the learned features to a new classification task. This approach leverages the shared lower layers for better performance with limited training data on the new task.