<a href="https://www.kaggle.com/code/shaikhabdulrafay03/hyperparameter-tuning-and-desicion-boundary?scriptVersionId=166580212" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import time

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input directory and print the full path of every
# file found beneath it (nothing is printed for directories without files).
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
# Load the Fashion-MNIST train and test CSV files into DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/fashionmnist/fashion-mnist_train.csv',
        '/kaggle/input/fashionmnist/fashion-mnist_test.csv',
    )
)
# Last expression of the cell: lets the notebook render a preview of `train`.
train

In [None]:
import matplotlib.pyplot as plt
import numpy as np
# Take the first row without its leading column (presumably the label) and
# view the remaining values as a 28x28 image.
image = train.iloc[0, 1:].to_numpy().reshape(28, 28)

plt.imshow(image, cmap='gray')
plt.show()

In [None]:
# Summarise the size and data quality of the training DataFrame.
#
# Percentages are computed against the quantity they describe:
#   * missing cells     / all cells (rows * columns, NaNs included)
#   * duplicated rows   / all rows
# Dividing by the non-missing cell count would overstate the missing share
# and understate the duplicate share by a factor of the column count.
total_cells = train.size
total_rows = len(train)
non_missing_cells = train.count().sum()
missing_cells = train.isna().sum().sum()
duplicated_rows = train.duplicated().sum()

# Guard the ratios so an empty frame reports 0.00 % instead of dividing by zero.
missing_pct = missing_cells / total_cells * 100 if total_cells else 0.0
duplicated_pct = duplicated_rows / total_rows * 100 if total_rows else 0.0

print("DataSet Information:")
print("-" * 30)
print('DF Shape: ' ,train.shape)
print('number of Columns: ' ,len(train.columns))
print('number of Observations: ' ,total_rows)
print('Number of values in train: ' , non_missing_cells)
print('Total Number of Missing values in train: ' , missing_cells)
print('percentage of Missing values : ' ,  "{:.2f}".format(missing_pct),'%')
print('Total Number of Duplicated records in train : ' , duplicated_rows)
print('percentage of Duplicated values : ' ,  "{:.2f}".format(duplicated_pct),'%')

In [None]:
# Remove fully identical rows, keeping the first occurrence of each.
train = train.drop_duplicates(keep='first')

In [None]:
# Number of samples per class, listed in ascending label order.
label_counts = train['label'].value_counts()
label_counts.sort_index()


In [None]:
from tensorflow import keras
# Detach the label column from the training frame (pop removes it in place)
# and one-hot encode it over 10 classes.
y = train.pop('label')
y_train = keras.utils.to_categorical(y, num_classes=10)

# Same treatment for the test frame.
test_labels = test.pop('label')
y_test = keras.utils.to_categorical(test_labels, num_classes=10)

In [None]:
# Scale every feature by 1/255 (presumably mapping 0-255 pixel intensities
# into [0, 1]). NOTE: `test` is rebound, so re-running this cell rescales it.
X_train = train.div(255)
test = test.div(255)

In [None]:
from sklearn.model_selection import train_test_split

# Split the scaled test frame 50/50 into a final test set and a validation
# set; random_state is fixed so the split is reproducible.
X_test, X_val, y_test, y_val = train_test_split(
    test, y_test, test_size=0.5, random_state=42
)

# Report the shape of the features and targets of every split.
for split_name, features, targets in (
    ("Train set:", X_train, y_train),
    ("Validation set:", X_val, y_val),
    ("Test set:", X_test, y_test),
):
    print(split_name, features.shape, targets.shape)



# Modelling

# Hyperparameter Tuning Analysis

## Methodology 

- Hyperparameter Variations:
 - Varying the number of layers
 - Trying different activation functions
 - Adjusting the learning rate
 - Experimenting with batch sizes
 - Keeping the maximum number of epochs constant (200) and letting early stopping end training


## Evaluation Metrics for Hyperparameter Tuning
- Training duration (number of epochs before early stopping, time in seconds)
- Noise levels in accuracy and loss graphs (on both training and validation sets)
- Test loss and accuracy

| `Experiment Number`  | `learning_rate` | `batch_size` | `Number of Layers` | `Activation Functions` | `Batch Normalization & Dropout`  |
|----------------------|-----------------|--------------|--------------------|------------------------|----------------------------------|
| 1                    | 0.05            | 32           | 1                  | relu, sigmoid          | Yes                              |
| 2                    | 0.05            | 16           | 1                  | relu, softmax          | No                               |
| 3                    | 0.05            | 128          | 3                  | tanh, sigmoid          | Yes                              |
| 4                    | 0.02            | 256          | 3                  | tanh, softmax          | No                               |
| 5                    | 0.2             | 512          | 5                  | relu, softmax          | Yes                              |
| 6                    | 1.0             | 128          | 4                  | relu, sigmoid          | No                               |
| 7                    | 0.9             | 2048         | 4                  | relu, softmax          | Yes                              |
| 8                    | 0.99            | 4096         | 4                  | tanh, sigmoid          | No                               |



In [None]:
# Per-experiment result accumulators: training time, epochs run, training
# accuracy, test loss and test accuracy. Five independent empty lists.
times, eps, train_acc, test_loss, test_accu = ([] for _ in range(5))

In [None]:
#Experiment Number: 1
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers

# Experiment 1: one hidden ReLU layer (256 units) with batch normalisation and
# dropout, sigmoid output, Adam(learning_rate=0.05), batch_size=32.

# perf_counter is a monotonic clock, so the measured duration cannot be
# distorted by system clock adjustments during training.
start_time = time.perf_counter()
model = keras.Sequential([
    layers.BatchNormalization(input_shape=[X_train.shape[1]]),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.BatchNormalization(),
    # sigmoid (rather than softmax) is the output activation under test here
    layers.Dense(10, activation='sigmoid'),
])

model.compile(
    optimizer=Adam(learning_rate=0.05),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop when the monitored quantity (Keras default: val_loss) has not improved
# by at least min_delta for `patience` epochs, then restore the best weights.
early_stopping = keras.callbacks.EarlyStopping(
    patience=5,
    min_delta=0.001,
    restore_best_weights=True,
)
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=200,  # upper bound; early stopping normally ends training sooner
    callbacks=[early_stopping],
)
end_time = time.perf_counter()
training_time = end_time - start_time

# Record this run in the shared result lists.
times.append(training_time)
eps.append(len(history.history['loss']))  # number of epochs actually run
# NOTE: accuracy of the last epoch run, which is not necessarily the epoch
# whose weights were restored by early stopping.
train_acc.append(history.history['accuracy'][-1])

# Learning curves: loss and accuracy on the training and validation sets.
history_df = pd.DataFrame(history.history)
history_df.loc[:, ['loss', 'val_loss']].plot(title="categorical_crossentropy")
history_df.loc[:, ['accuracy', 'val_accuracy']].plot(title="Accuracy")

# [loss, accuracy] on the held-out test set. Named `test_metrics` so the
# builtin `eval` is not shadowed for the rest of the session.
test_metrics = model.evaluate(X_test, y_test)
print('Training Time: ' ,training_time)
print('Test Loss and Accuracy : ' , test_metrics)
test_loss.append(test_metrics[0])
test_accu.append(test_metrics[1])

In [None]:
#Experiment Number: 2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers

# Experiment 2: one hidden ReLU layer (256 units), no batch normalisation or
# dropout, softmax output, Adam(learning_rate=0.05), batch_size=16.

# perf_counter is a monotonic clock, so the measured duration cannot be
# distorted by system clock adjustments during training.
start_time = time.perf_counter()
model = keras.Sequential([
    layers.Dense(256, activation='relu', input_shape=[X_train.shape[1]]),
    layers.Dense(10, activation='softmax'),
])

model.compile(
    optimizer=Adam(learning_rate=0.05),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop when the monitored quantity (Keras default: val_loss) has not improved
# by at least min_delta for `patience` epochs, then restore the best weights.
early_stopping = keras.callbacks.EarlyStopping(
    patience=5,
    min_delta=0.001,
    restore_best_weights=True,
)
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    batch_size=16,
    epochs=200,  # upper bound; early stopping normally ends training sooner
    callbacks=[early_stopping],
)
end_time = time.perf_counter()
training_time = end_time - start_time

# Record this run in the shared result lists.
times.append(training_time)
eps.append(len(history.history['loss']))  # number of epochs actually run
# NOTE: accuracy of the last epoch run, which is not necessarily the epoch
# whose weights were restored by early stopping.
train_acc.append(history.history['accuracy'][-1])

# Learning curves: loss and accuracy on the training and validation sets.
history_df = pd.DataFrame(history.history)
history_df.loc[:, ['loss', 'val_loss']].plot(title="categorical_crossentropy")
history_df.loc[:, ['accuracy', 'val_accuracy']].plot(title="Accuracy")

# [loss, accuracy] on the held-out test set. Named `test_metrics` so the
# builtin `eval` is not shadowed for the rest of the session.
test_metrics = model.evaluate(X_test, y_test)
print('Training Time: ' ,training_time)
print('Test Loss and Accuracy : ' , test_metrics)
test_loss.append(test_metrics[0])
test_accu.append(test_metrics[1])

In [None]:
#Experiment Number: 3
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers

# Experiment 3: three hidden tanh layers (256 units each) with batch
# normalisation and dropout, sigmoid output, Adam(learning_rate=0.05),
# batch_size=128.

# perf_counter is a monotonic clock, so the measured duration cannot be
# distorted by system clock adjustments during training.
start_time = time.perf_counter()
model = keras.Sequential([
    layers.BatchNormalization(input_shape=[X_train.shape[1]]),
    layers.Dense(256, activation='tanh'),
    layers.Dropout(0.3),
    layers.BatchNormalization(),
    layers.Dense(256, activation='tanh'),
    layers.Dropout(0.3),
    layers.BatchNormalization(),
    layers.Dense(256, activation='tanh'),
    layers.Dropout(0.3),
    layers.BatchNormalization(),
    # sigmoid (rather than softmax) is the output activation under test here
    layers.Dense(10, activation='sigmoid'),
])

model.compile(
    optimizer=Adam(learning_rate=0.05),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop when the monitored quantity (Keras default: val_loss) has not improved
# by at least min_delta for `patience` epochs, then restore the best weights.
early_stopping = keras.callbacks.EarlyStopping(
    patience=5,
    min_delta=0.001,
    restore_best_weights=True,
)
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    batch_size=128,
    epochs=200,  # upper bound; early stopping normally ends training sooner
    callbacks=[early_stopping],
)
end_time = time.perf_counter()
training_time = end_time - start_time

# Record this run in the shared result lists.
times.append(training_time)
eps.append(len(history.history['loss']))  # number of epochs actually run
# NOTE: accuracy of the last epoch run, which is not necessarily the epoch
# whose weights were restored by early stopping.
train_acc.append(history.history['accuracy'][-1])

# Learning curves: loss and accuracy on the training and validation sets.
history_df = pd.DataFrame(history.history)
history_df.loc[:, ['loss', 'val_loss']].plot(title="categorical_crossentropy")
history_df.loc[:, ['accuracy', 'val_accuracy']].plot(title="Accuracy")

# [loss, accuracy] on the held-out test set. Named `test_metrics` so the
# builtin `eval` is not shadowed for the rest of the session.
test_metrics = model.evaluate(X_test, y_test)
print('Training Time: ' ,training_time)
print('Test Loss and Accuracy : ' , test_metrics)
test_loss.append(test_metrics[0])
test_accu.append(test_metrics[1])

In [None]:
#Experiment Number: 4
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers

# Experiment 4: three hidden tanh layers (256 units each), no batch
# normalisation or dropout, softmax output, Adam(learning_rate=0.02),
# batch_size=256.

# perf_counter is a monotonic clock, so the measured duration cannot be
# distorted by system clock adjustments during training.
start_time = time.perf_counter()
model = keras.Sequential([
    layers.Dense(256, activation='tanh', input_shape=[X_train.shape[1]]),
    layers.Dense(256, activation='tanh'),
    layers.Dense(256, activation='tanh'),
    layers.Dense(10, activation='softmax'),
])

model.compile(
    optimizer=Adam(learning_rate=0.02),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop when the monitored quantity (Keras default: val_loss) has not improved
# by at least min_delta for `patience` epochs, then restore the best weights.
early_stopping = keras.callbacks.EarlyStopping(
    patience=5,
    min_delta=0.001,
    restore_best_weights=True,
)
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    batch_size=256,
    epochs=200,  # upper bound; early stopping normally ends training sooner
    callbacks=[early_stopping],
)
end_time = time.perf_counter()
training_time = end_time - start_time

# Record this run in the shared result lists.
times.append(training_time)
eps.append(len(history.history['loss']))  # number of epochs actually run
# NOTE: accuracy of the last epoch run, which is not necessarily the epoch
# whose weights were restored by early stopping.
train_acc.append(history.history['accuracy'][-1])

# Learning curves: loss and accuracy on the training and validation sets.
history_df = pd.DataFrame(history.history)
history_df.loc[:, ['loss', 'val_loss']].plot(title="categorical_crossentropy")
history_df.loc[:, ['accuracy', 'val_accuracy']].plot(title="Accuracy")

# [loss, accuracy] on the held-out test set. Named `test_metrics` so the
# builtin `eval` is not shadowed for the rest of the session.
test_metrics = model.evaluate(X_test, y_test)
print('Training Time: ' ,training_time)
print('Test Loss and Accuracy : ' , test_metrics)
test_loss.append(test_metrics[0])
test_accu.append(test_metrics[1])

In [None]:
#Experiment Number: 5
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers

# Experiment 5: five hidden ReLU layers (256 units each) with batch
# normalisation and dropout, softmax output, Adam(learning_rate=0.2),
# batch_size=512.

# perf_counter is a monotonic clock, so the measured duration cannot be
# distorted by system clock adjustments during training.
start_time = time.perf_counter()
model = keras.Sequential([
    layers.BatchNormalization(input_shape=[X_train.shape[1]]),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.BatchNormalization(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.BatchNormalization(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    # Exactly one BatchNormalization between hidden blocks: a second,
    # back-to-back copy that used to sit here was an accidental duplicate.
    layers.BatchNormalization(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.BatchNormalization(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(10, activation='softmax'),
])

model.compile(
    optimizer=Adam(learning_rate=0.2),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop when the monitored quantity (Keras default: val_loss) has not improved
# by at least min_delta for `patience` epochs, then restore the best weights.
early_stopping = keras.callbacks.EarlyStopping(
    patience=5,
    min_delta=0.001,
    restore_best_weights=True,
)
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    batch_size=512,
    epochs=200,  # upper bound; early stopping normally ends training sooner
    callbacks=[early_stopping],
)
end_time = time.perf_counter()
training_time = end_time - start_time

# Record this run in the shared result lists.
times.append(training_time)
eps.append(len(history.history['loss']))  # number of epochs actually run
# NOTE: accuracy of the last epoch run, which is not necessarily the epoch
# whose weights were restored by early stopping.
train_acc.append(history.history['accuracy'][-1])

# Learning curves: loss and accuracy on the training and validation sets.
history_df = pd.DataFrame(history.history)
history_df.loc[:, ['loss', 'val_loss']].plot(title="categorical_crossentropy")
history_df.loc[:, ['accuracy', 'val_accuracy']].plot(title="Accuracy")

# [loss, accuracy] on the held-out test set. Named `test_metrics` so the
# builtin `eval` is not shadowed for the rest of the session.
test_metrics = model.evaluate(X_test, y_test)
print('Training Time: ' ,training_time)
print('Test Loss and Accuracy : ' , test_metrics)
test_loss.append(test_metrics[0])
test_accu.append(test_metrics[1])

In [None]:
#Experiment Number: 6
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers

# Experiment 6: four hidden ReLU layers (256 units each), no batch
# normalisation or dropout, sigmoid output, Adam(learning_rate=1),
# batch_size=128.

# perf_counter is a monotonic clock, so the measured duration cannot be
# distorted by system clock adjustments during training.
start_time = time.perf_counter()
model = keras.Sequential([
    layers.Dense(256, activation='relu', input_shape=[X_train.shape[1]]),
    layers.Dense(256, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(256, activation='relu'),
    # sigmoid (rather than softmax) is the output activation under test here
    layers.Dense(10, activation='sigmoid'),
])

model.compile(
    optimizer=Adam(learning_rate=1),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop when the monitored quantity (Keras default: val_loss) has not improved
# by at least min_delta for `patience` epochs, then restore the best weights.
early_stopping = keras.callbacks.EarlyStopping(
    patience=5,
    min_delta=0.001,
    restore_best_weights=True,
)
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    batch_size=128,
    epochs=200,  # upper bound; early stopping normally ends training sooner
    callbacks=[early_stopping],
)
end_time = time.perf_counter()
training_time = end_time - start_time

# Record this run in the shared result lists.
times.append(training_time)
eps.append(len(history.history['loss']))  # number of epochs actually run
# NOTE: accuracy of the last epoch run, which is not necessarily the epoch
# whose weights were restored by early stopping.
train_acc.append(history.history['accuracy'][-1])

# Learning curves: loss and accuracy on the training and validation sets.
history_df = pd.DataFrame(history.history)
history_df.loc[:, ['loss', 'val_loss']].plot(title="categorical_crossentropy")
history_df.loc[:, ['accuracy', 'val_accuracy']].plot(title="Accuracy")

# [loss, accuracy] on the held-out test set. Named `test_metrics` so the
# builtin `eval` is not shadowed for the rest of the session.
test_metrics = model.evaluate(X_test, y_test)
print('Training Time: ' ,training_time)
print('Test Loss and Accuracy : ' , test_metrics)
test_loss.append(test_metrics[0])
test_accu.append(test_metrics[1])

In [None]:
#Experiment Number: 7
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers

# Experiment 7: four hidden ReLU layers (256 units each) with batch
# normalisation and dropout, softmax output, Adam(learning_rate=0.9),
# batch_size=2048.

# perf_counter is a monotonic clock, so the measured duration cannot be
# distorted by system clock adjustments during training.
start_time = time.perf_counter()
model = keras.Sequential([
    layers.BatchNormalization(input_shape=[X_train.shape[1]]),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.BatchNormalization(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.BatchNormalization(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.BatchNormalization(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(10, activation='softmax'),
])

model.compile(
    optimizer=Adam(learning_rate=0.9),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop when the monitored quantity (Keras default: val_loss) has not improved
# by at least min_delta for `patience` epochs, then restore the best weights.
early_stopping = keras.callbacks.EarlyStopping(
    patience=5,
    min_delta=0.001,
    restore_best_weights=True,
)
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    batch_size=2048,
    epochs=200,  # upper bound; early stopping normally ends training sooner
    callbacks=[early_stopping],
)
end_time = time.perf_counter()
training_time = end_time - start_time

# Record this run in the shared result lists.
times.append(training_time)
eps.append(len(history.history['loss']))  # number of epochs actually run
# NOTE: accuracy of the last epoch run, which is not necessarily the epoch
# whose weights were restored by early stopping.
train_acc.append(history.history['accuracy'][-1])

# Learning curves: loss and accuracy on the training and validation sets.
history_df = pd.DataFrame(history.history)
history_df.loc[:, ['loss', 'val_loss']].plot(title="categorical_crossentropy")
history_df.loc[:, ['accuracy', 'val_accuracy']].plot(title="Accuracy")

# [loss, accuracy] on the held-out test set. Named `test_metrics` so the
# builtin `eval` is not shadowed for the rest of the session.
test_metrics = model.evaluate(X_test, y_test)
print('Training Time: ' ,training_time)
print('Test Loss and Accuracy : ' , test_metrics)
test_loss.append(test_metrics[0])
test_accu.append(test_metrics[1])

In [None]:
#Experiment Number: 8
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers

# Experiment 8: four hidden tanh layers (256 units each), no batch
# normalisation or dropout, sigmoid output, Adam(learning_rate=0.99),
# batch_size=4096.

# perf_counter is a monotonic clock, so the measured duration cannot be
# distorted by system clock adjustments during training.
start_time = time.perf_counter()
model = keras.Sequential([
    layers.Dense(256, activation='tanh', input_shape=[X_train.shape[1]]),
    layers.Dense(256, activation='tanh'),
    layers.Dense(256, activation='tanh'),
    layers.Dense(256, activation='tanh'),
    # sigmoid (rather than softmax) is the output activation under test here
    layers.Dense(10, activation='sigmoid'),
])

model.compile(
    optimizer=Adam(learning_rate=0.99),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop when the monitored quantity (Keras default: val_loss) has not improved
# by at least min_delta for `patience` epochs, then restore the best weights.
early_stopping = keras.callbacks.EarlyStopping(
    patience=5,
    min_delta=0.001,
    restore_best_weights=True,
)
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    batch_size=4096,
    epochs=200,  # upper bound; early stopping normally ends training sooner
    callbacks=[early_stopping],
)
end_time = time.perf_counter()
training_time = end_time - start_time

# Record this run in the shared result lists.
times.append(training_time)
eps.append(len(history.history['loss']))  # number of epochs actually run
# NOTE: accuracy of the last epoch run, which is not necessarily the epoch
# whose weights were restored by early stopping.
train_acc.append(history.history['accuracy'][-1])

# Learning curves: loss and accuracy on the training and validation sets.
history_df = pd.DataFrame(history.history)
history_df.loc[:, ['loss', 'val_loss']].plot(title="categorical_crossentropy")
history_df.loc[:, ['accuracy', 'val_accuracy']].plot(title="Accuracy")

# [loss, accuracy] on the held-out test set. Named `test_metrics` so the
# builtin `eval` is not shadowed for the rest of the session.
test_metrics = model.evaluate(X_test, y_test)
print('Training Time: ' ,training_time)
print('Test Loss and Accuracy : ' , test_metrics)
test_loss.append(test_metrics[0])
test_accu.append(test_metrics[1])

| `Experiment Number`  | `learning_rate` | `batch_size` | `Number of Layers` | `Activation Functions` | `Batch Normalization & Dropout`  |
|----------------------|-----------------|--------------|--------------------|------------------------|----------------------------------|
| 1                    | 0.05            | 32           | 1                  | relu, sigmoid          | Yes                              |
| 2                    | 0.05            | 16           | 1                  | relu, softmax          | No                               |
| 3                    | 0.05            | 128          | 3                  | tanh, sigmoid          | Yes                              |
| 4                    | 0.02            | 256          | 3                  | tanh, softmax          | No                               |
| 5                    | 0.2             | 512          | 5                  | relu, softmax          | Yes                              |
| 6                    | 1.0             | 128          | 4                  | relu, sigmoid          | No                               |
| 7                    | 0.9             | 2048         | 4                  | relu, softmax          | Yes                              |
| 8                    | 0.99            | 4096         | 4                  | tanh, sigmoid          | No                               |


In [None]:
# Collect the per-experiment measurements into one summary table.
# The mapping is named `results` rather than `dict` so the builtin `dict`
# stays usable in every cell that runs after this one.
# All value lists must have one entry per experiment (8), otherwise the
# DataFrame constructor raises on the length mismatch.
results = {
    'Experiment Number' : [1,2,3,4,5,6,7,8],
    'training time in seconds' : times,
    'Number of Epochs' : eps,
    'Training Accuracy' : train_acc,
    'Test Loss' : test_loss,
    'Test Accuracy ': test_accu
}

dd = pd.DataFrame(results)
# Last expression of the cell: lets the notebook render the table.
dd

# Results

- Experiment 1:
  - Good accuracy.
  - Big difference in loss and accuracy between training and validation datasets.
  - Generalized model.
  - Long training time due to more epochs.

- Experiment 2:
  - Low accuracy.
  - Noisy graphs of training loss and accuracy.
  - Underfitting observed.
  - Long training time.

- Experiment 3:
  - Low accuracy.
  - Underfitting.
  - Shorter training time.
  - Less noisy graph.

- Experiment 4:
  - Moderate accuracy.
  - Short training time.
  - Graphs are a bit glitchy.

- Experiment 5:
  - Very glitchy graphs.
  - Underfitting.
  - Long training time.

- Experiment 6:
  - Underfitting.
  - Short training time.
  - Very glitchy.

- Experiment 7:
  - Underfitting.
  - Short training time.
  - Very noisy.

- Experiment 8:
  - Underfitting.
  - Short training time.
  - Very glitchy graphs.



# Conclusion

- With the learning rate and the batch size, you have some control over:
 - How long it takes to train a model
 - How noisy the learning curves are
 - How small the loss becomes
 
- Number of Layers:
 - Influences the model's complexity and learning capacity.
 - Impacts training time and potential overfitting.
 - Affects the depth of learning curves and the model's ability to capture intricate patterns.

- Batch Normalization and Dropout:
 - Batch Normalization helps stabilize and accelerate training.
 - Dropout acts as a regularization technique that helps prevent overfitting.
 - Together, they can influence training time, noise in the learning curves, and the model's robustness.


In [None]:

from tensorflow import keras
from tensorflow.keras import layers

# Final model: batch-normalised input followed by three hidden ReLU blocks
# (256, 256 and 512 units), each block being Dense -> BatchNormalization ->
# Dropout(0.3), and a 10-way softmax output.
n_features = X_train.shape[1]
final_layers = [layers.BatchNormalization(input_shape=[n_features])]
for width in (256, 256, 512):
    final_layers += [
        layers.Dense(width, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.3),
    ]
final_layers.append(layers.Dense(10, activation='softmax'))
model = keras.Sequential(final_layers)

# The optimizer is given by name, i.e. Adam with its default settings.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Same early-stopping policy as the experiments: patience of 5 epochs,
# minimum improvement of 0.001, best weights restored.
early_stopping = keras.callbacks.EarlyStopping(
    patience=5, min_delta=0.001, restore_best_weights=True,
)
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    batch_size=512,
    epochs=200,
    callbacks=[early_stopping],
)

# Learning curves for loss and accuracy (training vs. validation).
history_df = pd.DataFrame(history.history)
for curve_columns, curve_title in (
    (['loss', 'val_loss'], "categorical_crossentropy"),
    (['accuracy', 'val_accuracy'], "Accuracy"),
):
    history_df.loc[:, curve_columns].plot(title=curve_title)

# Evaluate on the held-out test split and report the result.
final_scores = model.evaluate(X_test, y_test)
print('Test Loss and Accuracy : ', final_scores)


In [None]:
from sklearn.decomposition import PCA
import numpy as np
import matplotlib.pyplot as plt

# Visualise the classifier's decision regions in the plane spanned by the
# first two principal components of the training features.

# Grid settings. The extent is derived from the projected data instead of a
# fixed +/-50 padding with step 10: the features were divided by 255, so the
# projections span only a few units and a fixed grid of that size would be
# both far too coarse and mostly outside the data.
GRID_RESOLUTION = 200    # grid points per axis
PADDING_FRACTION = 0.05  # margin around the data, as a fraction of its range

X_train_array = X_train.to_numpy()  # DataFrame -> NumPy array
# Ensure a 2-D (samples, features) layout for PCA.
X_train_flattened = X_train_array.reshape(X_train_array.shape[0], -1)

pca = PCA(n_components=2)
train_images_pca = pca.fit_transform(X_train_flattened)

# Bounding box of the projected training data plus a proportional margin.
pc_min = train_images_pca.min(axis=0)
pc_max = train_images_pca.max(axis=0)
padding = (pc_max - pc_min) * PADDING_FRACTION
x_min, y_min = pc_min - padding
x_max, y_max = pc_max + padding
xx, yy = np.meshgrid(
    np.linspace(x_min, x_max, GRID_RESOLUTION),
    np.linspace(y_min, y_max, GRID_RESOLUTION),
)

# Map every grid point back to the original feature space so the model,
# which was trained on full-length feature vectors, can classify it.
meshgrid_points = np.c_[xx.ravel(), yy.ravel()]
meshgrid_points_flattened = pca.inverse_transform(meshgrid_points)
# Image-shaped view of the same points, kept for inspection/plotting.
meshgrid_points_original = meshgrid_points_flattened.reshape(-1, 28, 28)

predictions = model.predict(meshgrid_points_flattened)
Z = np.argmax(predictions, axis=1).reshape(xx.shape)

# Class ids are categorical: put one contour band around each integer id
# instead of letting contourf pick arbitrary levels across them.
class_levels = np.arange(predictions.shape[1] + 1) - 0.5

# Plot decision boundary
plt.contourf(xx, yy, Z, levels=class_levels, cmap=plt.cm.Paired, alpha=0.8)
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('Decision Boundary')
plt.show()
