In [18]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import KFold
import time
import matplotlib.pyplot as plt

<h3>Load MNIST dataset</h3>

In [19]:
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()

<h3>Combine the training and test sets into one</h3>

In [20]:
X_combined = np.concatenate([X_train, X_test], axis=0)
y_combined = np.concatenate([y_train, y_test], axis=0)

<h3>Normalize the images</h3>

In [21]:
X_combined = X_combined / 255.0

<h3>Flatten the images</h3>

In [22]:
X_combined_flattened = X_combined.reshape(len(X_combined), 28 * 28)

<h3>Initialize the KFold cross-validation</h3>

In [23]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Track the metrics for each fold
fold_accuracies = []
fold_losses = []
fold_val_accuracies = []
fold_val_losses = []
fold_times = []

<h3>Perform 5-fold cross-validation</h3>

In [7]:
for fold, (train_idx, val_idx) in enumerate(kf.split(X_combined_flattened, y_combined)):
    print(f"Training on Fold {fold + 1}...")
    
    # Split data into training and validation sets for the current fold
    X_train_fold, X_val_fold = X_combined_flattened[train_idx], X_combined_flattened[val_idx]
    y_train_fold, y_val_fold = y_combined[train_idx], y_combined[val_idx]
    
    # Build the model 
    model = keras.Sequential([
        keras.layers.Dense(16, input_shape=(784,), activation="sigmoid"),
        keras.layers.Dense(16, activation="sigmoid"),
        keras.layers.Dense(10, activation="sigmoid"),  
    ])
    
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    
    # Track the time before training starts
    start_time = time.time()
    
    # Train the model on the current fold, specifying validation data for validation metrics
    history = model.fit(X_train_fold, y_train_fold, epochs=5, batch_size=32, validation_data=(X_val_fold, y_val_fold), verbose=1)
    
    # Track the time taken for training
    time_taken = time.time() - start_time
    
    # Evaluate the model on the validation set of the current fold
    val_loss, val_accuracy = model.evaluate(X_val_fold, y_val_fold, verbose=0)
    
    # Extract training loss and accuracy from history
    train_loss = history.history['loss'][-1]  
    train_accuracy = history.history['accuracy'][-1]  
    
    # Append the results
    fold_accuracies.append(train_accuracy)
    fold_losses.append(train_loss)
    fold_val_accuracies.append(val_accuracy)
    fold_val_losses.append(val_loss)
    fold_times.append(time_taken)
    
    print(f"Fold {fold + 1} - Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.4f}")
    print(f"Fold {fold + 1} - Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")
    print(f"Time taken for Fold {fold + 1}: {time_taken:.2f} seconds\n")

Training on Fold 1...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.5697 - loss: 1.6783 - val_accuracy: 0.8852 - val_loss: 0.5310
Epoch 2/5
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8995 - loss: 0.4428 - val_accuracy: 0.9138 - val_loss: 0.3256
Epoch 3/5
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9188 - loss: 0.2952 - val_accuracy: 0.9236 - val_loss: 0.2701
Epoch 4/5
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9329 - loss: 0.2396 - val_accuracy: 0.9309 - val_loss: 0.2398
Epoch 5/5
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9401 - loss: 0.2118 - val_accuracy: 0.9356 - val_loss: 0.2246
Fold 1 - Training Loss: 0.2093, Training Accuracy: 0.9407
Fold 1 - Validation Loss: 0.2246, Validation Accuracy: 0.9356
Time taken for Fold 1: 24.03 seconds

Training on Fold 2...
Epoch

<h3>Calculate the average accuracy and loss across all folds</h3>

In [8]:
avg_train_accuracy = np.mean(fold_accuracies)
avg_train_loss = np.mean(fold_losses)
avg_val_accuracy = np.mean(fold_val_accuracies)
avg_val_loss = np.mean(fold_val_losses)
avg_time = np.mean(fold_times)

print(f"Average Training Accuracy: {avg_train_accuracy:.4f}")
print(f"Average Training Loss: {avg_train_loss:.4f}")
print(f"Average Validation Accuracy: {avg_val_accuracy:.4f}")
print(f"Average Validation Loss: {avg_val_loss:.4f}")
print(f"Average Time per Fold: {avg_time:.2f} seconds")

Average Training Accuracy: 0.9363
Average Training Loss: 0.2254
Average Validation Accuracy: 0.9332
Average Validation Loss: 0.2330
Average Time per Fold: 25.58 seconds


<h3>Using 'kullback_leibler_divergence' loss function</h3>

In [26]:
# Perform 5-fold cross-validation
for fold, (train_idx, val_idx) in enumerate(kf.split(X_combined_flattened, y_combined)):
    print(f"Training on Fold {fold + 1}...")
    
    # Split data into training and validation sets for the current fold
    X_train_fold, X_val_fold = X_combined_flattened[train_idx], X_combined_flattened[val_idx]
    y_train_fold, y_val_fold = y_combined[train_idx], y_combined[val_idx]
    
    # One-hot encode the labels for KL Divergence
    y_train_fold_one_hot = keras.utils.to_categorical(y_train_fold, 10)
    y_val_fold_one_hot = keras.utils.to_categorical(y_val_fold, 10)
    
    # Build the model (you can modify the architecture here if needed)
    model = keras.Sequential([
        keras.layers.Dense(16, input_shape=(784,), activation="sigmoid"),
        keras.layers.Dense(16, activation="sigmoid"),
        keras.layers.Dense(10, activation="sigmoid"),  
    ])
    
    # Compile the model using Kullback-Leibler Divergence as the loss function
    model.compile(optimizer="adam", loss=tf.keras.losses.KLDivergence(), metrics=["accuracy"])
    
    # Track the time before training starts
    start_time = time.time()
    
    # Train the model on the current fold, specifying validation data for validation metrics
    history = model.fit(X_train_fold, y_train_fold_one_hot, epochs=5, batch_size=32, validation_data=(X_val_fold, y_val_fold_one_hot), verbose=1)
    
    # Track the time taken for training
    time_taken = time.time() - start_time
    
    # Evaluate the model on the validation set of the current fold
    val_loss, val_accuracy = model.evaluate(X_val_fold, y_val_fold_one_hot, verbose=0)
    
    # Extract training loss and accuracy from history
    train_loss = history.history['loss'][-1]  # Last epoch training loss
    train_accuracy = history.history['accuracy'][-1]  # Last epoch training accuracy
    
    # Append the results
    fold_accuracies.append(train_accuracy)
    fold_losses.append(train_loss)
    fold_val_accuracies.append(val_accuracy)
    fold_val_losses.append(val_loss)
    fold_times.append(time_taken)
    
    print(f"Fold {fold + 1} - Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.4f}")
    print(f"Fold {fold + 1} - Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")
    print(f"Time taken for Fold {fold + 1}: {time_taken:.2f} seconds\n")


Training on Fold 1...
Epoch 1/5
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.1559 - loss: 0.1503 - val_accuracy: 0.0959 - val_loss: 0.0026
Epoch 2/5
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.0994 - loss: 0.0017 - val_accuracy: 0.0959 - val_loss: 5.3152e-04
Epoch 3/5
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.0972 - loss: 4.0467e-04 - val_accuracy: 0.0959 - val_loss: 1.7102e-04
Epoch 4/5
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.0982 - loss: 1.3298e-04 - val_accuracy: 0.0959 - val_loss: 5.7335e-05
Epoch 5/5
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.1005 - loss: 4.3421e-05 - val_accuracy: 0.0986 - val_loss: 1.4661e-05
Fold 1 - Training Loss: 0.0000, Training Accuracy: 0.1004
Fold 1 - Validation Loss: 0.0000, Validation Accuracy: 0.0986
Time taken for 

In [28]:
# Calculate and print the average metrics across all folds
avg_train_accuracy = np.mean(fold_accuracies)
avg_train_loss = np.mean(fold_losses)
avg_val_accuracy = np.mean(fold_val_accuracies)
avg_val_loss = np.mean(fold_val_losses)
avg_time = np.mean(fold_times)

print(f"Average Training Accuracy: {avg_train_accuracy:.4f}")
print(f"Average Training Loss: {avg_train_loss:.4f}")
print(f"Average Validation Accuracy: {avg_val_accuracy:.4f}")
print(f"Average Validation Loss: {avg_val_loss:.4f}")
print(f"Average Time per Fold: {avg_time:.2f} seconds")

Average Training Accuracy: 0.5178
Average Training Loss: 0.1171
Average Validation Accuracy: 0.5163
Average Validation Loss: 0.1207
Average Time per Fold: 37.49 seconds
