In [26]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import KFold
import time
import matplotlib.pyplot as plt

<h3>Load MNIST dataset</h3>

In [27]:
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()

<h3>Combine the training and test sets into one</h3>

In [28]:
X_combined = np.concatenate([X_train, X_test], axis=0)
y_combined = np.concatenate([y_train, y_test], axis=0)

<h3>Normalize the images</h3>

In [29]:
X_combined = X_combined / 255.0

<h3>Flatten the images</h3>

In [30]:
X_combined_flattened = X_combined.reshape(len(X_combined), 28 * 28)

<h3>Initialize the KFold cross-validation</h3>

In [31]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Track the metrics for each fold
fold_accuracies = []
fold_losses = []
fold_val_accuracies = []
fold_val_losses = []
fold_times = []

<h3>Perform 5-fold cross-validation</h3>

In [32]:
for fold, (train_idx, val_idx) in enumerate(kf.split(X_combined_flattened, y_combined)):
    print(f"Training on Fold {fold + 1}...")
    
    # Split data into training and validation sets for the current fold
    X_train_fold, X_val_fold = X_combined_flattened[train_idx], X_combined_flattened[val_idx]
    y_train_fold, y_val_fold = y_combined[train_idx], y_combined[val_idx]
    
    # Build the model 
    model = keras.Sequential([
        keras.layers.Dense(16, input_shape=(784,), activation="sigmoid"),
        keras.layers.Dense(16, activation="sigmoid"),
        keras.layers.Dense(10, activation="sigmoid"),  
    ])
    
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    
    # Track the time before training starts
    start_time = time.time()
    
    # Train the model on the current fold, specifying validation data for validation metrics
    history = model.fit(X_train_fold, y_train_fold, epochs=5, batch_size=32, validation_data=(X_val_fold, y_val_fold), verbose=1)
    
    # Track the time taken for training
    time_taken = time.time() - start_time
    
    # Evaluate the model on the validation set of the current fold
    val_loss, val_accuracy = model.evaluate(X_val_fold, y_val_fold, verbose=0)
    
    # Extract training loss and accuracy from history
    train_loss = history.history['loss'][-1]  
    train_accuracy = history.history['accuracy'][-1]  
    
    # Append the results
    fold_accuracies.append(train_accuracy)
    fold_losses.append(train_loss)
    fold_val_accuracies.append(val_accuracy)
    fold_val_losses.append(val_loss)
    fold_times.append(time_taken)
    
    print(f"Fold {fold + 1} - Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.4f}")
    print(f"Fold {fold + 1} - Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")
    print(f"Time taken for Fold {fold + 1}: {time_taken:.2f} seconds\n")

Training on Fold 1...
Epoch 1/5
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.5464 - loss: 1.6911 - val_accuracy: 0.8748 - val_loss: 0.5342
Epoch 2/5
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8890 - loss: 0.4559 - val_accuracy: 0.9055 - val_loss: 0.3464
Epoch 3/5
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9153 - loss: 0.3058 - val_accuracy: 0.9229 - val_loss: 0.2780
Epoch 4/5
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9316 - loss: 0.2474 - val_accuracy: 0.9306 - val_loss: 0.2500
Epoch 5/5
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9388 - loss: 0.2202 - val_accuracy: 0.9347 - val_loss: 0.2306
Fold 1 - Training Loss: 0.2144, Training Accuracy: 0.9398
Fold 1 - Validation Loss: 0.2306, Validation Accuracy: 0.9347
Time taken for Fold 1: 29.86 seconds

Train

<h3>Calculate the average accuracy and loss across all folds</h3>

In [17]:
avg_train_accuracy = np.mean(fold_accuracies)
avg_train_loss = np.mean(fold_losses)
avg_val_accuracy = np.mean(fold_val_accuracies)
avg_val_loss = np.mean(fold_val_losses)
avg_time = np.mean(fold_times)

print(f"Average Training Accuracy: {avg_train_accuracy:.4f}")
print(f"Average Training Loss: {avg_train_loss:.4f}")
print(f"Average Validation Accuracy: {avg_val_accuracy:.4f}")
print(f"Average Validation Loss: {avg_val_loss:.4f}")
print(f"Average Time per Fold: {avg_time:.2f} seconds")

Average Training Accuracy: 0.9345
Average Training Loss: 0.2327
Average Validation Accuracy: 0.9307
Average Validation Loss: 0.2417
Average Time per Fold: 27.87 seconds


<h3>Using 'kullback_leibler_divergence' loss function</h3>