<a href="https://colab.research.google.com/github/AakritiGuragain/mnist_even_odd_classifier/blob/main/mnist_even_odd_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def create_model(learning_rate=0.01, hidden_units=(128, 64), dropout_rate=0.2):
    """Build and compile a fully connected binary classifier for 28x28 images.

    The network flattens each image, applies one ReLU ``Dense`` layer followed
    by a ``Dropout`` layer for every entry in ``hidden_units``, and ends with a
    single sigmoid unit. It is compiled with the Adam optimizer, binary
    cross-entropy loss and the accuracy metric.

    Args:
        learning_rate: Learning rate passed to the Adam optimizer.
        hidden_units: Sequence holding the width of each hidden layer, in
            order. Any number of entries is accepted; the default produces
            two hidden layers of 128 and 64 units.
        dropout_rate: Rate passed to the ``Dropout`` layer that follows each
            hidden layer.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    # Declare the input with keras.Input rather than Flatten(input_shape=...);
    # passing input_shape to a layer inside Sequential is the deprecated form.
    layers = [
        keras.Input(shape=(28, 28)),
        # Flatten 28x28 images to 784 features
        keras.layers.Flatten(),
    ]

    # One Dense + Dropout pair per requested width, so every entry of
    # hidden_units is honoured instead of only the first two.
    for units in hidden_units:
        layers.append(keras.layers.Dense(units, activation='relu'))
        layers.append(keras.layers.Dropout(dropout_rate))  # Prevent overfitting

    # Output layer (binary classification)
    layers.append(keras.layers.Dense(1, activation='sigmoid'))

    model = keras.Sequential(layers)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    return model


In [None]:
def prepare_data():
    """Load MNIST and recast it as an even-vs-odd binary classification task.

    Prints the number of training and testing samples and the image shape.

    Returns:
        A tuple ``(x_train, y_train_binary, x_test, y_test_binary)`` where the
        images are float32 arrays divided by 255 and each label is the
        original digit modulo 2 (even=0, odd=1).
    """
    print("Loading MNIST dataset...")

    # load_data() downloads the archive when it is not already present.
    train_split, test_split = keras.datasets.mnist.load_data()
    train_images, train_digits = train_split
    test_images, test_digits = test_split

    # Bring pixel values into the [0, 1] range.
    max_pixel = 255.0
    x_train = train_images.astype('float32') / max_pixel
    x_test = test_images.astype('float32') / max_pixel

    # Parity of the digit is the new target: even=0, odd=1.
    y_train_binary = train_digits % 2
    y_test_binary = test_digits % 2

    print(f"Training samples: {len(x_train)}")
    print(f"Testing samples: {len(x_test)}")
    print(f"Image shape: {x_train.shape[1:]}")

    return x_train, y_train_binary, x_test, y_test_binary

In [None]:
def train_and_evaluate(x_train, y_train, x_test, y_test,
                       learning_rate=0.01, epochs=10, batch_size=32,
                       hidden_units=(128, 64), model_name="Baseline",
                       validation_split=0.1, seed=None):
    """Train a fresh model with the given hyperparameters and score it.

    A model is built with ``create_model``, fitted on the training data with
    part of it held out for validation, and then evaluated on the test data.
    A banner with the configuration and the final test metrics are printed.

    Args:
        x_train: Training inputs passed to ``model.fit``.
        y_train: Training targets passed to ``model.fit``.
        x_test: Test inputs passed to ``model.evaluate``.
        y_test: Test targets passed to ``model.evaluate``.
        learning_rate: Learning rate forwarded to ``create_model``.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used during fitting.
        hidden_units: Sequence of hidden layer widths forwarded to
            ``create_model``.
        model_name: Label used in the printed banner and stored in the result.
        validation_split: Fraction of the training data that ``model.fit``
            holds out for validation.
        seed: Optional integer. When given, Keras' global random seed is set
            before the model is created so runs can be repeated and compared.

    Returns:
        A dict with the keys ``'name'``, ``'learning_rate'``, ``'epochs'``,
        ``'batch_size'``, ``'hidden_units'``, ``'test_accuracy'`` (as a
        percentage), ``'test_loss'`` and ``'history'`` (the object returned by
        ``model.fit``).
    """
    # Take a private list copy: the value is stored in the returned dict, and a
    # shared default (or the caller's own list) must not be aliased there.
    hidden_units = list(hidden_units)

    if seed is not None:
        # Seed before building the model so weight initialisation is covered.
        keras.utils.set_random_seed(seed)

    print(f"\n{'='*60}")
    print(f"Training: {model_name}")
    print(f"Learning Rate: {learning_rate}, Epochs: {epochs}, Batch Size: {batch_size}")
    print(f"Hidden Units: {hidden_units}")
    print(f"{'='*60}")

    # Create and train model
    model = create_model(learning_rate, hidden_units)

    history = model.fit(
        x_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        verbose=1
    )

    # Score on the test data; verbose=0 keeps the progress bar out of the log.
    test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)

    results = {
        'name': model_name,
        'learning_rate': learning_rate,
        'epochs': epochs,
        'batch_size': batch_size,
        'hidden_units': hidden_units,
        'test_accuracy': test_accuracy * 100,  # stored as a percentage
        'test_loss': test_loss,
        'history': history
    }

    print(f"Test Accuracy: {test_accuracy * 100:.2f}%")
    print(f"Test Loss: {test_loss:.4f}")

    return results

In [None]:
def plot_comparison(results_list):
    """Plot training curves and final test accuracy for several experiments.

    The left panel shows the training and validation accuracy per epoch for
    every experiment; the right panel shows one bar per experiment with its
    test accuracy written above it.

    Args:
        results_list: List of result dicts. Each dict must provide ``'name'``,
            ``'test_accuracy'`` (a percentage) and ``'history'``, an object
            whose ``history`` mapping contains ``'accuracy'`` and
            ``'val_accuracy'`` series.
    """
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    curve_ax, bar_ax = axes

    # Plot accuracy
    for result in results_list:
        curves = result['history'].history
        curve_ax.plot(curves['accuracy'],
                      label=f"{result['name']} (Train)")
        curve_ax.plot(curves['val_accuracy'],
                      label=f"{result['name']} (Val)", linestyle='--')

    curve_ax.set_xlabel('Epoch')
    curve_ax.set_ylabel('Accuracy')
    curve_ax.set_title('Model Accuracy Comparison')
    if results_list:
        # Only build a legend when there is something to label.
        curve_ax.legend()
    curve_ax.grid(True)

    # Plot test accuracy comparison
    names = [r['name'] for r in results_list]
    test_accs = [r['test_accuracy'] for r in results_list]
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']

    # Cycle the palette explicitly so more than four experiments still get
    # one colour per bar.
    bar_colors = [colors[i % len(colors)] for i in range(len(names))]
    bar_ax.bar(names, test_accs, color=bar_colors)
    bar_ax.set_ylabel('Test Accuracy (%)')
    bar_ax.set_title('Final Test Accuracy Comparison')

    # Keep the zoomed-in 90-100 window when every run is above 90%, but widen
    # it when a run scored lower so its bar and label are not cut off.
    lowest = min(test_accs, default=100)
    y_min = 90 if lowest > 90 else max(0, lowest - 5)
    bar_ax.set_ylim([y_min, 100])

    # Label offset is 2% of the visible range (0.2 for the default window).
    label_offset = 0.02 * (100 - y_min)
    for i, v in enumerate(test_accs):
        bar_ax.text(i, v + label_offset, f'{v:.2f}%', ha='center', fontweight='bold')

    plt.tight_layout()
    plt.show()

In [None]:
if __name__ == "__main__":
    # Fetch MNIST and convert its labels to the even/odd task.
    x_train, y_train, x_test, y_test = prepare_data()

    # Collects one results dict per experiment; the following cells add to it.
    all_results = []

    # ---------- Experiment 1: baseline configuration ----------
    results1 = train_and_evaluate(
        x_train,
        y_train,
        x_test,
        y_test,
        model_name="Baseline Model",
        hidden_units=[128, 64],
        batch_size=32,
        epochs=10,
        learning_rate=0.01,
    )
    all_results += [results1]

Loading MNIST dataset...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Training samples: 60000
Testing samples: 10000
Image shape: (28, 28)

Training: Baseline Model
Learning Rate: 0.01, Epochs: 10, Batch Size: 32
Hidden Units: [128, 64]


  super().__init__(**kwargs)


Epoch 1/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - accuracy: 0.9240 - loss: 0.1939 - val_accuracy: 0.9762 - val_loss: 0.0739
Epoch 2/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.9695 - loss: 0.0935 - val_accuracy: 0.9780 - val_loss: 0.0686
Epoch 3/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.9711 - loss: 0.0846 - val_accuracy: 0.9805 - val_loss: 0.0670
Epoch 4/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.9756 - loss: 0.0710 - val_accuracy: 0.9785 - val_loss: 0.0712
Epoch 5/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.9747 - loss: 0.0750 - val_accuracy: 0.9813 - val_loss: 0.0572
Epoch 6/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.9805 - loss: 0.0639 - val_accuracy: 0.9808 - val_loss: 0.0725
Epoch 7/10


In [None]:
# Experiment 2: same architecture as the baseline, trained with a higher
# learning rate.
results2 = train_and_evaluate(
    x_train, y_train, x_test, y_test,
    learning_rate=0.05,  # Increased learning rate
    epochs=10,
    batch_size=32,
    hidden_units=[128, 64],
    model_name="Higher LR (0.05)"
)
# Drop any entry left by a previous run of this cell before appending, so
# re-running the cell does not duplicate the experiment in the comparison.
all_results = [r for r in all_results if r['name'] != results2['name']]
all_results.append(results2)



Training: Higher LR (0.05)
Learning Rate: 0.05, Epochs: 10, Batch Size: 32
Hidden Units: [128, 64]
Epoch 1/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.8467 - loss: 0.4166 - val_accuracy: 0.9472 - val_loss: 0.1518
Epoch 2/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.8992 - loss: 0.2621 - val_accuracy: 0.9400 - val_loss: 0.1783
Epoch 3/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.8704 - loss: 0.3145 - val_accuracy: 0.9360 - val_loss: 0.1677
Epoch 4/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.8860 - loss: 0.2762 - val_accuracy: 0.9370 - val_loss: 0.1845
Epoch 5/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.8655 - loss: 0.3091 - val_accuracy: 0.9460 - val_loss: 0.1651
Epoch 6/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s

In [None]:
# Experiment 3: baseline hyperparameters, trained for twice as many epochs.
results3 = train_and_evaluate(
    x_train, y_train, x_test, y_test,
    learning_rate=0.01,
    epochs=20,  # More training epochs
    batch_size=32,
    hidden_units=[128, 64],
    model_name="More Epochs (20)"
)
# Drop any entry left by a previous run of this cell before appending, so
# re-running the cell does not duplicate the experiment in the comparison.
all_results = [r for r in all_results if r['name'] != results3['name']]
all_results.append(results3)


Training: More Epochs (20)
Learning Rate: 0.01, Epochs: 20, Batch Size: 32
Hidden Units: [128, 64]
Epoch 1/20
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - accuracy: 0.9140 - loss: 0.2113 - val_accuracy: 0.9718 - val_loss: 0.0810
Epoch 2/20
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.9676 - loss: 0.0989 - val_accuracy: 0.9728 - val_loss: 0.0766
Epoch 3/20
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.9711 - loss: 0.0841 - val_accuracy: 0.9822 - val_loss: 0.0617
Epoch 4/20
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.9748 - loss: 0.0771 - val_accuracy: 0.9762 - val_loss: 0.0938
Epoch 5/20
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - accuracy: 0.9754 - loss: 0.0751 - val_accuracy: 0.9812 - val_loss: 0.0701
Epoch 6/20
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[

In [None]:
# Experiment 4: baseline hyperparameters with wider hidden layers.
results4 = train_and_evaluate(
    x_train, y_train, x_test, y_test,
    learning_rate=0.01,
    epochs=10,
    batch_size=32,
    hidden_units=[256, 128],  # Larger hidden layers
    model_name="Larger Network"
)
# Drop any entry left by a previous run of this cell before appending, so
# re-running the cell does not duplicate the experiment in the comparison.
all_results = [r for r in all_results if r['name'] != results4['name']]
all_results.append(results4)


Training: Larger Network
Learning Rate: 0.01, Epochs: 10, Batch Size: 32
Hidden Units: [256, 128]
Epoch 1/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9186 - loss: 0.2089 - val_accuracy: 0.9725 - val_loss: 0.0911
Epoch 2/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - accuracy: 0.9670 - loss: 0.1004 - val_accuracy: 0.9800 - val_loss: 0.0715
Epoch 3/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 8ms/step - accuracy: 0.9720 - loss: 0.0837 - val_accuracy: 0.9815 - val_loss: 0.0745
Epoch 4/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 8ms/step - accuracy: 0.9740 - loss: 0.0807 - val_accuracy: 0.9835 - val_loss: 0.0736
Epoch 5/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 8ms/step - accuracy: 0.9758 - loss: 0.0740 - val_accuracy: 0.9820 - val_loss: 0.0804
Epoch 6/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s

In [None]:
# Print a one-line-per-experiment table of the collected test accuracies.
print(f"\n{'=' * 60}")
print("SUMMARY OF ALL EXPERIMENTS")
print("=" * 60)

for result in all_results:
    # Name is left-aligned in a 25-character column; accuracy has two decimals.
    print("{:<25} | Test Accuracy: {:.2f}%".format(result['name'], result['test_accuracy']))


SUMMARY OF ALL EXPERIMENTS
Baseline Model            | Test Accuracy: 97.94%
Higher LR (0.05)          | Test Accuracy: 93.73%
More Epochs (20)          | Test Accuracy: 98.35%
Larger Network            | Test Accuracy: 98.05%


In [None]:
# Select the experiment with the highest test accuracy; max() keeps the first
# one it encounters when several are tied.
best = max(all_results, key=lambda run: run['test_accuracy'])
print("\nBest Model: {} with {:.2f}% accuracy".format(best['name'], best['test_accuracy']))


Best Model: More Epochs (20) with 98.35% accuracy
