<a href="https://colab.research.google.com/github/P-eter-shi/Ai_for_web/blob/main/MnistDatasetmodel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt

# Fix the NumPy and TensorFlow random seeds to one shared value for reproducibility
_SEED = 42
np.random.seed(_SEED)
tf.random.set_seed(_SEED)

# Load and preprocess the MNIST dataset
def load_and_preprocess_data():
    """Load MNIST and prepare it for the CNN defined in this file.

    Both splits go through the same steps: images are cast to float32 and
    divided by 255, a trailing channel axis is appended, and labels are
    one-hot encoded into 10 classes.

    Returns:
        A pair ``((x_train, y_train), (x_test, y_test))``.
    """

    def _prepare(images, labels):
        # Scale pixels into [0, 1] and append the channel axis Conv2D expects.
        scaled = images.astype("float32") / 255.0
        with_channel = scaled[..., np.newaxis]
        one_hot = keras.utils.to_categorical(labels, 10)
        return with_channel, one_hot

    train_split, test_split = keras.datasets.mnist.load_data()
    return _prepare(*train_split), _prepare(*test_split)

# Build the CNN model
def build_model():
    """Assemble the (uncompiled) CNN classifier for 28x28x1 inputs.

    The network is two convolutional blocks (32 then 64 filters) followed by
    a dense classifier head ending in a 10-way softmax.

    Returns:
        A ``keras.Sequential`` model.
    """

    def _conv_block(filters):
        # Conv -> BN -> Conv -> BN -> 2x2 max-pool -> dropout
        return [
            layers.Conv2D(filters, kernel_size=(3, 3), activation="relu", padding="same"),
            layers.BatchNormalization(),
            layers.Conv2D(filters, kernel_size=(3, 3), activation="relu", padding="same"),
            layers.BatchNormalization(),
            layers.MaxPooling2D(pool_size=(2, 2)),
            layers.Dropout(0.25),
        ]

    stack = [layers.Input(shape=(28, 28, 1))]
    stack += _conv_block(32)
    stack += _conv_block(64)

    # Classifier head
    stack += [
        layers.Flatten(),
        layers.Dense(256, activation="relu"),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(10, activation="softmax"),
    ]

    return keras.Sequential(stack)

# Train the model
def train_model(model, x_train, y_train, x_test, y_test, *,
                epochs=50, batch_size=128, validation_split=0.1):
    """Compile ``model`` and fit it on the training data.

    The model is compiled in place with Adam (learning rate 0.001),
    categorical cross-entropy loss and an accuracy metric. Validation data is
    carved out of the training set via ``validation_split``; the test set is
    never used here.

    Args:
        model: Keras model to compile and train (modified in place).
        x_train: Training images.
        y_train: One-hot training labels.
        x_test: Unused. Kept so existing callers keep working.
        y_test: Unused. Kept so existing callers keep working.
        epochs: Maximum number of epochs; early stopping may end sooner.
        batch_size: Number of samples per gradient update.
        validation_split: Fraction of the training data held out for
            validation.

    Returns:
        The history object returned by ``model.fit``.
    """
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )

    callbacks = [
        # Stop once validation accuracy has not improved for 5 epochs and
        # roll back to the best weights seen.
        keras.callbacks.EarlyStopping(
            monitor="val_accuracy",
            patience=5,
            restore_best_weights=True,
        ),
        # Multiply the learning rate by 0.2 after 3 epochs without a
        # validation-loss improvement, never going below 1e-6.
        keras.callbacks.ReduceLROnPlateau(
            monitor="val_loss",
            factor=0.2,
            patience=3,
            min_lr=1e-6,
        ),
    ]

    history = model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=validation_split,
        callbacks=callbacks,
        verbose=1,
    )

    return history

# Evaluate the model
def evaluate_model(model, x_test, y_test):
    """Evaluate ``model`` on the test split and print loss and accuracy.

    Args:
        model: Object exposing ``evaluate(x, y, verbose=...)``.
        x_test: Test images.
        y_test: Test labels.

    Returns:
        Whatever ``model.evaluate`` returned; its first two entries are
        printed as loss and accuracy respectively.
    """
    metrics = model.evaluate(x_test, y_test, verbose=0)
    for position, label in enumerate(("loss", "accuracy")):
        print(f"Test {label}: {metrics[position]:.4f}")
    return metrics

# Visualize predictions on sample images
def visualize_predictions(model, x_test, y_test, num_samples=5):
    """Plot randomly chosen test images with predicted and true labels.

    Args:
        model: Object exposing ``predict(images)`` that returns per-class
            scores for each image.
        x_test: Test images; indexed with an integer array.
        y_test: One-hot test labels aligned with ``x_test``.
        num_samples: How many images to show. Values larger than
            ``len(x_test)`` are clamped to the number of available images.

    Returns:
        None. Nothing is predicted or plotted when there is nothing to show
        (empty test set or ``num_samples == 0``).

    Raises:
        ValueError: If ``num_samples`` is negative.
    """
    if num_samples < 0:
        raise ValueError("num_samples must be non-negative")

    # Sampling without replacement cannot draw more items than exist.
    sample_count = min(num_samples, len(x_test))
    if sample_count == 0:
        return

    indices = np.random.choice(len(x_test), size=sample_count, replace=False)
    sample_images = x_test[indices]
    sample_labels = y_test[indices]

    # Collapse score vectors / one-hot rows into class indices.
    predictions = model.predict(sample_images)
    predicted_classes = np.argmax(predictions, axis=1)
    true_classes = np.argmax(sample_labels, axis=1)

    plt.figure(figsize=(12, 6))
    panels = zip(sample_images, predicted_classes, true_classes)
    for position, (image, predicted, actual) in enumerate(panels, start=1):
        plt.subplot(1, sample_count, position)
        # squeeze() drops the channel axis so imshow gets a 2-D image.
        plt.imshow(image.squeeze(), cmap="gray")
        plt.title(f"Pred: {predicted}\nTrue: {actual}")
        plt.axis("off")
    plt.tight_layout()
    plt.show()

# Main execution
def main():
    """Run the full pipeline: load data, build, train, evaluate, plot, save."""
    (x_train, y_train), (x_test, y_test) = load_and_preprocess_data()
    print(f"Training data shape: {x_train.shape}")
    print(f"Test data shape: {x_test.shape}")

    model = build_model()
    model.summary()

    # The return values of training and evaluation are not needed here;
    # evaluate_model prints the test metrics itself.
    train_model(model, x_train, y_train, x_test, y_test)
    evaluate_model(model, x_test, y_test)

    visualize_predictions(model, x_test, y_test)

    # NOTE(review): recent Keras releases treat HDF5 (.h5) as a legacy format
    # and recommend ".keras"; the filename is kept unchanged so existing
    # consumers of this artifact keep working. Confirm before switching.
    model_path = "mnist_cnn_model.h5"
    model.save(model_path)
    print(f"Model saved as {model_path}")

# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()





Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Training data shape: (60000, 28, 28, 1)
Test data shape: (10000, 28, 28, 1)


Epoch 1/50
422/422 ━━━━━━━━━━━━━━━━━━━━ 246s 568ms/step - accuracy: 0.8826 - loss: 0.3935 - val_accuracy: 0.6655 - val_loss: 0.9220 - learning_rate: 0.0010
Epoch 2/50
422/422 ━━━━━━━━━━━━━━━━━━━━ 258s 560ms/step - accuracy: 0.9794 - loss: 0.0637 - val_accuracy: 0.9908 - val_loss: 0.0362 - learning_rate: 0.0010
Epoch 3/50
422/422 ━━━━━━━━━━━━━━━━━━━━ 258s 552ms/step - accuracy: 0.9848 - loss: 0.0477 - val_accuracy: 0.9918 - val_loss: 0.0281 - learning_rate: 0.0010
Epoch 4/50
422/422 ━━━━━━━━━━━━━━━━━━━━ 266s 562ms/step - accuracy: 0.9872 - loss: 0.0406 - val_accuracy: 0.9917 - val_loss: 0.0294 - learning_rate: 0.0010
Epoch 5/50
422/422 ━━━━━━━━━━━━━━━━━━━━ 262s 563ms/step - accuracy: 0.9892 - loss: 0.0333 - val_accuracy: 0.9928 - val_loss: 0.0252 - learning_rate: 0.0010
Epoch 6/50
422/422 ━━━━━━━━━━━━━━━━━━

Ethical Considerations and Bias Mitigation
Potential Biases in the MNIST Model:

    Data Collection Bias:

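        MNIST digits were written by U.S. Census Bureau employees and American high-school students, so the contributors are overwhelmingly English-speaking and from a single country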

        May not represent global variations in handwriting styles

        Potential underrepresentation of certain digit styles (e.g., European "1" vs. American "1")

    Model Architecture Bias:

        CNN may prioritize certain spatial patterns common in Western handwriting

        May struggle with digits written in unconventional styles

    Evaluation Bias:

        Overall accuracy alone doesn't capture fairness across different subgroups (e.g., per-digit or per-writing-style error rates)