<a href="https://colab.research.google.com/github/Mmabatho/AI-For-Software-Engineeering-Week-3/blob/Letshego/task2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Seed TensorFlow's and NumPy's global random number generators with a fixed
# value so that repeated runs draw the same pseudo-random numbers (for
# example the sample indices chosen with np.random.choice further down).
tf.random.set_seed(42)
np.random.seed(42)

class MNISTCNNClassifier:
    """Convolutional-network digit classifier for the Keras MNIST dataset.

    Intended call order: ``load_and_preprocess_data`` -> ``build_model`` ->
    ``compile_model`` -> ``train_model`` -> ``evaluate_model``. The plotting
    helpers can be used once the step they depend on has run.

    Every attribute starts as ``None`` and is filled in by the matching step:
    ``model`` by ``build_model``, ``history`` by ``train_model`` and the four
    data arrays by ``load_and_preprocess_data``.
    """

    # Number of digit classes and per-image shape (height, width, channels).
    NUM_CLASSES = 10
    INPUT_SHAPE = (28, 28, 1)

    def __init__(self):
        self.model = None
        self.history = None
        self.x_train = None
        self.y_train = None
        self.x_test = None
        self.y_test = None

    def _require(self, *names, hint):
        """Raise ``RuntimeError`` if any of the named attributes is still ``None``.

        Gives callers a clear message about the missing pipeline step instead
        of an ``AttributeError``/``TypeError`` from dereferencing ``None``.
        """
        missing = [name for name in names if getattr(self, name) is None]
        if missing:
            raise RuntimeError(f"{', '.join(missing)} not available. {hint}")

    def _predict_test_classes(self):
        """Return ``(true_classes, predicted_classes)`` for the test set.

        Both are integer class indices obtained with ``argmax`` over the
        one-hot labels and over the model's predicted probabilities.
        """
        probabilities = self.model.predict(self.x_test)
        predicted = np.argmax(probabilities, axis=1)
        actual = np.argmax(self.y_test, axis=1)
        return actual, predicted

    def load_and_preprocess_data(self) -> None:
        """Load MNIST and store normalised images and one-hot labels on self."""
        print("Loading MNIST dataset...")
        (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

        # Normalize pixel values to [0, 1]
        x_train = x_train.astype('float32') / 255.0
        x_test = x_test.astype('float32') / 255.0

        # Reshape data to add channel dimension (28, 28, 1)
        x_train = x_train.reshape(-1, *self.INPUT_SHAPE)
        x_test = x_test.reshape(-1, *self.INPUT_SHAPE)

        # Convert labels to categorical one-hot encoding
        y_train = keras.utils.to_categorical(y_train, self.NUM_CLASSES)
        y_test = keras.utils.to_categorical(y_test, self.NUM_CLASSES)

        self.x_train = x_train
        self.y_train = y_train
        self.x_test = x_test
        self.y_test = y_test

        print(f"Training data shape: {x_train.shape}")
        print(f"Test data shape: {x_test.shape}")
        print(f"Number of classes: {y_train.shape[1]}")

    def build_model(self):
        """Build the CNN, store it on ``self.model`` and return it.

        The input shape is declared with an explicit ``keras.Input`` layer;
        passing ``input_shape=`` to the first layer of a ``Sequential`` model
        makes recent Keras versions emit a warning.
        """
        model = keras.Sequential([
            keras.Input(shape=self.INPUT_SHAPE),

            # First Convolutional Block
            layers.Conv2D(32, (3, 3), activation='relu'),
            layers.BatchNormalization(),
            layers.Conv2D(32, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.25),

            # Second Convolutional Block
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.BatchNormalization(),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.25),

            # Third Convolutional Block
            layers.Conv2D(128, (3, 3), activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(0.25),

            # Flatten and Dense layers
            layers.Flatten(),
            layers.Dense(512, activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(0.5),
            layers.Dense(self.NUM_CLASSES, activation='softmax'),
        ])

        self.model = model
        print("Model architecture built successfully!")
        return model

    def compile_model(self) -> None:
        """Compile the model with Adam, categorical cross-entropy and accuracy.

        Raises:
            RuntimeError: if ``build_model`` has not been called yet.
        """
        self._require("model", hint="Call build_model() first.")
        self.model.compile(
            optimizer=keras.optimizers.Adam(learning_rate=0.001),
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )
        print("Model compiled successfully!")

    def train_model(self, epochs: int = 5, batch_size: int = 128,
                    validation_split: float = 0.1) -> None:
        """Fit the model on the training data and keep the Keras history.

        Args:
            epochs: maximum number of training epochs.
            batch_size: number of samples per gradient update.
            validation_split: fraction of the training data held out for
                validation; when falsy, no validation-based callbacks are used.

        Raises:
            RuntimeError: if the model or the training data is missing.
        """
        self._require("model", hint="Call build_model() and compile_model() first.")
        self._require("x_train", "y_train",
                      hint="Call load_and_preprocess_data() first.")
        print("Starting training...")

        # Both callbacks monitor validation metrics, which only exist when a
        # validation split is requested.
        callbacks = []
        if validation_split:
            callbacks = [
                keras.callbacks.EarlyStopping(
                    monitor='val_accuracy',
                    patience=3,
                    restore_best_weights=True
                ),
                keras.callbacks.ReduceLROnPlateau(
                    monitor='val_loss',
                    factor=0.2,
                    patience=2,
                    min_lr=0.0001
                ),
            ]

        self.history = self.model.fit(
            self.x_train, self.y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_split=validation_split,
            callbacks=callbacks,
            verbose=1
        )

        print("Training completed!")

    def evaluate_model(self):
        """Evaluate on the test set and print a classification report.

        Returns:
            A ``(test_accuracy, test_loss)`` tuple.

        Raises:
            RuntimeError: if the model or the test data is missing.
        """
        self._require("model", hint="Call build_model() and compile_model() first.")
        self._require("x_test", "y_test",
                      hint="Call load_and_preprocess_data() first.")
        print("Evaluating model on test data...")

        # Evaluate on test set
        test_loss, test_accuracy = self.model.evaluate(self.x_test, self.y_test, verbose=0)
        print(f"Test Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")
        print(f"Test Loss: {test_loss:.4f}")

        y_true_classes, y_pred_classes = self._predict_test_classes()

        # Classification report
        print("\nClassification Report:")
        print(classification_report(y_true_classes, y_pred_classes))

        return test_accuracy, test_loss

    def plot_training_history(self) -> None:
        """Plot accuracy and loss per epoch from the stored training history.

        Validation curves are drawn only when the history contains them, so
        a run trained without a validation split can still be plotted.
        """
        if self.history is None:
            print("No training history available. Train the model first.")
            return

        metrics = self.history.history
        _, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

        for ax, key, label in ((accuracy_ax, 'accuracy', 'Accuracy'),
                               (loss_ax, 'loss', 'Loss')):
            ax.plot(metrics[key], label=f'Training {label}')
            val_key = f'val_{key}'
            if val_key in metrics:
                ax.plot(metrics[val_key], label=f'Validation {label}')
            ax.set_title(f'Model {label}')
            ax.set_xlabel('Epoch')
            ax.set_ylabel(label)
            ax.legend()
            ax.grid(True)

        plt.tight_layout()
        plt.show()

    def visualize_predictions(self, num_samples: int = 5) -> None:
        """Show randomly chosen test images with their predicted labels.

        Args:
            num_samples: how many test images to draw; capped at the size of
                the test set.

        Raises:
            RuntimeError: if the model or the test data is missing.
            ValueError: if ``num_samples`` is smaller than 1.
        """
        self._require("model", hint="Call build_model() and compile_model() first.")
        self._require("x_test", "y_test",
                      hint="Call load_and_preprocess_data() first.")
        if num_samples < 1:
            raise ValueError("num_samples must be at least 1")
        # Sampling without replacement cannot draw more items than exist.
        num_samples = min(num_samples, len(self.x_test))

        # Get random sample indices
        sample_indices = np.random.choice(len(self.x_test), num_samples, replace=False)

        # Get predictions
        predictions = self.model.predict(self.x_test[sample_indices])
        predicted_classes = np.argmax(predictions, axis=1)
        confidences = np.max(predictions, axis=1)
        true_classes = np.argmax(self.y_test[sample_indices], axis=1)

        # squeeze=False always yields a 2-D axes array, so a single sample
        # needs no special handling.
        _, axes = plt.subplots(1, num_samples, figsize=(15, 3), squeeze=False)

        for ax, index, true, pred, conf in zip(
                axes[0], sample_indices, true_classes, predicted_classes, confidences):
            # Display image (drop the channel axis for imshow)
            ax.imshow(self.x_test[index].reshape(self.INPUT_SHAPE[:2]), cmap='gray')

            # Green title for a correct prediction, red for a wrong one
            title_color = 'green' if pred == true else 'red'
            ax.set_title(f'True: {true}\nPred: {pred}\nConf: {conf * 100:.1f}%',
                         color=title_color, fontsize=10)
            ax.axis('off')

        plt.tight_layout()
        plt.show()

        # Print detailed predictions
        print("\nDetailed Predictions:")
        for number, (true, pred, conf) in enumerate(
                zip(true_classes, predicted_classes, confidences), start=1):
            print(f"Sample {number}:")
            print(f"  True Label: {true}")
            print(f"  Predicted Label: {pred}")
            print(f"  Confidence: {conf:.4f}")
            print(f"  Correct: {'Yes' if pred == true else 'No'}")
            print()

    def plot_confusion_matrix(self) -> None:
        """Plot the confusion matrix of test-set predictions as a heatmap.

        Raises:
            RuntimeError: if the model or the test data is missing.
        """
        self._require("model", hint="Call build_model() and compile_model() first.")
        self._require("x_test", "y_test",
                      hint="Call load_and_preprocess_data() first.")

        y_true_classes, y_pred_classes = self._predict_test_classes()

        # Create confusion matrix
        cm = confusion_matrix(y_true_classes, y_pred_classes)

        # Plot confusion matrix
        class_labels = range(self.NUM_CLASSES)
        plt.figure(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=class_labels, yticklabels=class_labels)
        plt.title('Confusion Matrix')
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')
        plt.show()

    def get_model_summary(self) -> None:
        """Print the Keras model summary, or a hint if no model is built."""
        if self.model is None:
            print("Model not built yet. Build the model first.")
            return

        print("Model Architecture Summary:")
        self.model.summary()

def main():
    """Run the digit-classification workflow from data loading to reporting.

    Steps: load data, build and compile the network, print its summary,
    train for up to 15 epochs, plot the training curves, evaluate on the test
    set, report whether accuracy exceeded 95%, then show sample predictions
    and the confusion matrix.
    """
    rule = "=" * 60

    print(rule)
    print("CNN HANDWRITTEN DIGIT CLASSIFIER")
    print(rule)

    classifier = MNISTCNNClassifier()

    # Data first, then the network that will consume it.
    classifier.load_and_preprocess_data()
    classifier.build_model()
    classifier.compile_model()
    classifier.get_model_summary()

    # Fit and show how accuracy/loss evolved across epochs.
    classifier.train_model(epochs=15, batch_size=128)
    classifier.plot_training_history()

    # Only the accuracy is needed for the target check below.
    test_accuracy, _ = classifier.evaluate_model()
    accuracy_pct = test_accuracy * 100

    target_met = test_accuracy > 0.95
    if not target_met:
        print(f"❌ Target accuracy not achieved. ({accuracy_pct:.2f}% < 95%)")
    else:
        print(f"✅ Target accuracy achieved! ({accuracy_pct:.2f}% > 95%)")

    # Qualitative checks: a handful of samples and the full confusion matrix.
    classifier.visualize_predictions(num_samples=5)
    classifier.plot_confusion_matrix()

    print(rule)
    print("CLASSIFICATION COMPLETE!")
    print(rule)

# Script entry point: run the full pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

CNN HANDWRITTEN DIGIT CLASSIFIER
Loading MNIST dataset...
Training data shape: (60000, 28, 28, 1)
Test data shape: (10000, 28, 28, 1)
Number of classes: 10
Model architecture built successfully!
Model compiled successfully!
Model Architecture Summary:


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Starting training...
Epoch 1/15
422/422 ━━━━━━━━━━━━━━━━━━━━ 161s 368ms/step - accuracy: 0.8186 - loss: 0.6340 - val_accuracy: 0.3020 - val_loss: 3.7974 - learning_rate: 0.0010
Epoch 2/15
422/422 ━━━━━━━━━━━━━━━━━━━━ 157s 372ms/step - accuracy: 0.9714 - loss: 0.0906 - val_accuracy: 0.9840 - val_loss: 0.0539 - learning_rate: 0.0010
Epoch 3/15
422/422 ━━━━━━━━━━━━━━━━━━━━ 198s 364ms/step - accuracy: 0.9796 - loss: 0.0650 - val_accuracy: 0.9910 - val_loss: 0.0298 - learning_rate: 0.0010
Epoch 4/15
422/422 ━━━━━━━━━━━━━━━━━━━━ 156s 370ms/step - accuracy: 0.9829 - loss: 0.0539 - val_accuracy: 0.9915 - val_loss: 0.0291 - learning_rate: 0.0010
Epoch 5/15
 17/422 ━━━━━━━━━━━━━━━━━━━━ 2:13 330ms/step - accuracy: 0.9853 - loss: 0.0411