# Custom CNN Architecture for Image Classification

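This notebook implements and trains a custom CNN from scratch on CIFAR-10 (32×32 RGB images, 10 classes). The training data is split 80/20 into training and validation sets, augmented on the fly during training, and the final model is evaluated on the held-out test set.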

## Objectives:
- Design custom CNN architecture
- Implement data preprocessing and augmentation
- Train the model with proper validation
- Evaluate performance and visualize results
- Save the best model

## 1. Setup and Data Loading

In [None]:
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

# Report the TensorFlow build and the GPU devices TensorFlow can see, so the
# results below can be tied to a specific runtime.
tf_version = tf.__version__
gpu_devices = tf.config.list_physical_devices('GPU')

print(f"TensorFlow version: {tf_version}")
print(f"GPU Available: {gpu_devices}")

In [None]:
# Load dataset (adjust based on your choice from notebook 01)
DATASET_CHOICE = "cifar10"  # or "animals10"

if DATASET_CHOICE == "cifar10":
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
                   'dog', 'frog', 'horse', 'ship', 'truck']
    num_classes = 10
else:
    # Fail fast with a clear message. Without this branch an unsupported
    # choice would only surface later as a NameError on `x_train`.
    raise ValueError(
        f"Unsupported DATASET_CHOICE {DATASET_CHOICE!r}: "
        "only 'cifar10' has a loader in this notebook."
    )

print(f"Training data shape: {x_train.shape}")
print(f"Test data shape: {x_test.shape}")
print(f"Number of classes: {num_classes}")

## 2. Data Preprocessing and Augmentation

In [None]:
# Rescale both image arrays to float32 values in [0, 1].
# NOTE(review): this cell rebinds x_train/x_test, so running it twice on the
# same kernel divides by 255 a second time. Re-run the loading cell first.
x_train, x_test = (
    images.astype('float32') / 255.0 for images in (x_train, x_test)
)

# One-hot encode the labels for use with categorical cross-entropy.
y_train_cat, y_test_cat = (
    to_categorical(labels, num_classes) for labels in (y_train, y_test)
)

pixel_min = x_train.min()
pixel_max = x_train.max()
print(f"Normalized data range: [{pixel_min:.3f}, {pixel_max:.3f}]")
print(f"Labels shape: {y_train_cat.shape}")

In [None]:
# Create train/validation split
# Hold out 20% of the training data for validation. Stratifying on the
# original (non-one-hot) labels keeps class proportions the same in both
# partitions, and the fixed random_state makes the split reproducible.
x_train_split, x_val_split, y_train_split, y_val_split = train_test_split(
    x_train, y_train_cat, test_size=0.2, random_state=42, stratify=y_train
)

# Sanity check: the two partitions must account for every training sample.
assert len(x_train_split) + len(x_val_split) == len(x_train)

print(f"Training set: {x_train_split.shape}")
print(f"Validation set: {x_val_split.shape}")
print(f"Test set: {x_test.shape}")

In [None]:
# Data augmentation
# Keep the settings in one place so the summary printed below is derived from
# the values actually passed to the generator and cannot drift from them.
augmentation_params = dict(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    zoom_range=0.1,
    fill_mode='nearest',
)
datagen = ImageDataGenerator(**augmentation_params)

# No datagen.fit() call: fit() only computes dataset statistics for
# featurewise_center, featurewise_std_normalization and zca_whitening, none of
# which are enabled here. Calling it would just copy the full training array.

print("Data augmentation configured:")
print(f"- Rotation: ±{augmentation_params['rotation_range']} degrees")
print(f"- Width/Height shift: ±{augmentation_params['width_shift_range']:.0%}")
print(f"- Horizontal flip: {augmentation_params['horizontal_flip']}")
print(f"- Zoom: ±{augmentation_params['zoom_range']:.0%}")

## 3. Custom CNN Architecture Design

In [None]:
def _conv_block(filters, dropout_rate=0.25):
    """Return the layers of one block: (Conv2D → BatchNorm → ReLU) × 2 → MaxPool → Dropout."""
    block_layers = []
    for _ in range(2):
        block_layers += [
            layers.Conv2D(filters, (3, 3), padding='same'),
            layers.BatchNormalization(),
            layers.ReLU(),
        ]
    block_layers += [
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(dropout_rate),
    ]
    return block_layers


def create_custom_cnn(input_shape, num_classes):
    """
    Create a custom CNN architecture.

    Architecture pattern, repeated for 32, 64 and 128 filters:
    (Conv2D → BatchNorm → ReLU) × 2 → MaxPool → Dropout(0.25)

    The classification head is GlobalAveragePooling2D → Dense(128, relu) →
    BatchNorm → Dropout(0.5) → Dense(num_classes, softmax).

    Args:
        input_shape: Shape of a single input sample, without the batch axis.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    feature_layers = []
    for filters in (32, 64, 128):
        feature_layers += _conv_block(filters)

    return models.Sequential([
        # Declare the input with an explicit Input layer. Passing
        # `input_shape=` to the first Conv2D is deprecated in recent Keras.
        layers.Input(shape=input_shape),
        *feature_layers,

        # Classification head
        layers.GlobalAveragePooling2D(),
        layers.Dense(128, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ])

# Instantiate the network using the per-sample shape of the image array
input_shape = x_train.shape[1:]  # leading axis dropped; (32, 32, 3) for CIFAR-10
custom_model = create_custom_cnn(input_shape=input_shape, num_classes=num_classes)

# Print the layer-by-layer summary
custom_model.summary()

## 4. Model Compilation and Training Setup

In [None]:
# Configure optimizer, loss and reported metric
custom_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop when validation accuracy has not improved for 15 epochs and roll the
# model back to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=15,
    restore_best_weights=True,
    verbose=1,
)

# Write the model to disk each time validation accuracy reaches a new best.
best_checkpoint = ModelCheckpoint(
    '../models/custom_cnn_best.h5',
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)

# Halve the learning rate after 5 epochs without validation-loss improvement,
# never going below 1e-7.
lr_schedule = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-7,
    verbose=1,
)

callbacks = [early_stopping, best_checkpoint, lr_schedule]

for summary_line in (
    "Model compiled with:",
    "- Optimizer: Adam",
    "- Loss: Categorical Crossentropy",
    "- Metrics: Accuracy",
    "- Callbacks: Early Stopping, Model Checkpoint, LR Reduction",
):
    print(summary_line)

## 5. Model Training

In [None]:
# Training hyperparameters
BATCH_SIZE = 32
EPOCHS = 100

# Number of whole batches per pass over the training split, computed once and
# reused for both the log line and fit().
steps_per_epoch = len(x_train_split) // BATCH_SIZE

# Iterator that yields augmented batches from the training split
train_flow = datagen.flow(x_train_split, y_train_split, batch_size=BATCH_SIZE)

print(f"Starting training...")
print(f"Batch size: {BATCH_SIZE}")
print(f"Max epochs: {EPOCHS}")
print(f"Steps per epoch: {steps_per_epoch}")

# Fit on augmented batches; validation uses the un-augmented hold-out split
history = custom_model.fit(
    train_flow,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    validation_data=(x_val_split, y_val_split),
    callbacks=callbacks,
    verbose=1,
)

print("\nTraining completed!")

## 6. Training History Visualization

In [None]:
def plot_training_history(history):
    """
    Draw accuracy and loss curves side by side and print headline numbers.

    Args:
        history: Object whose ``history`` attribute is a mapping with the keys
            'accuracy', 'val_accuracy', 'loss' and 'val_loss', each holding
            one value per epoch.
    """
    records = history.history
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    # One entry per panel: (train key, train label, val key, val label, title, y-axis label)
    panels = (
        ('accuracy', 'Training Accuracy', 'val_accuracy', 'Validation Accuracy',
         'Model Accuracy Over Time', 'Accuracy'),
        ('loss', 'Training Loss', 'val_loss', 'Validation Loss',
         'Model Loss Over Time', 'Loss'),
    )
    for axis, (train_key, train_label, val_key, val_label, title, y_label) in zip(axes, panels):
        axis.plot(records[train_key], label=train_label, linewidth=2)
        axis.plot(records[val_key], label=val_label, linewidth=2)
        axis.set_title(title)
        axis.set_xlabel('Epoch')
        axis.set_ylabel(y_label)
        axis.legend()
        axis.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Best validation accuracy and the 1-based epoch where it first occurred
    val_accuracies = records['val_accuracy']
    best_val_acc_epoch, best_val_acc = max(
        enumerate(val_accuracies, start=1), key=lambda pair: pair[1]
    )

    print(f"\nBest validation accuracy: {best_val_acc:.4f} at epoch {best_val_acc_epoch}")
    print(f"Final training accuracy: {records['accuracy'][-1]:.4f}")
    print(f"Final validation accuracy: {val_accuracies[-1]:.4f}")

# Plot the accuracy/loss curves and print the summary for the run recorded in `history`.
plot_training_history(history)

## 7. Model Evaluation on Test Set

In [None]:
# Score the model on the held-out test set; evaluate() returns [loss, accuracy]
test_metrics = custom_model.evaluate(x_test, y_test_cat, verbose=0)
test_loss, test_accuracy = test_metrics
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")

# Class probabilities per test image, reduced to the most likely class index
y_pred = custom_model.predict(x_test)
y_pred_classes = y_pred.argmax(axis=1)
# Recover integer class ids from the one-hot test labels
y_true_classes = y_test_cat.argmax(axis=1)

# Per-class precision / recall / F1
report_text = classification_report(
    y_true_classes, y_pred_classes, target_names=class_names
)
print("\nClassification Report:")
print(report_text)

## 8. Confusion Matrix Visualization

In [None]:
# Confusion matrix: rows are true classes, columns are predicted classes
cm = confusion_matrix(y_true_classes, y_pred_classes)

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title('Confusion Matrix - Custom CNN')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
plt.xticks(rotation=45)
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()

# Per-class accuracy: correct predictions (diagonal) over the samples whose
# true label is that class (row sum)
samples_per_class = cm.sum(axis=1)
class_accuracy = np.diag(cm) / samples_per_class
print("\nPer-class Accuracy:")
for class_idx in range(len(class_names)):
    print(f"{class_names[class_idx]}: {class_accuracy[class_idx]:.4f}")

## 9. Model Saving and Results Summary

In [None]:
# Save the final model
# Both artefacts go to the same folder. Create it up front, because open()
# does not create missing parent directories.
models_dir = '../models'
os.makedirs(models_dir, exist_ok=True)

model_filename = f'{models_dir}/custom_cnn_{DATASET_CHOICE}_final.h5'
custom_model.save(model_filename)
print(f"Model saved as: {model_filename}")

# Save training history (the per-epoch metrics dict, not the History object)
history_filename = f'{models_dir}/custom_cnn_{DATASET_CHOICE}_history.pkl'
with open(history_filename, 'wb') as f:
    pickle.dump(history.history, f)
print(f"Training history saved")

# Results summary
print("\n" + "="*50)
print("CUSTOM CNN RESULTS SUMMARY")
print("="*50)
print(f"Dataset: {DATASET_CHOICE.upper()}")
print(f"Architecture: Custom CNN with {custom_model.count_params():,} parameters")
print(f"Training epochs: {len(history.history['accuracy'])}")
print(f"Best validation accuracy: {max(history.history['val_accuracy']):.4f}")
print(f"Final test accuracy: {test_accuracy:.4f}")
print(f"Model saved: {model_filename}")
print("="*50)