In [7]:
%pip install matplotlib seaborn scikit-learn pandas

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [8]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd


np.random.seed(42)
tf.random.set_seed(42)



# Loading and Preprocessing

In [27]:
class dataLoader:
    """Load and preprocess CIFAR-10 Dataset"""
    
    def __init__(self):
        self.num_classes = 10
        self.class_names = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]
        
        
        
    def loadData(self):
        """Load dataset"""
        print("Loading CIFAR-10 dataset.")
        (x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
        
        print(f"Training data shape: {x_train.shape}")
        print(f"Test data shape: {x_test.shape}")
        return (x_train, y_train), (x_test, y_test)
        
    def preprocessData(self, x_train, y_train, x_test, y_test, normalize = True):
        if normalize:
            x_train = x_train.astype('float32') / 255.0
            x_test = x_test.astype('float32') / 255.0
            
        y_train_c = keras.utils.to_categorical(y_train, self.num_classes)
        y_test_c = keras.utils.to_categorical(y_test, self.num_classes)
        
        return x_train, y_train_c, x_test, y_test_c
    
    def createValidationSplit(self, x_train, y_train, val_split = 0.1):
        valSize = int(len(x_train)* val_split)
        
        x_val = x_train[-valSize:]
        y_val = y_train[-valSize:]
        x_train = x_train[:-valSize]
        y_train = y_train[:-valSize]
        
        print(f"Training set: {x_train.shape[0]} samples")
        print(f"Validation set: {x_val.shape[0]} samples")
        
        return x_train, y_train, x_val, y_val

# Data Augmentation

In [17]:
def createDataAug():
    data_aug = keras.Sequential([
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.1),
        layers.RandomContrast(0.1),
    ], name = "data_augmentaion")
    
    return data_aug

# CNN model Architectures

In [18]:
class CNNArchitectures:
    @staticmethod
    def simpleCNN(input_shape=(32, 32, 3), num_classes=10):
        model = models.Sequential([
            layers.Conv2D(32, (3, 3), activation="relu", padding="same", input_shape = input_shape),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.25),
            
            layers.Conv2D(32, (3, 3), activation="relu", padding="same"),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.25),
            
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.25),
            
            layers.Flatten(),
            layers.Dense(256, activation="relu"),
            layers.BatchNormalization(),
            layers.Dropout(0.5),
            layers.Dense(num_classes, activation="softmax")
        ], name = "SimpleCNN")
        
        return model
    
    @staticmethod
    def deep_cnn(input_shape=(32, 32, 3), num_classes=10):
        model = models.Sequential([
            layers.Conv2D(64, (3, 3), activation='relu', padding="same", input_shape=input_shape),
            layers.BatchNormalization(),
            layers.Conv2D(64, (3, 3), activation="relu", padding = "same"),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.2),
            
            layers.Conv2D(128, (3, 3), activation = "relu", padding="same"),
            layers.BatchNormalization(),
            layers.Conv2D(128, (3, 3), activation="relu", padding="same"),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2,2)),
            layers.Dropout(0.3),
            
            layers.Conv2D(256, (3, 3), activation="relu", padding="same"),
            layers.BatchNormalization(),
            layers.Conv2D(256, (3, 3), activation="relu", padding = "same"),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.4),
            
            layers.Flatten(),
            layers.Dense(512, activation="relu"),
            layers.BatchNormalization(),
            layers.Dropout(0.5),
            layers.Densed(num_classes, activation="softmax"),
        ], name="DeepCNN")
        
        return model
    
    @staticmethod
    def vgg_model(input_shape=(32, 32, 3), num_classes=10):
        model= models.Sequential([
            layers.Conv2D(64, (3, 3), activation="relu", padding="same"),
            layers.Conv2D(64, (3, 3), activation="relu", padding="same"),
            layers.MaxPooling2D((2, 2)), 
            layers.BatchNormalization(),
            layers.Dropout(0.2),
            
            layers.Conv2D(128, (3, 3), activation="relu", padding="same"),
            layers.Conv2D(128, (3, 3), activation="relu", padding="same"),
            layers.MaxPooling2D((2, 2)),
            layers.BatchNormalization(),
            layers.Dropout(0.3),
            
            layers.Conv2D(256, (3, 3), activation="relu", padding="same"),
            layers.Conv2D(128, (3, 3), activation="relu", padding="same"),
            layers.MaxPooling2D((2, 2)), 
            layers.BatchNormalization(),
            layers.Dropout(0.4),
            
            layers.Flatten(),
            layers.Dense(512, (3, 3), activation="relu"),
            layers.BatchNormalization(),
            layers.Dense(512, (3, 3), activation="relu"),
            layers.BatchNormalization(),
            layers.Dropout(0.5), 
            layers.Dense(num_classes, activation="softmax")          
        ], name="VGG_Style")
        
        return model
    
    @staticmethod
    def resnet_style(input_shape=(32, 32, 3), num_classes=10):
        inputs=layers.Input(shape=input_shape)
        
        x = layers.Conv2D(64, 3, padding="same")(inputs)
        x = layers.BatchNormalization()(x)
        x = layers.Activation("relu")(x)
        
        x = CNNArchitectures.residual_block(x, 64)
        x = CNNArchitectures.residual_block(x, 64)
        
        x = CNNArchitectures.residual_block(x, 128, stride=2)
        x = CNNArchitectures.residual_block(x, 128)
        
        x = CNNArchitectures.residual_block(x, 256, stride=2)
        x = CNNArchitectures.residual_block(x, 256)
        
        x = layers.GlobalAveragePooling2D()(x)
        x = layers.Dropout(0.5)(x)
        
        outputs = layers.Dense(num_classes, activation="softmax")(x)
        
        model = models.Model(inputs, outputs, name = "ResNet_Style")
        
        return model
        
        

# Training functions

In [None]:
def compile_and_train(model, x_train, y_train, x_val, y_val, epochs=50, batch_size = 64, learning_rate=0.001, use_augmentation=True):
    #compile
    optimizer = keras.optimizers.Adam(learning_rate = learning_rate)
    model.compile(
        optimizer=optimizer,
        loss = 'categorical_crossentropy',
        metrics = ['accuracy', keras.metrics.TopKCategoricalAccuracy(k=3, name = "top3_accuracy")]
    )
    #callbacks and data augmentation got these functions from claude
    callbacks = [
        keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience = 10,
            restore_best_weights=True
        ),
        keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience = 5,
            min_lr = 1e-7
        ),
        keras.callbacks.ModelCheckpoint(
            'best_model.h5',
            monitor='val_accuracy',
            save_best_only = True
        )        
    ]
    
    if use_augmentation:
        data_aug = createDataAug()
        train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
        train_dataset = train_dataset.shuffle(1000).batch(batch_size)
        train_dataset = train_dataset.map(
            lambda x, y: (data_aug(x, training=True), y),
            num_parallel_calls = tf.data.AUTOTUNE
        )
        train_Dataset = train_dataset.prefetch(tf.data.AUTOTUNE)
        
        history = model.fit(
            train_dataset, epochs = epochs,
            validation_data = (x_val, y_val), 
            callbacks = callbacks,
            verbose=0
        )
    else:
        history = model.fit(
            x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data = (x_val, y_val),
            callbacks = callbacks,
            verbose=1
        )

# Evaluation functins

In [20]:
def evaluate_model(model, x_test, y_test, class_names):
    print("MODEL EVAALUATION")
    
    y_pred = model.predict(x_test, verbose = 0)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true_classes = np.argmax(y_test, axis=1)
    
    test_loss, test_acc, test_top3 = model.evaluate(x_test, y_test, verbose = 0)
    print(f"\nTest Loss: {test_loss}")
    print(f"Test Accuracy: {test_acc} ({test_acc * 100}%)")
    print(f"Top-3 Accuracy: {test_top3} ({test_top3 * 100}%)")
    
    print(classification_report(y_true_classes, y_pred_classes, target_names=class_names, digits=4))
    
    for i, class_name in enumerate(class_names):
        class_mask = y_true_classes == i
        class_acc = np.mean(y_pred_classes[class_mask] == i)
        print(f" {class_names}:{class_acc}")
        
    return y_pred_classes

Load data


In [28]:
data_loader = dataLoader()
(x_train, y_train), (x_test, y_test) = data_loader.loadData()


x_train, y_train_cat, x_test, y_test_cat = data_loader.preprocessData(
    x_train, y_train, x_test, y_test
)

x_train, y_train_cat, x_val, y_val = data_loader.createValidationSplit(x_train, y_train_cat, val_split=0.1)

Loading CIFAR-10 dataset.
Training data shape: (50000, 32, 32, 3)
Test data shape: (10000, 32, 32, 3)
Training set: 45000 samples
Validation set: 5000 samples


train multiple architectures

In [30]:
architectures={
    "simpleCNN":CNNArchitectures.simpleCNN,
    'deepCNN':CNNArchitectures.deep_cnn,
    'vgg':CNNArchitectures.vgg_model,
    'resnet':CNNArchitectures.resnet_style
}

results={}

model = architectures['simpleCNN']()
model.summary

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<bound method Model.summary of <Sequential name=SimpleCNN, built=True>>

In [None]:
history = compile_and_train(model, x_train, y_train_cat, x_val, y_val,
                            epochs=30,
                            batch_size=64,
                            learning_rate=0.001,
                            use_augmentation=True
                            )
results['simpleCNN'] = {
    'model':model,
    'history': history
}

y_pred = evaluate_model(model, x_test, y_test_cat, data_loader.class_names)

y_true = np.argmax(y_test_cat, axis = 1)
y_true



In [None]:
model = architectures['deepcnn']()
model.summary
history = compile_and_train(model, x_train, y_train_cat, x_val, y_val,
                            epochs=30,
                            batch_size=64,
                            learning_rate=0.001,
                            use_augmentation=True
                            )
results['deepcnn'] = {
    'model':model,
    'history': history
}

y_pred = evaluate_model(model, x_test, y_test_cat, data_loader.class_names)

y_true = np.argmax(y_test_cat, axis = 1)
y_true

In [None]:
model = architectures['vgg']()
model.summary
history = compile_and_train(model, x_train, y_train_cat, x_val, y_val,
                            epochs=30,
                            batch_size=64,
                            learning_rate=0.001,
                            use_augmentation=True
                            )
results['vgg'] = {
    'model':model,
    'history': history
}

y_pred = evaluate_model(model, x_test, y_test_cat, data_loader.class_names)

y_true = np.argmax(y_test_cat, axis = 1)
y_true

In [None]:
model = architectures['resnet']()
model.summary
history = compile_and_train(model, x_train, y_train_cat, x_val, y_val,
                            epochs=30,
                            batch_size=64,
                            learning_rate=0.001,
                            use_augmentation=True
                            )
results['resnet'] = {
    'model':model,
    'history': history
}

y_pred = evaluate_model(model, x_test, y_test_cat, data_loader.class_names)

y_true = np.argmax(y_test_cat, axis = 1)
y_true