# The following is a demonstration of several neural networks trained on the same dataset, in order to compare the strengths and weaknesses of each.

These are all the imports that will be used in the rest of the code cells.

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from keras.optimizers import SGD
from keras.regularizers import L2
from keras.constraints import max_norm
from sklearn.metrics import accuracy_score
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import cifar10
from tensorflow.keras. models import Sequential
from sklearn.metrics import classification_report
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet50 import preprocess_input
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout, BatchNormalization


#Convolutional Neural Network

Here we see the model architecture with 10 layers, reaching a result I wasn't able to optimize any further under the 10-layer restriction.

In [None]:
# Fetch CIFAR-10 and rescale every pixel value by 1/255.
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()
train_images = train_images / 255.0
test_images = test_images / 255.0

# CNN architecture, declared as one ordered list of layers.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.MaxPooling2D((3, 3)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    # One output unit per CIFAR-10 class. No activation here, so the model
    # emits raw logits (matched by from_logits=True in the loss below).
    layers.Dense(10),
])

# Integer labels + logit outputs -> sparse categorical cross-entropy on logits.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

# Print the layer-by-layer summary of the architecture.
model.summary()



Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_27 (Conv2D)          (None, 30, 30, 32)        896       
                                                                 
 max_pooling2d_6 (MaxPoolin  (None, 10, 10, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_28 (Conv2D)          (None, 8, 8, 64)          18496     
                                                                 
 batch_normalization_43 (Ba  (None, 8, 8, 64)          256       
 tchNormalization)                                               
                                                                 
 conv2d_29 (Conv2D)          (None, 6, 6, 64)          36928     
                                                                 
 max_pooling2d_7 (MaxPoolin  (None, 3, 3, 64)         

Here we see the model being trained and the epoch restriction of 10 being applied as well. This will result in a graph that demonstrates accuracy as well as validation accuracy.

In [None]:
# Train for 10 epochs, using the test split as the validation set.
history = model.fit(
    train_images,
    train_labels,
    epochs=10,
    validation_data=(test_images, test_labels),
)

# Final loss/accuracy on the test split.
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
print(f"Test accuracy: {test_acc}")

# Plot training and validation accuracy per epoch; each curve is labelled
# with the history key it was read from.
for metric_name in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric_name], label=metric_name)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.show()

# Raw model outputs for the test images.
predictions = model.predict(test_images)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Here we see the results being adapted for a confusion matrix for better visualization.

In [None]:
# Reduce the per-class model outputs to one predicted class index per image.
predictions = model.predict(test_images)
predicted_labels = predictions.argmax(axis=1)

# Tick labels for the ten classes.
labels = "airplane automobile bird cat deer dog frog horse ship truck".split()

# Build the confusion matrix from true vs. predicted class indices and draw it.
conf_matrix = confusion_matrix(test_labels, predicted_labels)
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=labels)
disp.plot(cmap='bone_r', values_format=".4g", xticks_rotation='vertical')

plt.show()


#Residual Neural Network

Here we see the model architecture for a Residual Neural Network. As you can tell we remain within the bounds of the 10 model layer limit and we continue on with the same process. Notice the differences between the CNN and this ResNet.

In [None]:
# Load CIFAR-10 dataset
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

# Convert labels to one-hot encoding (required by the categorical_crossentropy
# loss the model is compiled with below).
num_classes = 10
train_labels = to_categorical(train_labels, num_classes)
test_labels = to_categorical(test_labels, num_classes)

# Prepare the inputs for ResNet50.
# preprocess_input expects raw pixel values in the 0-255 range: it converts
# RGB to BGR and subtracts the ImageNet per-channel means without rescaling.
# The images must therefore NOT be divided by 255 first, otherwise every pixel
# collapses to (almost) the same negative constant and the pre-trained
# features receive no usable signal.
train_images = preprocess_input(train_images.astype('float32'))
test_images = preprocess_input(test_images.astype('float32'))

# Load pre-trained ResNet50 model without its ImageNet classification head
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(32, 32, 3))

# Freeze the whole backbone so only the custom head below is trained
base_model.trainable = False

# Create a custom top for classification
model = models.Sequential()

model.add(base_model)
model.add(layers.Flatten())

model.add(layers.Dense(512, activation='relu', kernel_initializer='he_normal'))
model.add(BatchNormalization())

model.add(layers.Dense(128, activation='relu', kernel_initializer='he_normal'))
model.add(Dropout(0.5))
model.add(BatchNormalization())

model.add(layers.Dense(32, activation='relu', kernel_initializer='he_normal'))
model.add(BatchNormalization())

# Output layer: one softmax probability per class
model.add(layers.Dense(num_classes, activation='softmax'))

# Compile the model (softmax outputs + one-hot labels -> categorical_crossentropy)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Print model summary to check the architecture
model.summary()



Here we see the ResNet being trained and demonstrating accuracy and validation results.

In [None]:
# Train for 10 epochs, using the test split as the validation set.
history = model.fit(train_images, train_labels, epochs=10, validation_data=(test_images, test_labels))

# Evaluate on exactly the arrays used for validation. test_images has already
# been passed through preprocess_input when the data was prepared, so it must
# not be preprocessed a second time here: doing so would score the model on a
# different input distribution than it was trained and validated on.
# Model.evaluate replaces the deprecated Model.evaluate_generator.
test_loss, test_acc = model.evaluate(test_images, test_labels, batch_size=32, verbose=1)
print(f"Test accuracy: {test_acc}")

# Plot training and validation accuracy per epoch.
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.show()


Here we see the preparation of the confusion matrix for the ResNet as well as the demonstration of the matrix.

In [None]:
# test_labels is one-hot encoded here, so recover the class index per image.
true_labels = test_labels.argmax(axis=1)

# Same reduction for the model's per-class outputs.
predictions = model.predict(test_images)
predicted_labels = predictions.argmax(axis=1)

# Tick labels for the ten classes.
labels = "airplane automobile bird cat deer dog frog horse ship truck".split()

# Build and draw the confusion matrix.
conf_matrix = confusion_matrix(true_labels, predicted_labels)
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=labels)
disp.plot(cmap='bone_r', values_format=".4g", xticks_rotation='vertical')

plt.show()


#Capsule Neural Network

Here we reload the dataset as a precaution due to prior complications with the Capsule Network model architecture.

In [None]:
# Reload CIFAR-10 so this section starts from the raw dataset again.
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()

# Rescale every pixel value by 1/255.
train_images = train_images / 255.0
test_images = test_images / 255.0

# Cast the label arrays to the platform integer type.
train_labels = train_labels.astype(int)
test_labels = test_labels.astype(int)

Here we see the model architecture for the CapsNet. The model layers are created within the build_capsule_network function. Notice how they differ from both the CNN and ResNet.

In [None]:
# Defining capsule layer class for CapsNet
class CapsuleLayer(layers.Layer):
    """Capsule layer with routing-by-agreement.

    The last axis of the input is treated as the input-capsule vector and all
    axes between the batch axis and the last axis are flattened into a single
    "input capsule" axis. Every input capsule casts a vote for each output
    capsule through a kernel shared by all input capsules; the votes are then
    combined by iterative routing and passed through ``squash``.

    Args:
        num_capsules: Number of output capsules.
        capsule_dim: Length of each output capsule vector.
        routings: Number of routing iterations (must be >= 1).
        **kwargs: Forwarded to ``layers.Layer`` (e.g. ``name``).

    Input shape: ``(batch, ..., input_dim)``.
    Output shape: ``(batch, num_capsules, capsule_dim)``.

    Raises:
        ValueError: If ``routings`` is smaller than 1.
    """

    def __init__(self, num_capsules, capsule_dim, routings=3, **kwargs):
        super().__init__(**kwargs)
        if routings < 1:
            # With zero iterations the routing loop never produces an output.
            raise ValueError("routings must be at least 1")
        self.num_capsules = num_capsules
        self.capsule_dim = capsule_dim
        self.routings = routings
        self.activation = squash

    def build(self, input_shape):
        """Create the vote kernel of shape (num_capsules, input_dim, capsule_dim)."""
        self.kernel = self.add_weight(
            name='capsule_kernel',
            shape=(self.num_capsules, input_shape[-1], self.capsule_dim),
            initializer='glorot_uniform',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, inputs):
        """Route the input capsules to ``num_capsules`` output capsules."""
        input_dim = self.kernel.shape[1]

        # (batch, ..., input_dim) -> (batch, n_in, input_dim). The batch size
        # is read dynamically because it is unknown while the graph is built.
        capsules_in = tf.reshape(inputs, (tf.shape(inputs)[0], -1, input_dim))

        # votes[b, n, k, :] is input capsule n's prediction for output capsule k.
        votes = tf.einsum('bnc,kcd->bnkd', capsules_in, self.kernel)

        # Routing logits, one per (input capsule, output capsule) pair.
        # zeros_like on a slice keeps the dynamic batch dimension intact.
        logits = tf.zeros_like(votes[..., 0])

        for i in range(self.routings):
            # Each input capsule distributes its vote across the output capsules.
            coupling = tf.nn.softmax(logits, axis=-1)
            outputs = self.activation(
                tf.reduce_sum(coupling[..., None] * votes, axis=1)
            )
            if i < self.routings - 1:
                # Agreement = dot product between each vote and the current
                # output capsule; it raises the logit of agreeing routes.
                logits += tf.reduce_sum(outputs[:, None, :, :] * votes, axis=-1)
        return outputs

    def compute_output_shape(self, input_shape):
        """Return (batch, num_capsules, capsule_dim)."""
        return (input_shape[0], self.num_capsules, self.capsule_dim)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            'num_capsules': self.num_capsules,
            'capsule_dim': self.capsule_dim,
            'routings': self.routings,
        })
        return config

def squash(vector):
    """Rescale ``vector`` along its last axis without changing its direction.

    With ``n2`` the squared L2 norm over the last axis, the result is
    ``(n2 / (1 + n2)) * vector / sqrt(n2 + epsilon)``, where ``epsilon`` is the
    Keras backend fuzz factor that keeps the division finite for zero vectors.
    """
    norm_sq = tf.reduce_sum(tf.square(vector), axis=-1, keepdims=True)
    safe_norm = tf.sqrt(norm_sq + tf.keras.backend.epsilon())
    shrink = norm_sq / (1 + norm_sq)
    return shrink * vector / safe_norm

# CapsuleLayer and squash (defined above) are used by the network builder below.

def build_capsule_network(input_shape, num_classes):
    """Assemble the capsule-network classifier as a functional Keras model.

    The graph is: three 3x3 ReLU convolutions (128, 64, 32 filters), each
    followed by batch normalization; a ``CapsuleLayer(num_capsules=8,
    capsule_dim=16)``; a flattening ``Reshape``; two ReLU dense layers
    (128, 64 units), each followed by batch normalization; and a softmax
    output with ``num_classes`` units.

    Args:
        input_shape: Shape of one input sample, passed to ``layers.Input``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The un-compiled ``models.Model``.
    """
    net_input = layers.Input(shape=input_shape)
    x = net_input

    # Convolutional feature extractor: Conv2D -> BatchNormalization, three times.
    for n_filters in (128, 64, 32):
        x = layers.Conv2D(n_filters, (3, 3), activation='relu')(x)
        x = layers.BatchNormalization()(x)

    # Capsule stage, then flatten everything but the batch axis.
    x = CapsuleLayer(num_capsules=8, capsule_dim=16)(x)
    x = layers.Reshape((-1,))(x)

    # Dense head: Dense -> BatchNormalization, twice.
    for n_units in (128, 64):
        x = layers.Dense(n_units, activation='relu')(x)
        x = layers.BatchNormalization()(x)

    class_probs = layers.Dense(num_classes, activation='softmax')(x)

    return models.Model(inputs=net_input, outputs=class_probs)

# Learning rate schedule
def lr_schedule(epoch):
    """Return the learning rate to use for the given epoch index.

    The base rate is 1e-3. It is multiplied by 1e-1 for epochs 6 through 10
    and by 1e-2 for epochs above 10.

    Args:
        epoch: Epoch index (an integer).

    Returns:
        The learning rate as a float.
    """
    lr = 1e-3
    # The larger threshold must be tested first: every epoch above 10 is also
    # above 5, so checking `> 5` first would make the `> 10` branch unreachable.
    if epoch > 10:
        lr *= 1e-2
    elif epoch > 5:
        lr *= 1e-1
    return lr

# Keras callback that asks lr_schedule for the learning rate of each epoch.
lr_scheduler = LearningRateScheduler(lr_schedule)

# Reload CIFAR-10 and rescale every pixel value by 1/255.
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()
train_images = train_images / 255.0
test_images = test_images / 255.0

# Cast the label arrays to the platform integer type; the sparse loss used
# below takes class indices rather than one-hot vectors.
train_labels = train_labels.astype(int)
test_labels = test_labels.astype(int)

# Build the capsule network for 32x32 RGB inputs and compile it.
num_classes = 10
model = build_capsule_network(input_shape=(32, 32, 3), num_classes=num_classes)
model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary of the architecture.
model.summary()


Training and demonstrating the model's accuracy.

In [None]:
# Train for 10 epochs, using the test split as the validation set. The
# lr_scheduler callback created alongside the model is passed in here;
# without it the learning-rate schedule is defined but never applied.
history = model.fit(
    train_images,
    train_labels,
    epochs=10,
    validation_data=(test_images, test_labels),
    callbacks=[lr_scheduler],
)

# Evaluate on the same [0, 1]-scaled arrays the model was trained and
# validated on. The ResNet50 preprocess_input transform must not be applied
# here: this network never saw inputs in that form, so routing the test set
# through it would report accuracy on a mismatched input distribution.
# Model.evaluate replaces the deprecated Model.evaluate_generator.
test_loss, test_acc = model.evaluate(test_images, test_labels, batch_size=32, verbose=1)
print(f"Test accuracy: {test_acc}")

# Plot training and validation accuracy per epoch.
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.show()

The confusion matrix is prepared and presented for the CapsNet model.

In [None]:
# Reduce the per-class model outputs to one predicted class index per image.
predictions = model.predict(test_images)
predicted_labels = predictions.argmax(axis=1)

# Tick labels for the ten classes.
labels = "airplane automobile bird cat deer dog frog horse ship truck".split()

# Build and draw the confusion matrix.
conf_matrix = confusion_matrix(test_labels, predicted_labels)
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=labels)
disp.plot(cmap='bone_r', values_format=".4g", xticks_rotation='vertical')

plt.show()


An additional manual accuracy statement was added in due to complications with adding it into the previous code cells. I wanted to be persistent, however, so I demonstrated it by its lonesome.

In [None]:
# Fraction of test images whose predicted class index equals the true one.
accuracy = accuracy_score(y_true=test_labels, y_pred=predicted_labels)
print(f"Manual Accuracy: {accuracy}")
