<a href="https://colab.research.google.com/github/Jason-Gitau/Neural-Networks/blob/main/cifar10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from IPython import get_ipython
from IPython.display import display
# %%
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense ,Dropout, BatchNormalization
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator # Import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau # Import callbacks
from tensorflow.keras.optimizers import Adam # Import Adam optimizer to set learning rate

In [2]:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


In [3]:
# checking the shape of the dataset
print("Original shape:", x_train.shape)

Original shape: (50000, 32, 32, 3)


In [4]:
# Normalize pixel values
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

In [5]:
# one-hot encode the labels
num_classes = 10  # CIFAR-10 has 10 classes
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)
print("One-hot encoded y_train shape:", y_train.shape)
print("One-hot encoded y_test shape:", y_test.shape)

One-hot encoded y_train shape: (50000, 10)
One-hot encoded y_test shape: (10000, 10)


In [6]:
# Implement Data Augmentation
datagen = ImageDataGenerator(
    rotation_range=10,       # randomly rotate images by 15 degrees
    width_shift_range=0.05,   # randomly shift images horizontally by 10%
    height_shift_range=0.1,  # randomly shift images vertically by 10%
    horizontal_flip=True,    # randomly flip images horizontally
    zoom_range=0.1,          # randomly zoom in/out by 10%
)

# Fit the generator on your training data (important for transformations like zca_whitening, not for basic ones)
#datagen.fit(x_train) # Only necessary if you use featurewise_center/std_normalization or zca_whitening


In [7]:
# # Define the CNN model with Dropout layers and Batch Normalization
# model = Sequential([
#     Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(32, 32, 3), padding='same'),
#     BatchNormalization(),
#     MaxPooling2D(pool_size=(2, 2)),
#     Dropout(0.25),
#     Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'),
#     BatchNormalization(),
#     MaxPooling2D(pool_size=(2, 2)),
#     Dropout(0.25),
#     Flatten(),
#     Dense(128, activation='relu'),
#     Dropout(0.5),
#     Dense(10, activation='softmax')
# ])

# # %%
# # Compile the model with a reduced initial learning rate
# optimizer = Adam(learning_rate=0.00005) # Reduce initial learning rate
# model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# # Print model summary
# model.summary()



# Helper function to create a convolutional block
def conv_block(filters, kernel_size=(3, 3), padding='same', dropout_rate=0.25):
    """
    Creates a convolutional block with two Conv2D layers, Batch Normalization,
    MaxPooling, and Dropout.
    """
    return [
        Conv2D(filters, kernel_size=kernel_size, activation='relu', padding=padding),
        BatchNormalization(),
        Conv2D(filters, kernel_size=kernel_size, activation='relu', padding=padding),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(dropout_rate)
    ]

# Define the CNN model
model = Sequential()

# Input Block: First convolutional block with input shape
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same', input_shape=(32, 32, 3)))
model.add(BatchNormalization())
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Second Convolutional Block
# model.add(*conv_block(filters=64, dropout_rate=0.25)) # Original incorrect line
for layer in conv_block(filters=64, dropout_rate=0.25): # Iterate through the layers returned by conv_block
    model.add(layer) # Add each layer individually

# Third Convolutional Block
# model.add(*conv_block(filters=128, dropout_rate=0.25)) # Original incorrect line
for layer in conv_block(filters=128, dropout_rate=0.25): # Iterate through the layers returned by conv_block
    model.add(layer) # Add each layer individually


# Fully Connected Layers
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Print model summary
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Implement Early Stopping and ReduceLROnPlateau callbacks (more aggressive)
early_stopping = EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True) # Keep patience at 4 for now
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,       # Reduce learning rate by 20%
    patience=4,       # Reduce after just 2 epochs of no val_loss improvement
    min_lr=0.00001,   # Minimum learning rate
    verbose=1         # Print message when learning rate is reduced
)

# Train the model using data augmentation and increased batch size
BATCH_SIZE = 64 # Increase batch size
history = model.fit(
    datagen.flow(x_train, y_train, batch_size=BATCH_SIZE), # Use the data generator with increased batch size
    epochs=50, # You can set a higher number of epochs, Early Stopping will stop training when needed
    steps_per_epoch=x_train.shape[0] // BATCH_SIZE, # Update steps per epoch for the new batch size
    validation_data=(x_test, y_test),
    callbacks=[early_stopping, reduce_lr] # Add both callbacks
)




  self._warn_if_super_not_called()


Epoch 1/50
[1m279/781[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m4:56[0m 592ms/step - accuracy: 0.2670 - loss: 2.4073

In [None]:
# Evaluate on test set
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

In [None]:
import matplotlib.pyplot as plt

# Plot accuracy
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Plot loss
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()