In [1]:
# This is to install necessary components to run the assignment
# Note: For compatability purposes, libraries have been updated to match to current versions; 
# hence some of the package invocations may differ slightly from the book
!pip install -r requirements.txt

[31mERROR: Could not find a version that satisfies the requirement numpy==1.26 (from versions: 1.3.0, 1.4.1, 1.5.0, 1.5.1, 1.6.0, 1.6.1, 1.6.2, 1.7.0, 1.7.1, 1.7.2, 1.8.0, 1.8.1, 1.8.2, 1.9.0, 1.9.1, 1.9.2, 1.9.3, 1.10.0.post2, 1.10.1, 1.10.2, 1.10.4, 1.11.0, 1.11.1, 1.11.2, 1.11.3, 1.12.0, 1.12.1, 1.13.0, 1.13.1, 1.13.3, 1.14.0, 1.14.1, 1.14.2, 1.14.3, 1.14.4, 1.14.5, 1.14.6, 1.15.0, 1.15.1, 1.15.2, 1.15.3, 1.15.4, 1.16.0, 1.16.1, 1.16.2, 1.16.3, 1.16.4, 1.16.5, 1.16.6, 1.17.0, 1.17.1, 1.17.2, 1.17.3, 1.17.4, 1.17.5, 1.18.0, 1.18.1, 1.18.2, 1.18.3, 1.18.4, 1.18.5, 1.19.0, 1.19.1, 1.19.2, 1.19.3, 1.19.4, 1.19.5)[0m
[31mERROR: No matching distribution found for numpy==1.26[0m


In [2]:
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import RMSprop, SGD, Adam
import matplotlib.pyplot as plt
import tensorflow as tf
import os


In [3]:
# CIFAR_10 is a set of 60K images 32 x 32 pixels on 3 channels
IMG_CHANNELS = 3
IMG_ROWS = 32
IMG_COLS = 32

# Constant 
BATCH_SIZE = 128
NB_EPOCH = 20
NB_CLASSES = 10
VERBOSE = 1
VALIDATION_SPLIT = 0.2
OPTIM = RMSprop()

# If an UNKNOWN ERROR LOG DISPLAYS, it's just tensorflow trying to connect to GPU. 
# It'll simply redirect and use the CPU!

In [4]:
# Load the datasets
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

X_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


In [5]:
# One-hot Encoding & Normalization of images
# In the book, the older version is:
# Y_train = np_utils.to_categorical(y_train, NB_CLASSES)

Y_train = to_categorical(y_train, NB_CLASSES)
Y_test = to_categorical(y_test, NB_CLASSES)
                        
# float and normalization
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

In [6]:
# Network
model = Sequential()
model.add(Conv2D(32, (3, 3), padding="same",
                input_shape=(IMG_ROWS, IMG_COLS, IMG_CHANNELS)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()
# Below red is simply a userwarning. 

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 32, 32, 32)        896       
_________________________________________________________________
activation (Activation)      (None, 32, 32, 32)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 16, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 16, 16, 32)        0         
_________________________________________________________________
flatten (Flatten)            (None, 8192)              0         
_________________________________________________________________
dense (Dense)                (None, 512)               4194816   
_________________________________________________________________
activation_1 (Activation)    (None, 512)               0

In [7]:
# Training (NOTE: THIS MAY TAKE SOME TIME. Go grab a cup of coffee!)
model.compile(loss="categorical_crossentropy", optimizer=OPTIM, metrics=["accuracy"])
model.fit(X_train, Y_train, batch_size=BATCH_SIZE, epochs=NB_EPOCH, validation_split=VALIDATION_SPLIT, verbose=VERBOSE)
score = model.evaluate(X_test, Y_test, batch_size=BATCH_SIZE, verbose=VERBOSE)
print('Test Score:', score[0])
print('Test Accuracy:', score[1])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test Score: 1.1128840446472168
Test Accuracy: 0.6585000157356262


In [8]:
# Save the model
os.makedirs('output', exist_ok=True)
model_json = model.to_json()
open('./output/cifar10_architecture.json', 'w').write(model_json)

# And the weights learned by our deep network on the training set
model.save_weights('./output/cifar10.weights.h5', overwrite=True) # NOTE, it is now cifar10.weights, not cifar10_weights

In [9]:
# Improving the CIFAR-10 performance with deeper network
model = Sequential()
model.add(Conv2D(32, (3,3), padding='same', input_shape=(IMG_ROWS, IMG_COLS, IMG_CHANNELS)))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
activation_3 (Activation)    (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 32)        9248      
_________________________________________________________________
activation_4 (Activation)    (None, 32, 32, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 16, 16, 64)       

In [10]:
# Improving the CIFAR-10 performance with data augmentation
from tensorflow.keras.preprocessing.image import ImageDataGenerator # Note, we are using Tensorflow's Keras package!
from keras.datasets import cifar10
import numpy as np
NUM_TO_AUGMENT = 5

In [11]:
# load dataset
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Augmenting
print("Augmenting training set images...")
datagen = ImageDataGenerator(rotation_range=40,
                            width_shift_range=0.2,
                            height_shift_range=0.2,
                            zoom_range=0.2,
                            horizontal_flip=True,
                            fill_mode='nearest')


# RUN THE BELOW TO CREATE IMAGES FROM THE ABOVE IMAGEDATAGENERATOR. 
# NOTE: IF YOU RUN IT, IT WILL TAKE A WHILE!

# xtas, ytas = [], []

#for i in range(X_train.shape[0]):
#    num_aug = 0
#     x = X_train[i]  # (32, 32, 3)
#     x = x.reshape((1,) + x.shape)  # (1, 32, 32, 3)
    
#     for x_aug in datagen.flow(x, batch_size=1, save_to_dir='preview', save_prefix='cifar', save_format='jpeg'):
#        if num_aug >= NUM_TO_AUGMENT:
#             break
#         xtas.append(x_aug[0])
#         ytas.append(y_train[i])  # save the label as well
#         num_aug += 1

Augmenting training set images...


In [12]:
optimizer = RMSprop() # Recreating the optimizer

#for the datagen
datagen.fit(X_train)

model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])


# train changed from model.fit_generator()
history = model.fit(
    datagen.flow(X_train, Y_train, batch_size=BATCH_SIZE),
    steps_per_epoch=X_train.shape[0] // BATCH_SIZE,
    epochs=NB_EPOCH,
    validation_data=tf.data.Dataset.from_tensor_slices((X_test, Y_test)).batch(BATCH_SIZE),
    verbose=VERBOSE
)

score = model.evaluate(X_test, Y_test, batch_size=BATCH_SIZE, verbose=VERBOSE)

print("Test Score:", score[0])
print("Test Accuracy:", score[1])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test Score: 1.2094701528549194
Test Accuracy: 0.5601000189781189


# MARKDOWN WORK HERE

- Analyze the ethical and privacy implications of the algorithm you just created here!