<a id='1'></a>
<div class="alert alert-block alert-danger">
<h2>1 Import Packages</h2>
</div>

In [25]:
import warnings
warnings.filterwarnings('ignore')

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'    # ignore information messages

import tensorflow as tf
from tensorflow.keras.datasets import cifar10
# CIFAR-10: natural images in 10 different classes; 50000 training and 10000 test 32x32-pixel RGB (color) images
from tensorflow.keras import Sequential, Input, Model
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, BatchNormalization
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.activations import relu

<a id='2'></a>
<div class="alert alert-block alert-danger">
   <h2>
    2 Load Dataset
    </h2>
</div>

In [56]:
# Load the CIFAR-10 train and test splits (images X, integer class labels y)
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
# Sanity check: print the image-array and label-array shapes of each split
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(50000, 32, 32, 3) (50000, 1)
(10000, 32, 32, 3) (10000, 1)


In [57]:
# Scale pixel values into [0, 1] and store them as float32.
# The dtype guard keeps this cell idempotent: running it again on arrays that
# are already float32 would otherwise divide by 255 a second time and silently
# shrink every pixel value.
if X_train.dtype != "float32":
    X_train = X_train.astype("float32") / 255.0
if X_test.dtype != "float32":
    X_test = X_test.astype("float32") / 255.0

- The images are loaded as NumPy arrays of `uint8` pixel values (0–255). Dividing them by 255.0 directly would produce `float64`, so we cast to `float32` first to reduce memory use and computation
- We also normalize the pixel values to the range 0 to 1
- The results are still NumPy arrays

<a id='3'></a>
<div class="alert alert-block alert-danger">
   <h2>
    3 Create a basic neural network
    </h2>
</div>

<a id='31'></a>
<div class="alert alert-block alert-info">
   <h3>
        3.1 Sequential API
   </h3>
</div>

In [18]:
model = Sequential([
    Input(shape=(32, 32, 3)),   # one 32x32 image with 3 colour channels per sample
    # filters -> number of output channels; kernel_size=3 -> a (3, 3) kernel
    # padding='valid' (the default) lets the 32x32 size shrink depending on the kernel,
    # padding='same' would keep it at 32x32
    Conv2D(filters=32, kernel_size=3, padding='valid', activation='relu'),    # (None, 30, 30, 32)
    MaxPooling2D(pool_size=(2, 2)),   # halves height and width -> (None, 15, 15, 32)
    Conv2D(filters=64, kernel_size=3, padding='valid', activation='relu'),    # (None, 13, 13, 64)
    MaxPooling2D(),                   # same 2x2 pooling via the default -> (None, 6, 6, 64)
    Conv2D(filters=128, kernel_size=3, padding='valid', activation='relu'),   # (None, 4, 4, 128)
    Flatten(),                        # (None, 2048)
    Dense(units=64, activation='relu'),   # fully connected layer with 64 nodes
    Dense(units=10),                      # one raw score (logit) per class, 10 classes
])



In [21]:
# Print every layer with its output shape and parameter count
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_13 (Conv2D)           (None, 30, 30, 32)        896       
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 13, 13, 64)        18496     
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 4, 4, 128)         73856     
_________________________________________________________________
flatten_3 (Flatten)          (None, 2048)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 64)               

- 225,034 parameters in total, so this is a very small network

In [22]:
# Specify the training configuration (optimizer, loss, metrics).
model.compile(
    # `learning_rate` is the supported keyword; the old `lr` alias is deprecated
    # and is rejected by newer Keras optimizers.
    optimizer=Adam(learning_rate=3e-4),
    # from_logits=True because the output layer is Dense(10) with no softmax, so
    # the model emits raw scores and the loss applies the softmax itself.
    # (from_logits=False would be correct only with Dense(10, activation='softmax').)
    loss=SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [23]:
%%time
# train the model by slicing the data into "batches" of size batch_size, and repeatedly iterating 
# over the entire dataset for a given number of epochs.
model.fit(X_train, y_train, batch_size=64, epochs=5)
model.evaluate(X_test, y_test, batch_size=64)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CPU times: user 20.4 s, sys: 18.9 s, total: 39.3 s
Wall time: 41.1 s


[1.0302128791809082, 0.6374000310897827]

In [62]:
def my_model():
    """Build the CNN with the Keras Functional API.

    The network is three Conv2D -> BatchNormalization -> ReLU stages
    (32, 64 and 128 filters) with a single max-pooling step after the first
    stage, followed by Flatten, a 64-unit ReLU dense layer and a 10-unit
    output layer without activation (raw logits).

    Returns:
        An uncompiled `Model` mapping (32, 32, 3) inputs to 10 outputs.
    """
    def conv_bn_relu(tensor, filters, kernel_size, **conv_kwargs):
        # Convolution without activation, then batch normalization, then ReLU.
        tensor = Conv2D(filters, kernel_size, **conv_kwargs)(tensor)
        tensor = BatchNormalization()(tensor)
        return relu(tensor)

    image = Input(shape=(32, 32, 3))

    features = conv_bn_relu(image, 32, 3)
    features = MaxPooling2D()(features)
    # padding='same' keeps the spatial size unchanged through the 5x5 convolution
    features = conv_bn_relu(features, 64, 5, padding='same')
    features = conv_bn_relu(features, 128, 3)

    features = Flatten()(features)
    features = Dense(64, activation='relu')(features)
    logits = Dense(10)(features)

    return Model(inputs=image, outputs=logits)

In [63]:
# Build the Functional-API network; this rebinds `model`, replacing the Sequential one
model = my_model()

In [64]:
# Print every layer with its output shape and parameter count
model.summary()

Model: "model_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_37 (InputLayer)        [(None, 32, 32, 3)]       0         
_________________________________________________________________
conv2d_99 (Conv2D)           (None, 30, 30, 32)        896       
_________________________________________________________________
batch_normalization_67 (Batc (None, 30, 30, 32)        128       
_________________________________________________________________
tf.nn.relu_60 (TFOpLambda)   (None, 30, 30, 32)        0         
_________________________________________________________________
max_pooling2d_42 (MaxPooling (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_100 (Conv2D)          (None, 15, 15, 64)        51264     
_________________________________________________________________
batch_normalization_68 (Batc (None, 15, 15, 64)        256 

In [65]:
# Specify the training configuration (optimizer, loss, metrics).
model.compile(
    # `learning_rate` is the supported keyword; the old `lr` alias is deprecated
    # and is rejected by newer Keras optimizers.
    optimizer=Adam(learning_rate=3e-4),
    # from_logits=True because the output layer is Dense(10) with no softmax, so
    # the model emits raw scores and the loss applies the softmax itself.
    # (from_logits=False would be correct only with Dense(10, activation='softmax').)
    loss=SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [66]:
%%time
# train the model by slicing the data into "batches" of size batch_size, and repeatedly iterating 
# over the entire dataset for a given number of epochs.
model.fit(X_train, y_train, batch_size=64, epochs=5)
model.evaluate(X_test, y_test, batch_size=64)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CPU times: user 28.1 s, sys: 27.2 s, total: 55.3 s
Wall time: 1min 33s


[0.8995259404182434, 0.6968000531196594]

- The test-set accuracy has not actually improved much (about 0.64 → 0.70)
- The model is overfitting the training data