In [2]:
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

### Idiomatic Programmer Code Labs

## Code Labs #3 - Get Familiar with Data Curation

## Prerequisites:

    1. Familiar with Python
    2. Completed Handbook 2/Part 13: Data Curation

## Objectives:

    1. Preprocessing a builtin dataset (cifar-10)
    2. Train a model on the dataset for a few epochs
    3. Use same model architecture for larger number of classes (cifar-100)
    4. Use image augmentation

## Setup

Let's import the builtin datasets for:

    CIFAR-10: 32x32 images, 10 classes, 60000 images (6000 per class)
    CIFAR-100: 32x32 images, 100 classes, 60000 images (600 per class)

In [3]:
from keras.datasets import cifar10, cifar100
from keras.utils import to_categorical

## Model Architecture

Let's start with a function that will create our model using a simple CNN architecture, as follows:

*Stem Group*

*Convolutional Blocks*

*Classifier*

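The stem group is two 3x3 convolutions (32 filters) followed by max pooling and dropout. Each of the two convolutional blocks (64, then 128 filters) is a 3x3 convolution, a 1x1 convolution and max pooling. The classifier is global average pooling, a 128-unit dense layer and a softmax output layer with one unit per class.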

In [4]:
from keras import Input, Model
from keras.layers import Conv2D, MaxPooling2D, Dropout, BatchNormalization, GlobalAveragePooling2D, Dense

def convNet(input_shape, nclasses):
    ''' Build and compile a small CNN classifier: stem group, two convolutional blocks, classifier.

        input_shape: shape of a single input sample, passed to Input()
        nclasses   : number of units in the final softmax layer

        Returns the Model, compiled with categorical cross-entropy loss, the adam
        optimizer and the accuracy metric.
    '''
    def relu_conv(tensor, nb_filters, kernel_size):
        ''' Stride-1, same-padded convolution with a ReLU activation '''
        layer = Conv2D(nb_filters, kernel_size, strides=1, padding='same', activation='relu')
        return layer(tensor)

    def downsample(tensor):
        ''' 2x2 max pooling with a stride of 2 '''
        layer = MaxPooling2D(pool_size=(2, 2), strides=2, padding='same')
        return layer(tensor)

    # Input and Stem Group: two 3x3 convolutions (32 filters), pooling, then dropout
    inputs = Input(input_shape)
    x = relu_conv(inputs, 32, (3, 3))
    x = relu_conv(x, 32, (3, 3))
    x = downsample(x)
    x = Dropout(0.25)(x)

    # Two Convolutional Blocks (64, then 128 filters): a 3x3 convolution followed
    # by a 1x1 convolution, then pooling
    for nb_filters in (64, 128):
        x = relu_conv(x, nb_filters, (3, 3))
        x = relu_conv(x, nb_filters, (1, 1))
        x = downsample(x)

    # Classifier: pool each feature map to a single value, then two dense layers
    x = GlobalAveragePooling2D()(x)
    x = Dense(128, activation='relu')(x)
    outputs = Dense(nclasses, activation='softmax')(x)

    model = Model(inputs, outputs)

    # HERE
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

## CIFAR-10

TODO

In [5]:
import numpy as np
# Load the CIFAR-10 training and test splits
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Convert the labels with to_categorical (matches the categorical_crossentropy loss)
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

# HERE
# Divide the pixel values by 255 and store them as 32-bit floats
x_train, x_test = [(images / 255.0).astype(np.float32) for images in (x_train, x_test)]

## Build the Model

```
Total params: 140,970
Trainable params: 140,970
Non-trainable params: 0
```

In [6]:
# Build the network for 32x32 RGB inputs and 10 output classes, then list its layers
model = convNet(input_shape=(32, 32, 3), nclasses=10)
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 32, 32, 3)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)    

In [7]:

# Train for 3 epochs, holding out 10% of the training data for validation
fit_options = dict(batch_size=32, epochs=3, validation_split=0.1, verbose=1)
model.fit(x_train, y_train, **fit_options)

# Evaluate on the test split; with the accuracy metric compiled in, score is [loss, accuracy]
score = model.evaluate(x_test, y_test)
print(score)

Instructions for updating:
Use tf.cast instead.
Train on 45000 samples, validate on 5000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
[1.076918307876587, 0.612]


## CIFAR-100

In [8]:

# Load the CIFAR-100 training and test splits
(x_train, y_train), (x_test, y_test) = cifar100.load_data()

# Convert the labels with to_categorical (matches the categorical_crossentropy loss)
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

# HERE
# Standardize the pixels; mean and std come from the training set only and
# the same values are applied to the test set
mean, std = x_train.mean(), x_train.std()
x_train, x_test = [((images - mean) / std).astype(np.float32) for images in (x_train, x_test)]

## Build the Model

```
Total params: 152,580
Trainable params: 152,580
Non-trainable params: 0
```

In [9]:
# Same architecture as before, but with 100 output classes
model = convNet(input_shape=(32, 32, 3), nclasses=100)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 32, 32, 3)         0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 32, 32, 32)        9248      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 16, 16, 64)        18496     
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 16, 16, 64)        4160      
__________

In [10]:
# Train for 3 epochs, holding out 10% of the training data for validation
fit_options = dict(batch_size=32, epochs=3, validation_split=0.1, verbose=1)
model.fit(x_train, y_train, **fit_options)

# Evaluate on the test split; with the accuracy metric compiled in, score is [loss, accuracy]
score = model.evaluate(x_test, y_test)
print(score)

Train on 45000 samples, validate on 5000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
[3.006009662628174, 0.2443]


## Image Augmentation with ImageDataGenerator

TODO

In [11]:
from keras.preprocessing.image import ImageDataGenerator

# Augment the training images on the fly: random vertical/horizontal flips
# and random rotations of up to 30 degrees
datagen = ImageDataGenerator(vertical_flip=True, horizontal_flip=True, rotation_range=30)

#HERE - Validation Split
# Hold out the last 10% of the training data as validation data; it is passed to
# fit_generator directly and therefore never goes through the augmenting generator.
# NOTE: this rebinds x_train / y_train, so re-running this cell without first
# re-running the CIFAR-100 loading cell shrinks the training set again.
pivot = int(len(x_train) * 0.9)
x_val = x_train[pivot:]
y_val = y_train[pivot:]
x_train = x_train[:pivot]
y_train = y_train[:pivot]

# steps_per_epoch must be a whole number of batches. Round up so the final,
# partially filled batch is still counted (len(x_train) / 32 would be a float).
batch_size = 32
steps_per_epoch = (len(x_train) + batch_size - 1) // batch_size

model = convNet((32, 32, 3), 100)
model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size), epochs=3,
                    steps_per_epoch=steps_per_epoch,
                    validation_data=(x_val, y_val), verbose=1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f2a300be7b8>