In [0]:
import numpy as np
import tensorflow as tf
import os

In [0]:
# Training hyper-parameters. The trailing `#@param` annotations expose each value as a Colab form field.
# NOTE(review): several later cells pass literal values (e.g. the optimizer's learning rate and
# momentum, the schedule's max_lr) instead of these constants — confirm which set is intended.
BATCH_SIZE = 256 #@param ["512", "256", "128"] {type:"raw"}
MOMENTUM = 0.9 #@param ["0.9", "0.95", "0.975"] {type:"raw"}
WEIGHT_DECAY = 0.000125 #@param ["0.000125", "0.00025", "0.0005"] {type:"raw"}
LEARNING_RATE = 0.4 #@param ["0.4", "0.2", "0.1"] {type:"raw"}
EPOCHS = 50 #@param {type:"slider", min:0, max:100, step:1}
WARMUP = 5 #@param {type:"slider", min:0, max:24, step:1}

In [0]:
# Load CIFAR-10 and record the basic dataset dimensions used further down.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
n_train = len(X_train)
n_test = len(X_test)
img_size = X_train.shape[1]
# Labels are integer class ids starting at 0, so the largest id + 1 is the class count.
n_classes = y_train.max() + 1

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [0]:
# Display the class count derived from the training labels.
n_classes

10

In [0]:
# Display the label array's shape (labels are later one-hot encoded with to_categorical).
y_train.shape

(50000, 1)

In [0]:
# Standardise both splits with statistics computed over every axis except the last
# (one mean / std per channel), taken from the training split only.
# NOTE: this cell rebinds X_train / X_test, so running it twice standardises already
# standardised data.
X_train_mean = X_train.mean(axis=(0, 1, 2))
X_train_std = X_train.std(axis=(0, 1, 2))
X_train = (X_train - X_train_mean) / X_train_std
X_test = (X_test - X_train_mean) / X_train_std

# Data Augmentation

In [0]:
from imgaug import augmenters as iaa
from keras.preprocessing.image import ImageDataGenerator


In [0]:
# Alternative imgaug pipeline. NOTE(review): in the cells visible here it is only referenced
# from a commented-out generator, so it does not take part in training.
img_aug1 = iaa.Sequential([
    # Fliplr takes the flip probability: 0.5 flips half of the images (1 would flip all of them).
    iaa.Fliplr(0.5), # horizontally flip 50% of the images
    iaa.CropAndPad(px=(4, 4)),#crops/pads images by defined amounts in pixels or percent (relative to input image size)

],random_order=True)

In [0]:

def get_random_eraser(p=0.5, s_l=0.02, s_h=0.4, r_1=0.3, r_2=1/0.3, v_l=0, v_h=255, pixel_level=False):
    """Build a random-erasing augmentation function.

    # Arguments:
        p: probability that the returned function erases anything.
        s_l, s_h: bounds of the erased area, as a fraction of the image area.
        r_1, r_2: bounds of the erased rectangle's aspect ratio (height / width).
        v_l, v_h: bounds of the uniform fill value.
        pixel_level: if True every erased element gets its own random value,
            otherwise a single random value fills the whole rectangle.
    # Returns:
        A function taking an (height, width, channels) array. It modifies the
        array IN PLACE and returns that same array.
    """
    def eraser(input_img):
        height, width, channels = input_img.shape

        # Leave the image untouched with probability 1 - p.
        if np.random.rand() > p:
            return input_img

        # Rejection-sample a rectangle until it lies completely inside the image.
        fits = False
        while not fits:
            area = np.random.uniform(s_l, s_h) * height * width
            aspect = np.random.uniform(r_1, r_2)
            box_w = int(np.sqrt(area / aspect))
            box_h = int(np.sqrt(area * aspect))
            x0 = np.random.randint(0, width)
            y0 = np.random.randint(0, height)
            fits = x0 + box_w <= width and y0 + box_h <= height

        if not pixel_level:
            fill = np.random.uniform(v_l, v_h)
        else:
            fill = np.random.uniform(v_l, v_h, (box_h, box_w, channels))

        input_img[y0:y0 + box_h, x0:x0 + box_w, :] = fill
        return input_img

    return eraser

In [0]:
def normalize_img(img):
  """Divide `img` by 255, subtract the module-level `norms`, then divide by the module-level `stds`.

  NOTE(review): `norms` and `stds` are not defined in the cells visible here —
  confirm they exist before calling this helper.
  """
  scaled = img / 255
  centered = scaled - norms
  return centered / stds

def pad_img(img):
  """Return `img` with 4 elements of constant (zero) padding on each side of its first two axes."""
  border = (4, 4)
  untouched = (0, 0)
  return np.pad(img, pad_width=(border, border, untouched), mode='constant')

def random_crop(x, random_crop_size):
    """Cut a randomly positioned window out of the first two axes of `x`.

    # Arguments:
        x: array indexed as (rows, cols, channels).
        random_crop_size: pair (crop_rows, crop_cols).
    # Returns:
        A (crop_rows, crop_cols, channels) slice (view) of `x`.
    # Raises:
        ValueError: if the requested window is larger than `x`.
    """
    crop_rows, crop_cols = random_crop_size[0], random_crop_size[1]
    slack_rows = x.shape[0] - crop_rows
    slack_cols = x.shape[1] - crop_cols
    if slack_rows < 0 or slack_cols < 0:
        raise ValueError("crop size %s is larger than input of shape %s"
                         % (tuple(random_crop_size), x.shape[:2]))

    # Every offset in [0, slack] keeps the window inside the array. Sampling only
    # half of that range (as before) never produced the centred crop nor any
    # shift towards the bottom/right.
    top = np.random.randint(slack_rows + 1)
    left = np.random.randint(slack_cols + 1)
    return x[top:top + crop_rows, left:left + crop_cols, :]
  
def reg_fn(x):
  """Per-image augmentation: zero-pad by 4, take a random 32x32 crop, then apply random erasing.

  The eraser is configured with fill values drawn per element from [0, 1).
  """
  eraser = get_random_eraser(v_l=0, v_h=1, pixel_level=True)
  padded = pad_img(x)
  cropped = random_crop(padded, [32, 32])
  return eraser(cropped)

In [0]:
import os
import numpy as np
import warnings

from keras.callbacks import Callback
from keras import backend as K


# Code is ported from https://github.com/fastai/fastai
class OneCycleLR(Callback):
    """One-cycle learning-rate (and optional momentum) schedule applied after every batch."""

    def __init__(
                 self,
                 epochs,
                 batch_size,
                 samples,
                 steps,
                 max_lr,
                 end_percentage=0.1,
                 scale_percentage=None,
                 scale=None,
                 maximum_momentum=0.95,
                 minimum_momentum=0.85,
                 verbose=True):
        """ This callback implements a cyclical learning rate policy (CLR).
        This is a special case of Cyclic Learning Rates, where we have only 1 cycle.
        After the completion of 1 cycle, the learning rate will decrease rapidly to
        100th its initial lowest value.
        # Arguments:
            epochs: Int. Number of epochs the schedule spans; should equal the
                number of epochs passed to `fit` / `fit_generator`.
            batch_size: Int. Batch size; only used to derive the number of
                batches per epoch when `steps` is None.
            samples: Int. Number of training samples; only used when `steps`
                is None.
            steps: Int or None. Batches per epoch. When given, `samples` and
                `batch_size` are ignored for the iteration count.
            max_lr: Float. Reference learning rate. The schedule starts at
                `max_lr * scale` and peaks at `max_lr * 100 * scale ** 2`, i.e.
                with the default scale of 0.1 it starts 10x below `max_lr`
                and increases to `max_lr` during the first half-cycle.
            end_percentage: Float. The percentage of all the epochs of training
                that will be dedicated to sharply decreasing the learning
                rate after the completion of 1 cycle. Must be between 0 and 1.
            scale_percentage: Float or None. If float, must be between 0 and 1.
                If None, it will compute the scale_percentage automatically
                based on the `end_percentage`.
            scale: Unused; kept so existing calls keep working.
            maximum_momentum: Optional. Sets the maximum momentum (initial)
                value, which gradually drops to its lowest value in half-cycle,
                then gradually increases again to stay constant at this max value.
                Can only be used with SGD Optimizer.
            minimum_momentum: Optional. Sets the minimum momentum at the end of
                the half-cycle. Can only be used with SGD Optimizer.
            verbose: Bool. Whether to print the current learning rate after every
                epoch.
        # Raises:
            ValueError: if `end_percentage` or `scale_percentage` lies outside [0, 1].
        # Reference
            - [A disciplined approach to neural network hyper-parameters: Part 1 -- learning rate, batch size, momentum, and weight decay](https://arxiv.org/abs/1803.09820)
            - [Super-Convergence: Very Fast Training of Residual Networks Using Large Learning Rates](https://arxiv.org/abs/1708.07120)
        """
        super(OneCycleLR, self).__init__()

        if end_percentage < 0. or end_percentage > 1.:
            raise ValueError("`end_percentage` must be between 0 and 1")

        if scale_percentage is not None and (scale_percentage < 0. or scale_percentage > 1.):
            raise ValueError("`scale_percentage` must be between 0 and 1")

        self.initial_lr = max_lr
        self.end_percentage = end_percentage
        self.scale = float(scale_percentage) if scale_percentage is not None else float(end_percentage)
        self.max_momentum = maximum_momentum
        self.min_momentum = minimum_momentum
        self.verbose = verbose

        # Momentum is only cycled when both bounds are supplied.
        self._update_momentum = (self.max_momentum is not None
                                 and self.min_momentum is not None)

        self.clr_iterations = 0.
        self.history = {}

        self.epochs = epochs
        self.batch_size = batch_size
        self.samples = samples
        self.steps = steps
        # Both are filled in by on_train_begin.
        self.num_iterations = None
        self.mid_cycle_id = None

    def _reset(self):
        """
        Reset the callback.
        """
        self.clr_iterations = 0.
        self.history = {}

    def compute_lr(self):
        """
        Compute the learning rate based on which phase of the cycle it is in.
        - If in the first half of training, the learning rate gradually increases.
        - If in the second half of training, the learning rate gradually decreases.
        - If in the final `end_percentage` portion of training, the learning rate
            is quickly reduced to near 100th of the original min learning rate.
        Side effect: once the last scheduled iteration is reached the iteration
        counter is reset to 0.
        # Returns:
            the new learning rate
        """
        if self.clr_iterations > 2 * self.mid_cycle_id:
            # Final annealing phase: linear drop from lr*scale to lr*scale/100.
            current_percentage = (self.clr_iterations - 2 * self.mid_cycle_id)
            current_percentage /= float((self.num_iterations - 2 * self.mid_cycle_id))
            new_lr = self.initial_lr * (1. + (current_percentage *
                                              (1. - 100.) / 100.)) * self.scale

        elif self.clr_iterations > self.mid_cycle_id:
            # Second half-cycle: mirror of the ramp-up.
            current_percentage = 1. - (
                self.clr_iterations - self.mid_cycle_id) / self.mid_cycle_id
            new_lr = self.initial_lr * (1. + current_percentage *
                                        (self.scale * 100 - 1.)) * self.scale

        else:
            # First half-cycle: linear ramp-up.
            current_percentage = self.clr_iterations / self.mid_cycle_id
            new_lr = self.initial_lr * (1. + current_percentage *
                                        (self.scale * 100 - 1.)) * self.scale

        if self.clr_iterations == self.num_iterations:
            self.clr_iterations = 0

        return new_lr

    def compute_momentum(self):
        """
         Compute the momentum based on which phase of the cycle it is in.
        - If in the first half of training, the momentum gradually decreases.
        - If in the second half of training, the momentum gradually increases.
        - If in the final `end_percentage` portion of training, the momentum value
            is kept constant at the maximum initial value.
        # Returns:
            the new momentum value
        """
        if self.clr_iterations > 2 * self.mid_cycle_id:
            new_momentum = self.max_momentum

        elif self.clr_iterations > self.mid_cycle_id:
            current_percentage = 1. - ((self.clr_iterations - self.mid_cycle_id) / float(
                                        self.mid_cycle_id))
            new_momentum = self.max_momentum - current_percentage * (
                self.max_momentum - self.min_momentum)

        else:
            current_percentage = self.clr_iterations / float(self.mid_cycle_id)
            new_momentum = self.max_momentum - current_percentage * (
                self.max_momentum - self.min_momentum)

        return new_momentum

    def on_train_begin(self, logs=None):
        """Derive the schedule length, reset the state and apply the starting lr / momentum.

        # Raises:
            ValueError: if momentum cycling is enabled but the optimizer has no
                `momentum` attribute.
        """
        logs = logs or {}

        if self.steps is not None:
            self.num_iterations = self.epochs * self.steps
        else:
            # Batches per epoch = ceil(samples / batch_size): a trailing partial
            # batch still counts as one iteration in every epoch.
            steps_per_epoch = -(-self.samples // self.batch_size)
            self.num_iterations = self.epochs * steps_per_epoch

        # Iteration index at which the ramp-up ends; the cycle proper spans 2 * mid_cycle_id.
        self.mid_cycle_id = int(self.num_iterations * ((1. - self.end_percentage)) / float(2))

        self._reset()
        K.set_value(self.model.optimizer.lr, self.compute_lr())

        if self._update_momentum:
            if not hasattr(self.model.optimizer, 'momentum'):
                raise ValueError("Momentum can be updated only on SGD optimizer !")

            new_momentum = self.compute_momentum()
            K.set_value(self.model.optimizer.momentum, new_momentum)

    def on_batch_end(self, epoch, logs=None):
        """Advance the schedule by one iteration and record lr, momentum and batch logs.

        The first positional argument is whatever Keras passes to `on_batch_end`;
        it is not used here.
        """
        logs = logs or {}

        self.clr_iterations += 1
        new_lr = self.compute_lr()

        # History stores the value that was in effect for the batch just finished.
        self.history.setdefault('lr', []).append(
            K.get_value(self.model.optimizer.lr))
        K.set_value(self.model.optimizer.lr, new_lr)

        if self._update_momentum:
            if not hasattr(self.model.optimizer, 'momentum'):
                raise ValueError("Momentum can be updated only on SGD optimizer !")

            new_momentum = self.compute_momentum()

            self.history.setdefault('momentum', []).append(
                K.get_value(self.model.optimizer.momentum))
            K.set_value(self.model.optimizer.momentum, new_momentum)

        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)

    def on_epoch_end(self, epoch, logs=None):
        """Print the most recently recorded lr (and momentum) when `verbose` is set."""
        if self.verbose:
            if self._update_momentum:
                print(" - lr: %0.5f - momentum: %0.2f " %
                      (self.history['lr'][-1], self.history['momentum'][-1]))

            else:
                print(" - lr: %0.5f " % (self.history['lr'][-1]))






In [0]:
#train_datagen = ImageDataGenerator(zoom_range=0.0,preprocessing_function=img_aug1.augment_image)

In [0]:

# Training-time augmentation: random horizontal flips plus reg_fn, which is passed as the
# generator's per-image preprocessing function.
datagen = ImageDataGenerator(zoom_range=0.0, 
                             horizontal_flip=True,
                             preprocessing_function=reg_fn)

In [0]:
from keras.layers import Input, Conv2D, Activation, BatchNormalization, GlobalAveragePooling2D, MaxPooling2D
from keras.layers.merge import add
from keras.activations import relu, softmax
from keras.models import Model
from keras import regularizers

# Architecture

In [0]:
def _bn_relu(input):
    """Apply batch normalisation along CHANNEL_AXIS, followed by a ReLU activation."""
    normalized = BatchNormalization(axis=CHANNEL_AXIS)(input)
    relu_layer = Activation("relu")
    return relu_layer(normalized)


def _conv_bn_relu(**conv_params):
    """Return a function that applies conv -> BN -> relu to a tensor.

    Required keys: `filters`, `kernel_size`. Optional keys with their defaults:
    `strides` (1, 1), `kernel_initializer` "he_normal", `padding` "same".
    """
    conv_kwargs = dict(
        filters=conv_params["filters"],
        kernel_size=conv_params["kernel_size"],
        strides=conv_params.get("strides", (1, 1)),
        padding=conv_params.get("padding", "same"),
        kernel_initializer=conv_params.get("kernel_initializer", "he_normal"),
        # Weight regularisation is switched off here: a caller-supplied
        # kernel_regularizer is ignored.
        kernel_regularizer=None,
    )

    def f(input):
        convolved = Conv2D(**conv_kwargs)(input)
        return _bn_relu(convolved)

    return f


def _bn_relu_conv(**conv_params):
    """Return a function that applies BN -> relu -> conv to a tensor.
    This is the pre-activation ordering proposed in http://arxiv.org/pdf/1603.05027v2.pdf

    Required keys: `filters`, `kernel_size`. Optional keys with their defaults:
    `strides` (1, 1), `kernel_initializer` "he_normal", `padding` "same".
    """
    conv_kwargs = dict(
        filters=conv_params["filters"],
        kernel_size=conv_params["kernel_size"],
        strides=conv_params.get("strides", (1, 1)),
        padding=conv_params.get("padding", "same"),
        kernel_initializer=conv_params.get("kernel_initializer", "he_normal"),
        # Weight regularisation is switched off here: a caller-supplied
        # kernel_regularizer is ignored.
        kernel_regularizer=None,
    )

    def f(input):
        pre_activated = _bn_relu(input)
        return Conv2D(**conv_kwargs)(pre_activated)

    return f


def _shortcut_mul(input, residual):
    """Adds a shortcut between input and residual block and merges them with an
    element-wise product (the multiplicative counterpart of `_shortcut`).
    """
    # `multiply` is not among the layer imports of this notebook, so bring it in here.
    from keras.layers import multiply

    # Expand channels of shortcut to match residual.
    # Stride appropriately to match residual (width, height)
    # Should be int if network architecture is correctly configured.
    input_shape = K.int_shape(input)
    residual_shape = K.int_shape(residual)
    stride_width = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS]))
    stride_height = int(round(input_shape[COL_AXIS] / residual_shape[COL_AXIS]))
    equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS]

    shortcut = input
    # 1 X 1 conv if shape is different. Else identity.
    if stride_width > 1 or stride_height > 1 or not equal_channels:
        shortcut = Conv2D(filters=residual_shape[CHANNEL_AXIS],
                          kernel_size=(1, 1),
                          strides=(stride_width, stride_height),
                          padding="valid",
                          kernel_initializer="he_normal")(input)

    return multiply([shortcut, residual])
  
def _shortcut(input, residual):
    """Merge `input` and `residual` with an element-wise sum.

    When the two tensors differ in spatial size or channel count, `input` is
    first projected with a strided 1x1 convolution so the shapes agree.
    """
    in_shape = K.int_shape(input)
    res_shape = K.int_shape(residual)

    # The ratios should be whole numbers if the network is configured correctly.
    row_stride = int(round(in_shape[ROW_AXIS] / res_shape[ROW_AXIS]))
    col_stride = int(round(in_shape[COL_AXIS] / res_shape[COL_AXIS]))
    out_channels = res_shape[CHANNEL_AXIS]

    needs_projection = (row_stride > 1
                        or col_stride > 1
                        or in_shape[CHANNEL_AXIS] != out_channels)
    if needs_projection:
        projection = Conv2D(filters=out_channels,
                            kernel_size=(1, 1),
                            strides=(row_stride, col_stride),
                            padding="valid",
                            kernel_initializer="he_normal")
        shortcut = projection(input)
    else:
        # Shapes already match: identity shortcut.
        shortcut = input

    return add([shortcut, residual])


def _residual_block(block_function, filters, repetitions, is_first_layer=False):
    """Return a function that stacks `repetitions` blocks built by `block_function`.

    The i-th block uses int(filters * 1.25**i) filters. Only the first block of a
    stage may down-sample (stride 2), and only when this is not the first stage
    and the incoming tensor's second spatial dimension is larger than 8.
    """
    def f(input):
        # Shape without the batch axis, taken once from the stage input.
        stage_input_shape = K.int_shape(input)[1:]
        tensor = input
        for index in range(repetitions):
            is_first_block = index == 0
            if is_first_block and not is_first_layer and stage_input_shape[1] > 8:
                block_strides = (2, 2)
            else:
                block_strides = (1, 1)

            block = block_function(
                filters=int(filters*(1.25**index)),
                init_strides=block_strides,
                is_first_block_of_first_layer=(is_first_layer and is_first_block))
            tensor = block(tensor)
        return tensor

    return f


def basic_block(filters, init_strides=(1, 1), is_first_block_of_first_layer=False):
    """Two stacked 3 X 3 convolutions with a shortcut, for resnets with layers <= 34.
    Uses the pre-activation scheme of http://arxiv.org/pdf/1603.05027v2.pdf
    """
    def f(input):
        if not is_first_block_of_first_layer:
            first_conv = _bn_relu_conv(filters=filters, kernel_size=(3, 3),
                                       strides=init_strides)
        else:
            # The network stem already ends in bn->relu, so start with a bare
            # convolution instead of repeating the activation.
            first_conv = Conv2D(filters=filters, kernel_size=(3, 3),
                                strides=init_strides,
                                padding="same",
                                kernel_initializer="he_normal",
                                kernel_regularizer=l2(1e-4))
        hidden = first_conv(input)

        second_conv = _bn_relu_conv(filters=filters, kernel_size=(3, 3))
        return _shortcut(input, second_conv(hidden))

    return f


def bottleneck(filters, init_strides=(1, 1), is_first_block_of_first_layer=False):
    """1 X 1 -> 3 X 3 -> 1 X 1 bottleneck block with a shortcut, for > 34 layer resnets.
    Uses the pre-activation scheme of http://arxiv.org/pdf/1603.05027v2.pdf
    Returns:
        A function whose final conv layer has filters * 4 output channels
    """
    def f(input):
        if not is_first_block_of_first_layer:
            reduce_conv = _bn_relu_conv(filters=filters, kernel_size=(1, 1),
                                        strides=init_strides)
        else:
            # The network stem already ends in bn->relu, so start with a bare
            # convolution instead of repeating the activation.
            reduce_conv = Conv2D(filters=filters, kernel_size=(1, 1),
                                 strides=init_strides,
                                 padding="same",
                                 kernel_initializer="he_normal",
                                 kernel_regularizer=l2(1e-4))
        reduced = reduce_conv(input)

        spatial = _bn_relu_conv(filters=filters, kernel_size=(3, 3))(reduced)
        expanded = _bn_relu_conv(filters=filters * 4, kernel_size=(1, 1))(spatial)
        return _shortcut(input, expanded)

    return f


def _handle_dim_ordering():
    global ROW_AXIS
    global COL_AXIS
    global CHANNEL_AXIS
    ROW_AXIS = 1
    COL_AXIS = 2
    CHANNEL_AXIS = 3
    

class ResnetBuilder(object):
    """Factory for the ResNet-style Keras models used in this notebook."""

    @staticmethod
    def build(input_shape, num_outputs, block_fn, repetitions, 
              growth_rate=1.5, resnet_init_filters = 64, first_conv_filters = 64,):
        """Builds a custom ResNet like architecture.
        Args:
            input_shape: The input shape in the form (nb_channels, nb_rows, nb_cols).
                It is always re-ordered to (nb_rows, nb_cols, nb_channels) for the model.
            num_outputs: The number of outputs at final softmax layer
            block_fn: The block function to use. This is either `basic_block` or `bottleneck`.
                The original paper used basic_block for layers < 50
            repetitions: Number of repetitions of various block units.
                After each block unit the filter count is multiplied by `growth_rate`;
                whether the input size is halved is decided inside `_residual_block`.
            growth_rate: Factor applied to the filter count between block units.
            resnet_init_filters: Filter count of the first block unit.
            first_conv_filters: Filter count of the initial 7x7 convolution.
        Returns:
            The keras `Model`.
        Raises:
            ValueError: if `input_shape` does not have exactly three entries.
        """
        # `Dense` is not among the layer imports that precede this definition,
        # so import it where it is needed.
        from keras.layers import Dense

        _handle_dim_ordering()
        if len(input_shape) != 3:
            raise ValueError("Input shape should be a tuple (nb_channels, nb_rows, nb_cols)")

        # Re-order to channels-last, the layout fixed by _handle_dim_ordering.
        input_shape = (input_shape[1], input_shape[2], input_shape[0])

        # Large inputs (or very shallow nets) down-sample already in the stem.
        double_stride_first = input_shape[1] >= 96 or len(repetitions)<=2
        print("Double Stride in 7x7 =",double_stride_first)
        pool_first = input_shape[1] >= 200
        print("Initial Pooling After 7x7 =",pool_first)

        strides = (2,2) if double_stride_first else (1,1)

        input = Input(shape=input_shape)
        conv1 = _conv_bn_relu(filters=first_conv_filters, kernel_size=(7, 7), strides=strides)(input)
        print("Shape After 7x7 = ",K.int_shape(conv1)[1:])
        if pool_first:
          pool1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same")(conv1)
        else:
          pool1 = conv1

        block = pool1
        filters = resnet_init_filters
        for i, r in enumerate(repetitions):
            block = _residual_block(block_fn, filters=filters, repetitions=r, is_first_layer=(i == 0))(block)
            print("Filters in Resnet %s = %s, Output Shape = %s"%((i+1),filters, K.int_shape(block)[1:]))
            filters = int(filters * growth_rate)

        # Last activation
        block = _bn_relu(block)

        # Classifier block
        pool2 = GlobalAveragePooling2D()(block)
        dense = Dense(units=num_outputs, kernel_initializer="he_normal",
                      activation="softmax", use_bias=False)(pool2)

        model = Model(inputs=input, outputs=dense)
        return model

    @staticmethod
    def build_resnet_9(input_shape, num_outputs,
                      growth_rate=2, resnet_init_filters = 80, first_conv_filters = 80,):
        """Two block units of basic blocks with repetitions [2, 2]."""
        return ResnetBuilder.build(input_shape, num_outputs, basic_block, [2, 2], 
                                  growth_rate = growth_rate, resnet_init_filters=resnet_init_filters, first_conv_filters=first_conv_filters)

    @staticmethod
    def build_resnet_11(input_shape, num_outputs,
                      growth_rate=2, resnet_init_filters = 80, first_conv_filters = 80,):
        """Two block units of basic blocks with repetitions [3, 2]."""
        return ResnetBuilder.build(input_shape, num_outputs, basic_block, [3, 2], 
                                  growth_rate = growth_rate, resnet_init_filters=resnet_init_filters, first_conv_filters=first_conv_filters)

    @staticmethod
    def build_resnet_13(input_shape, num_outputs,
                      growth_rate=1.75, resnet_init_filters = 64, first_conv_filters = 64,):
        """Three block units of basic blocks with repetitions [2, 2, 2]."""
        return ResnetBuilder.build(input_shape, num_outputs, basic_block, [2, 2, 2], 
                                  growth_rate = growth_rate, resnet_init_filters=resnet_init_filters, first_conv_filters=first_conv_filters)

    @staticmethod
    def build_resnet_18(input_shape, num_outputs,
                       growth_rate=1.5, resnet_init_filters = 64, first_conv_filters = 64,):
        """Four block units of basic blocks with repetitions [2, 2, 2, 2]."""
        return ResnetBuilder.build(input_shape, num_outputs, basic_block, [2, 2, 2, 2],
                                  growth_rate = growth_rate, resnet_init_filters=resnet_init_filters, first_conv_filters=first_conv_filters)

    @staticmethod
    def build_resnet_34(input_shape, num_outputs,
                       growth_rate=1.5, resnet_init_filters = 64, first_conv_filters = 64,):
        """Four block units of basic blocks with repetitions [3, 4, 6, 3]."""
        return ResnetBuilder.build(input_shape, num_outputs, basic_block, [3, 4, 6, 3],
                                  growth_rate = growth_rate, resnet_init_filters=resnet_init_filters, first_conv_filters=first_conv_filters)

    @staticmethod
    def build_resnet_50(input_shape, num_outputs):
        """Four block units of bottleneck blocks with repetitions [3, 4, 6, 3]."""
        return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 6, 3])

    @staticmethod
    def build_resnet_101(input_shape, num_outputs):
        """Four block units of bottleneck blocks with repetitions [3, 4, 23, 3]."""
        return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 23, 3])

    @staticmethod
    def build_resnet_152(input_shape, num_outputs):
        """Four block units of bottleneck blocks with repetitions [3, 8, 36, 3]."""
        return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 8, 36, 3])

In [0]:
from keras.layers import Input, Conv2D, Activation, BatchNormalization, GlobalAveragePooling2D, MaxPooling2D
from keras.layers.merge import add
from keras.activations import relu, softmax
from keras.models import Model
from keras import regularizers
from tensorflow.keras import backend as K
from tensorflow.keras.regularizers import l2
from keras.layers import Activation, Flatten, Dense, Dropout



# Build the [2, 2, 2, 2] basic-block network. The shape is given channels-first,
# (channels, rows, cols), as ResnetBuilder.build expects; 10 is the number of softmax outputs.
model = ResnetBuilder.build_resnet_18((3, 32, 32), 10)


W0809 18:11:18.999397 139854778972032 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0809 18:11:19.033806 139854778972032 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0809 18:11:19.043527 139854778972032 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4185: The name tf.truncated_normal is deprecated. Please use tf.random.truncated_normal instead.

W0809 18:11:19.083477 139854778972032 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0809 18:11:19.084368

Double Stride in 7x7 = False
Initial Pooling After 7x7 = False


W0809 18:11:21.934625 139854778972032 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1834: The name tf.nn.fused_batch_norm is deprecated. Please use tf.compat.v1.nn.fused_batch_norm instead.



Shape After 7x7 =  (32, 32, 64)
Filters in Resnet 1 = 64, Output Shape = (32, 32, 80)
Filters in Resnet 2 = 96, Output Shape = (16, 16, 120)
Filters in Resnet 3 = 144, Output Shape = (8, 8, 180)
Filters in Resnet 4 = 216, Output Shape = (8, 8, 270)


In [0]:
# Print the layer-by-layer structure and parameter counts of the model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 32, 32, 64)   9472        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 32, 32, 64)   256         conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 32, 32, 64)   0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
conv2d_2 (

In [0]:
from keras.utils import np_utils

# One-hot encode the integer labels into 10 columns, matching the
# categorical_crossentropy loss and the 10-way softmax output.
Y_train = np_utils.to_categorical(y_train, 10)
Y_test = np_utils.to_categorical(y_test, 10)

In [0]:
from keras.optimizers import SGD


# Nesterov SGD. The lr and momentum given here are only starting values when the
# OneCycleLR callback is used: it overwrites both at the start of training and
# after every batch.
optimizer = SGD(lr=0.04, momentum=0.9, nesterov=True,decay=0.0)
model.compile(loss = 'categorical_crossentropy',
              optimizer = optimizer,
              metrics = ['accuracy'])
print("Model Params = ",model.count_params(), ", Metric Names = ",model.metrics_names)


W0809 18:11:35.668329 139854778972032 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.



Model Params =  3570534 , Metric Names =  ['loss', 'acc']


In [0]:
# Validation images go through a plain generator. No feature-wise statistics are
# enabled, so the generator needs no `fit` call (it would only copy X_test).
datagen_validation = ImageDataGenerator(featurewise_center=False,featurewise_std_normalization=False,)
# Both iterators take their batch size from the BATCH_SIZE constant of the config cell
# rather than repeating the literal.
train_iterator = datagen.flow(X_train, Y_train, batch_size = BATCH_SIZE,shuffle=True)
validation_iterator = datagen_validation.flow(X_test, Y_test, batch_size=BATCH_SIZE,shuffle=True)

In [0]:
from google.colab import drive
# Mount Google Drive (interactive: asks for an authorization code). The mount point is
# relative to the current working directory.
drive.mount('../content/drive/', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at ../content/drive/


In [0]:
import os
# List the EVA folder on the mounted Drive to confirm the mount is readable.
os.listdir('../content/drive/My Drive/EVA')

[]

In [0]:
from keras.callbacks import *
# Checkpoint target on the mounted Drive; uses the same '../content/drive/' mount point
# as the drive.mount call (the previous '/content/gdrive/...' path is not where Drive is mounted).
filepath="../content/drive/My Drive/EVA/Session11/epochs_11.hdf5"
#checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1,save_weights_only=True, save_best_only=True, mode='max',period=5)
# The schedule length (epochs * steps) must equal the number of batches actually trained,
# so `epochs` here has to match the value passed to fit_generator.
olr = OneCycleLR(epochs=EPOCHS, batch_size = BATCH_SIZE,steps=len(train_iterator), 
                 samples=X_train.shape[0], max_lr=0.5, verbose = True,
                 maximum_momentum = 0.9, minimum_momentum=0.8)
callbacks_list = [olr]

In [0]:
# Sanity check of the training array's shape before fitting.
print(X_train.shape)

(50000, 32, 32, 3)


In [0]:
triangle_tilt=0.75
batch_size = 256
# Train from the augmenting iterator. `epochs` comes from the EPOCHS config constant and
# must equal the `epochs` given to the OneCycleLR callback, otherwise the learning-rate
# schedule ends before or after training does.
train_history = model.fit_generator(train_iterator,
                    steps_per_epoch=len(train_iterator), 
                    validation_data = validation_iterator, 
                    validation_steps = len(validation_iterator),
                    epochs=EPOCHS, verbose=2,callbacks=callbacks_list)

W0809 18:20:56.493206 139854778972032 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/50
 - 61s - loss: 1.6366 - acc: 0.3999 - val_loss: 1.4734 - val_acc: 0.4826
 - lr: 0.06990 - momentum: 0.90 
Epoch 2/50
 - 49s - loss: 1.3239 - acc: 0.5273 - val_loss: 3.1176 - val_acc: 0.3036
 - lr: 0.08990 - momentum: 0.89 
Epoch 3/50
 - 50s - loss: 1.1479 - acc: 0.5938 - val_loss: 3.0849 - val_acc: 0.3452
 - lr: 0.10990 - momentum: 0.89 
Epoch 4/50
 - 49s - loss: 1.0300 - acc: 0.6387 - val_loss: 1.3645 - val_acc: 0.5470
 - lr: 0.12990 - momentum: 0.88 
Epoch 5/50
 - 50s - loss: 0.9216 - acc: 0.6796 - val_loss: 1.8130 - val_acc: 0.5404
 - lr: 0.14990 - momentum: 0.88 
Epoch 6/50
 - 50s - loss: 0.8447 - acc: 0.7089 - val_loss: 1.2616 - val_acc: 0.5875
 - lr: 0.16990 - momentum: 0.87 
Epoch 7/50
 - 50s - loss: 0.7705 - acc: 0.7362 - val_loss: 1.3260 - val_acc: 0.6318
 - lr: 0.18990 - momentum: 0.87 
Epoch 8/50
 - 50s - loss: 0.7152 - acc: 0.7578 - val_loss: 1.8260 - val_acc: 0.5690
 - lr: 0.20990 - momentum: 0.86 
Epoch 9/50
 - 50s - loss: 0.6666 - acc: 0.7733 - val_loss: 1.316