In [3]:
"""
References:
https://arxiv.org/pdf/1512.03385.pdf
https://arxiv.org/pdf/1604.04112v4.pdf
"""
import numpy as np
from tensorflow.keras.layers import Add, Dense, Conv2D, BatchNormalization,ReLU
from tensorflow.keras.layers import Activation, AveragePooling2D, Input, Flatten,MaxPool2D
from tensorflow.keras.callbacks import LearningRateScheduler,TensorBoard,EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import cifar100
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model,load_model
from cutout_generator import CutoutImageDataGenerator
from mixup_generator import MixupGenerator
from cutmix_generator import CutMixGenerator

# hyperparameters

In [4]:
batch_size = 128
epochs = 120
n_classes = 100
learning_rate = 0.1

# load data

In [5]:
(x_train, y_train), (x_test, y_test) = cifar100.load_data()
y_train = to_categorical(y_train, n_classes)
y_test = to_categorical(y_test, n_classes)

print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)           #(50000, 32, 32, 3) (50000, 100) (10000, 32, 32, 3) (10000,100)

(50000, 32, 32, 3) (50000, 100) (10000, 32, 32, 3) (10000, 100)


# normalization

In [6]:
def get_mean_std(images):
    images = images.astype(np.float32)
    mean_vals = ()
    std_vals = ()
    for i in range(images.shape[-1]):
        mean_vals += (np.mean(images[:, :, :, i]),)
        std_vals += (np.std(images[:, :, :, i]),)
    return mean_vals, std_vals

mean_vals, std_vals = get_mean_std(x_train)
print(mean_vals)
print(std_vals)

(129.30428, 124.07023, 112.43411)
(68.17024, 65.391785, 70.4184)


In [7]:
def normalize(images, mean_vals, std_vals):
    images = images.astype(np.float32)
    for i in range(images.shape[-1]):
        images[:, :, :, i] = (images[:, :, :, i] - mean_vals[i])/std_vals[i]
    return images

x_train = normalize(x_train, mean_vals, std_vals)
x_test = normalize(x_test, mean_vals, std_vals)
print(x_test.max(), x_test.min())

2.0245545 -1.8973366


# divide train and val

In [8]:
val_frac = 0.2
perm_ids = np.random.permutation(x_train.shape[0])
val_ids = perm_ids[:int(val_frac*x_train.shape[0])]
train_ids = perm_ids[int(val_frac*x_train.shape[0]):]
x_val, y_val = x_train[val_ids], y_train[val_ids]
x_train, y_train = x_train[train_ids], y_train[train_ids]
print(x_train.shape, y_train.shape, x_val.shape, y_val.shape)

(40000, 32, 32, 3) (40000, 100) (10000, 32, 32, 3) (10000, 100)


# network

In [9]:
class Resnet:
    def __init__(self, size=56, stacks=3, starting_filter=16):
        self.size = size
        self.stacks = stacks
        self.starting_filter = starting_filter
        self.residual_blocks = (size - 2) // 6
        
    def get_model(self, input_shape=(32, 32, 3), n_classes=100):
        n_filters = self.starting_filter

        inputs = Input(shape=input_shape)
        network = self.layer(inputs, n_filters)
        network = self.stack(network, n_filters, True)

        for _ in range(self.stacks - 1):
            n_filters *= 2
            network = self.stack(network, n_filters)

        network = Activation('elu')(network)
        pd=network.shape[1]
        network = AveragePooling2D(pool_size=(pd,pd))(network)
        network = Flatten()(network)
        outputs = Dense(n_classes, activation='softmax', 
                        kernel_initializer='he_normal')(network)

        model = Model(inputs=inputs, outputs=outputs)

        return model
    
    def stack(self, inputs, n_filters, first_stack=False):
        stack = inputs

        if first_stack:
            stack = self.identity_block(stack, n_filters)
        else:
            stack = self.convolution_block(stack, n_filters)

        for _ in range(self.residual_blocks - 1):
            stack = self.identity_block(stack, n_filters)

        return stack
    
    def identity_block(self, inputs, n_filters):
        shortcut = inputs

        block = self.layer(inputs, n_filters, normalize_batch=False)
        block = self.layer(block, n_filters, activation=None)

        block = Add()([shortcut, block])

        return block

    def convolution_block(self, inputs, n_filters, strides=2):
        shortcut = inputs

        block = self.layer(inputs, n_filters, strides=strides,
                           normalize_batch=False)
        block = self.layer(block, n_filters, activation=None)

        shortcut = self.layer(shortcut, n_filters,
                              kernel_size=1, strides=strides,
                              activation=None)

        block = Add()([shortcut, block])

        return block
    
    def layer(self, inputs, n_filters, kernel_size=3,
              strides=1, activation='elu', normalize_batch=True):
    
        convolution = Conv2D(n_filters, kernel_size=kernel_size,
                             strides=strides, padding='same',
                             kernel_initializer="he_normal",
                             kernel_regularizer=l2(1e-4))

        x = convolution(inputs)

        if normalize_batch:
            x = BatchNormalization()(x)

        if activation is not None:
            x = Activation(activation)(x)

        return x
    

In [10]:
resnet = Resnet()

In [11]:
model = resnet.get_model()

optimizer = SGD(lr=learning_rate, momentum=0.9)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer, metrics=['accuracy'])

model.summary()

Instructions for updating:
Colocations handled automatically by placer.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 32, 32, 16)   448         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_v1 (BatchNo (None, 32, 32, 16)   64          conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 32, 32, 16)   0           batch_normalization_v1[0][0]     
_____________________________________

# Train Network

### baseline witout data augmentation

In [None]:
log= TensorBoard(log_dir="./logs/baseline")

callbacks = [log,EarlyStopping(monitor = 'val_loss', patience = 13), 
             ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience = 6, verbose = 1，min_lr=1e-4)]
#datagen = ImageDataGenerator(width_shift_range=0.15,
#                             height_shift_range=0.15,
#                            horizontal_flip=True)
datagen = ImageDataGenerator()
datagen.fit(x_train)

model.fit(datagen.flow(x_train, y_train, batch_size=batch_size),
          validation_data=(x_val, y_val),
          steps_per_epoch=x_train.shape[0] // batch_size,
          epochs=epochs,
          callbacks=callbacks)

model.save('model_withoutaug.h5')

### baseline

In [15]:
model = resnet.get_model()

optimizer = SGD(lr=learning_rate, momentum=0.9)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer, metrics=['accuracy'])

log= TensorBoard(log_dir="./logs/baseline_1")

callbacks = [log,EarlyStopping(monitor = 'val_loss', patience = 13), 
             ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience = 6, verbose = 1,min_lr=1e-5)]

datagen = ImageDataGenerator(width_shift_range=0.15,
                             height_shift_range=0.15,
                             horizontal_flip=True)
datagen.fit(x_train)

model.fit(datagen.flow(x_train, y_train, batch_size=batch_size),
          validation_data=(x_val, y_val),
          steps_per_epoch=x_train.shape[0] // batch_size,
          epochs=epochs,
          callbacks=callbacks)

model.save('model_baseline_1.h5')

Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120

Epoch 00036: ReduceLROnPlateau reducing learning rate to 0.020000000298023225.


Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120

Epoch 00047: ReduceLROnPlateau reducing learning rate to 0.003999999910593033.
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120

Epoch 00053: ReduceLROnPlateau reducing learning rate to 0.0007999999448657036.
Epoch 54/120


### Cutout

In [19]:
model = resnet.get_model()
optimizer = SGD(lr=learning_rate, momentum=0.9)

model.compile(loss='categorical_crossentropy',
              optimizer=optimizer, metrics=['accuracy'])

log= TensorBoard(log_dir="./logs/cutout")

callbacks = [log,EarlyStopping(monitor = 'val_loss', patience = 13), 
             ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience = 6, verbose = 1,min_lr=1e-4)]

datagen = CutoutImageDataGenerator(width_shift_range=0.15,
                                   height_shift_range=0.15,
                                   horizontal_flip=True,
                                   cutout_mask_size=10)

model.fit(datagen.flow(x_train, y_train, batch_size=batch_size),
          validation_data=(x_val, y_val),
          steps_per_epoch=x_train.shape[0] // batch_size,
          epochs=epochs,
          callbacks=callbacks)

model.save('model_cutout.h5')

Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120

Epoch 00033: ReduceLROnPlateau reducing learning rate to 0.020000000298023225.
Epoch 34/120
Epoch 35/120
Epoch 36/120


Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120

Epoch 00045: ReduceLROnPlateau reducing learning rate to 0.003999999910593033.
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120

Epoch 00056: ReduceLROnPlateau reducing learning rate to 0.0007999999448657036.
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120

Epoch 00062: ReduceLROnPlateau reducing learning rate to 0.00015999998431652786.
Epoch 63/120


### Mixup

In [13]:
model = resnet.get_model()
optimizer = SGD(lr=learning_rate, momentum=0.9)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer, metrics=['accuracy'])

log= TensorBoard(log_dir="./logs/mixup")

callbacks = [log,EarlyStopping(monitor = 'val_loss', patience = 13), 
             ReduceLROnPlateau(monitor = 'val_loss', factor = 0.3, patience = 6, verbose = 1,min_lr=1e-5)]

datagen = ImageDataGenerator(width_shift_range=0.15,
                             height_shift_range=0.15,
                             horizontal_flip=True)

training_generator = MixupGenerator(x_train, y_train, batch_size=batch_size, alpha=0.4, datagen=datagen)()
model.fit(training_generator,
          steps_per_epoch=x_train.shape[0] // batch_size,
          validation_data=(x_val, y_val),
          epochs=epochs,
          callbacks=callbacks)

model.save('model_mixup.h5')

Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120


Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120

Epoch 00042: ReduceLROnPlateau reducing learning rate to 0.030000000447034835.
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120

Epoch 00061: ReduceLROnPlateau reducing learning rate to 0.009000000357627868.
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120

Epoch 00075: ReduceLROnPlateau reducing learning rate to 0.002700000163167715.
Epoch 76/120
Epoch 77/120
Epoch 78/120
Epoch 79/120
Epoch 80/120
Epoch 81/120
Epoch 82/120
Epoch 83/120
Epoch 84/120
Epoch 85/120
Epoch 86/120
Epoch 87/120
Epoch 88/120

Epoch 00088: ReduceLROnPlateau reducing learning rate to 0.0008100000210106373.
Epo

Epoch 108/120

Epoch 00108: ReduceLROnPlateau reducing learning rate to 0.00024299999931827186.
Epoch 109/120
Epoch 110/120
Epoch 111/120
Epoch 112/120
Epoch 113/120
Epoch 114/120
Epoch 115/120
Epoch 116/120
Epoch 117/120
Epoch 118/120

Epoch 00118: ReduceLROnPlateau reducing learning rate to 7.290000066859647e-05.
Epoch 119/120
Epoch 120/120


### Cutmix

In [18]:
model = resnet.get_model()
optimizer = SGD(lr=learning_rate, momentum=0.9)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer, metrics=['accuracy'])

log= TensorBoard(log_dir="./logs/cutmix")

callbacks = [log,EarlyStopping(monitor = 'val_loss', patience = 13), 
             ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience = 6, verbose = 1,min_lr=1e-5)]

datagen = ImageDataGenerator(width_shift_range=0.15,
                             height_shift_range=0.15,
                             horizontal_flip=True)

training_generator = CutMixGenerator(x_train, y_train, batch_size=batch_size, alpha=0.8, datagen=datagen)()
model.fit(training_generator,
          steps_per_epoch=x_train.shape[0] // batch_size,
          validation_data=(x_val, y_val),
          epochs=epochs,
          callbacks=callbacks)

model.save('model_cutmix.h5')

Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120


Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120

Epoch 00053: ReduceLROnPlateau reducing learning rate to 0.020000000298023225.
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120


Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78/120
Epoch 79/120
Epoch 80/120
Epoch 81/120
Epoch 82/120

Epoch 00082: ReduceLROnPlateau reducing learning rate to 0.003999999910593033.
Epoch 83/120
Epoch 84/120
Epoch 85/120
Epoch 86/120
Epoch 87/120
Epoch 88/120
Epoch 89/120
Epoch 90/120
Epoch 91/120
Epoch 92/120
Epoch 93/120
Epoch 94/120
Epoch 95/120
Epoch 96/120
Epoch 97/120
Epoch 98/120
Epoch 99/120
Epoch 100/120
Epoch 101/120
Epoch 102/120
Epoch 103/120
Epoch 104/120
Epoch 105/120
Epoch 106/120
Epoch 107/120


Epoch 108/120

Epoch 00108: ReduceLROnPlateau reducing learning rate to 0.0007999999448657036.
Epoch 109/120
Epoch 110/120
Epoch 111/120
Epoch 112/120
Epoch 113/120
Epoch 114/120
Epoch 115/120
Epoch 116/120
Epoch 117/120
Epoch 118/120
Epoch 119/120
Epoch 120/120


# Evaluate

In [12]:
def evaluate(model,x_test,y_test):
    loss,acc=model.evaluate(x_test,y_test)
    return loss,acc

In [15]:
prnt = ('test loss {:0.4f}  test acc:{:0.4f}')

model_base=load_model("model_withoutaug.h5")
loss,acc=evaluate(model_base,x_test,y_test)
print("baseline without aug :"+prnt.format(loss,acc)+'\n')

model_baseline=load_model("model_baseline.h5")
loss,acc=evaluate(model_baseline,x_test,y_test)
print("baseline: "+prnt.format(loss,acc)+'\n')

model_cutout=load_model("model_cutout.h5")
loss,acc=evaluate(model_cutout,x_test,y_test)
print("baseline  with cutout: "+prnt.format(loss,acc)+'\n')

model_mixup=load_model("model_mixup.h5")
loss,acc=evaluate(model_mixup,x_test,y_test)
print("baseline  with mixup: "+prnt.format(loss,acc)+'\n')

model_cutmix=load_model("model_cutmix.h5")
loss,acc=evaluate(model_cutmix,x_test,y_test)
print("baseline with cutmix: "+prnt.format(loss,acc)+'\n')

baseline without aug :test loss 2.6015  test acc:0.5659

baseline: test loss 1.9347  test acc:0.6669

baseline  with cutout: test loss 1.7301  test acc:0.6745

baseline  with mixup: test loss 1.4141  test acc:0.6896

baseline with cutmix: test loss 1.4149  test acc:0.6923



# Further more

In [17]:
def cutout(cutout_mask_size = 8):
    def cut(input_img):
        image = np.copy(input_img)
        mask_value = image.mean()
        
        
        h, w, _ = image.shape
        
        y = np.random.randint(h)
        x = np.random.randint(w)
        
        
        top = np.clip(y - cutout_mask_size // 2, 0, h)
        bottom  = np.clip(y + cutout_mask_size // 2, 0, h)
        left = np.clip(x - cutout_mask_size // 2, 0, w)
        right = np.clip(x + cutout_mask_size // 2, 0, w)
        
        image[top:bottom, left:right, :].fill(mask_value)
        return image

    return cut

## cutout + mixup

In [18]:
model = resnet.get_model()
optimizer = SGD(lr=learning_rate, momentum=0.9)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer, metrics=['accuracy'])

log= TensorBoard(log_dir="./logs/cm")

callbacks = [log,EarlyStopping(monitor = 'val_loss', patience = 13), 
             ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience = 6, verbose = 1,min_lr=1e-5)]

datagen = ImageDataGenerator(width_shift_range=0.15,
                             height_shift_range=0.15,
                             horizontal_flip=True,
                             preprocessing_function=cutout(cutout_mask_size = 10))

training_generator = MixupGenerator(x_train, y_train, batch_size=batch_size, alpha=0.4, datagen=datagen)()
model.fit(training_generator,
          steps_per_epoch=x_train.shape[0] // batch_size,
          validation_data=(x_val, y_val),
          epochs=epochs,
          callbacks=callbacks)

model.save('model.cm.h5')

Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120


Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120

Epoch 00048: ReduceLROnPlateau reducing learning rate to 0.020000000298023225.
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120


Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78/120
Epoch 79/120

Epoch 00079: ReduceLROnPlateau reducing learning rate to 0.003999999910593033.
Epoch 80/120
Epoch 81/120
Epoch 82/120
Epoch 83/120
Epoch 84/120
Epoch 85/120
Epoch 86/120
Epoch 87/120
Epoch 88/120
Epoch 89/120
Epoch 90/120

Epoch 00090: ReduceLROnPlateau reducing learning rate to 0.0007999999448657036.
Epoch 91/120
Epoch 92/120
Epoch 93/120
Epoch 94/120
Epoch 95/120
Epoch 96/120
Epoch 97/120
Epoch 98/120
Epoch 99/120
Epoch 100/120
Epoch 101/120
Epoch 102/120
Epoch 103/120
Epoch 104/120
Epoch 105/120
Epoch 106/120

Epoch 00106: ReduceLROnPlateau reducing learning rate to 0.00015999998431652786.
Epoch 107/120
Epoch 108/120
Epoch 109/120
Epoch 110/120
Epoch 111/120
Epoch 112/120

Epoch 00112: ReduceLROnPlateau reducing learning rate to 3.199999628122896e-05.
Epoch 113/120


## cutout + cutmix

In [27]:
model = resnet.get_model()
optimizer = SGD(lr=learning_rate, momentum=0.9)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer, metrics=['accuracy'])

log= TensorBoard(log_dir="./logs/cc")

callbacks = [log,EarlyStopping(monitor = 'val_loss', patience = 13), 
             ReduceLROnPlateau(monitor = 'val_loss', factor = 0.3, patience = 6, verbose = 1,min_lr=1e-5)]

datagen = ImageDataGenerator(width_shift_range=0.15,
                             height_shift_range=0.15,
                             horizontal_flip=True,
                            preprocessing_function=cutout(cutout_mask_size = 10))

training_generator = CutMixGenerator(x_train, y_train, batch_size=batch_size, alpha=0.8, datagen=datagen)()
model.fit(training_generator,
          steps_per_epoch=x_train.shape[0] // batch_size,
          validation_data=(x_val, y_val),
          epochs=epochs,
          callbacks=callbacks)

model.save('model_cc.h5')

Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120


Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120

Epoch 00040: ReduceLROnPlateau reducing learning rate to 0.030000000447034835.
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120

Epoch 00056: ReduceLROnPlateau reducing learning rate to 0.009000000357627868.
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120

Epoch 00071: ReduceLROnPlateau reducing learning rate to 0.002700000163167715.


Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78/120
Epoch 79/120
Epoch 80/120
Epoch 81/120
Epoch 82/120
Epoch 83/120
Epoch 84/120
Epoch 85/120
Epoch 86/120
Epoch 87/120
Epoch 88/120
Epoch 89/120
Epoch 90/120
Epoch 91/120
Epoch 92/120
Epoch 93/120
Epoch 94/120
Epoch 95/120
Epoch 96/120
Epoch 97/120
Epoch 98/120
Epoch 99/120

Epoch 00099: ReduceLROnPlateau reducing learning rate to 0.0008100000210106373.
Epoch 100/120
Epoch 101/120
Epoch 102/120
Epoch 103/120
Epoch 104/120
Epoch 105/120

Epoch 00105: ReduceLROnPlateau reducing learning rate to 0.00024299999931827186.
Epoch 106/120


## evaluate

In [17]:
model_cutmix=load_model("model.cutout+mixup.h5")
loss,acc=evaluate(model_cutmix,x_test,y_test)
print("cutout+mixup : "+prnt.format(loss,acc)+'\n')

model_cutmix=load_model("model_cutout+cutmix.h5")
loss,acc=evaluate(model_cutmix,x_test,y_test)
print("cutout+cutmix : "+prnt.format(loss,acc)+'\n')

cutout+mixup : test loss 1.3716  test acc:0.6971

cutout+cutmix : test loss 1.4316  test acc:0.6896

