# Cyclical Learning using Fashion_MNIST dataset
## References:
1. Leslie Smith, "Cyclical Learning Rates for Training Neural Networks", arXiv: 1506.01186
2. https://www.pyimagesearch.com/2019/07/29/cyclical-learning-rates-with-keras-and-deep-learning/

In [1]:
import numpy as np
import matplotlib.pyplot as plt
#import matplotlib
#matplotlib.use("Agg")

import os, sys
import cv2
from keras.models import Model
from keras.models import Sequential
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
#from keras.datasets import fashion_mnist
from keras.layers import Dense, Activation, Convolution2D, MaxPooling2D, Flatten, BatchNormalization
#from keras.datasets import cifar10

from learningratefinder import LearningRateFinder
from clr_callback import CyclicLR
#from model.minigooglenet import MiniGoogLeNet

Using TensorFlow backend.


# Define hyper-parameters

In [None]:
# initialize the list of class label names
CLASSES = ["top", "trouser", "pullover", "dress", "coat", "sandal", "shirt", "sneaker", "bag", "ankle boot"]

# define hyper-parameters
MIN_LR = 1e-5
MAX_LR = 1e-2
BATCH_SIZE = 64
STEP_SIZE = 8
CLR_METHOD = "triangular"
NUM_EPOCHS = 48
lr_find = 1

# define plot path
LRFIND_PLOT_PATH = os.path.sep.join(["output", "lrfind_plot2.png"])
TRAINING_PLOT_PATH = os.path.sep.join(["output", "training_plot2.png"])
CLR_PLOT_PATH = os.path.sep.join(["output", "clr_plot2.png"])

# Load data

In [None]:
print("[INFO] loading Fashion MNIST data...")
((X_train, Y_train), (X_test, Y_test)) = fashion_mnist.load_data()

# Fashion MNIST images are 28x28, reshape to 32x32
X_train = np.array([cv2.resize(x, (32, 32)) for x in X_train])
X_test = np.array([cv2.resize(x, (32, 32)) for x in X_test])

# normalization
X_train = X_train.astype("float") / 255.0
X_test = X_test.astype("float") / 255.0

# reshape
X_train = X_train.reshape((X_train.shape[0], 32, 32, 1))
X_test = X_test.reshape((X_test.shape[0], 32, 32, 1))

"""One can also try cifar10 dataset
print("[INFO] loading CIFAR-10 data...")
((X_train, Y_train), (X_test, Y_test)) = cifar10.load_data()
X_train = X_train.astype("float")
X_test = X_test.astype("float")

# apply mean subtraction to the data
mean = np.mean(X_train, axis=0)
X_train -= mean
X_test -= mean
"""

# One-hot encoding
lb = LabelBinarizer()
Y_train = lb.fit_transform(Y_train)
Y_test = lb.transform(Y_test)

In [None]:
# construct the image generator for data augmentation
gen_aug = ImageDataGenerator(width_shift_range=0.1, height_shift_range=0.1, horizontal_flip=True, fill_mode="nearest")

# Model construction

In [10]:
conv1,conv2,linear = (3, 3, 5, 1, 'same'), (3, 3, 5, 1, 'same'), ("", 512, 2)
channelsize = (36, 72)

def weight(x):
    model = Sequential()
    model.add(Convolution2D(batch_input_shape=(None, channelsize[0], channelsize[1], conv1[0]), 
                            filters=conv1[1], 
                            kernel_size=conv1[2], 
                            strides=conv1[3],
                            padding=conv1[4])
             )
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Convolution2D(filters=conv2[1], 
                            kernel_size=conv2[2], 
                            strides=conv2[3],
                            padding=conv2[4])
             )
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    return model(x)

def resnet():
    x = weight(x)
    
    model = Sequential()
    model.add(Flatten())
    model.add(Dense(linear[1]))
    model.add(Dense(linear[2]))
    model.add(Activation('softmax'))




In [11]:
# initialize a model
#model = MiniGoogLeNet.build(width=32, height=32, depth=1, classes=10)
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_7 (Conv2D)            (None, 72, 36, 3)         228       
_________________________________________________________________
batch_normalization_3 (Batch (None, 72, 36, 3)         12        
_________________________________________________________________
activation_4 (Activation)    (None, 72, 36, 3)         0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 72, 36, 3)         228       
_________________________________________________________________
batch_normalization_4 (Batch (None, 72, 36, 3)         12        
_________________________________________________________________
activation_5 (Activation)    (None, 72, 36, 3)         0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 7776)             

In [63]:
# initialize the optimizer
print("[INFO] compiling model...")
opt = SGD(lr=MIN_LR, momentum=0.9)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])

[INFO] compiling model...


NameError: name 'MIN_LR' is not defined

# Find learning rate

In [38]:
if lr_find > 0:
# initialize the learning rate finder (from 1e-10 to 1e+1)
    print("[INFO] finding learning rate...")
    lrf = LearningRateFinder(model)
    lrf.find(gen_aug.flow(X_train, Y_train, batch_size=BATCH_SIZE), 1e-10, 1e+1,
    stepsPerEpoch=np.ceil((len(X_train) / float(BATCH_SIZE))), batchSize=BATCH_SIZE)

    # plot the loss and save the resulting plot to disk
    lrf.plot_loss()
    plt.savefig(LRFIND_PLOT_PATH)
    
    print("[INFO] learning rate finder complete")
    print("[INFO] examine plot and adjust learning rates before training")
    #exit(0)

NameError: name 'lr_find' is not defined

# Cyclical training

In [None]:
# otherwise, we have already defined a learning rate space to train
# over, so compute the step size and initialize the cyclic learning
# rate method
stepSize = STEP_SIZE * (X_train.shape[0] // BATCH_SIZE)
clr = CyclicLR(mode=CLR_METHOD, base_lr=MIN_LR, max_lr=MAX_LR, step_size=stepSize)

# train the network
print("[INFO] training network...")
H = model.fit_generator(gen_aug.flow(X_train, Y_train, batch_size=BATCH_SIZE),
    validation_data=(X_test, Y_test),
    steps_per_epoch=X_train.shape[0] // BATCH_SIZE,
    epochs=NUM_EPOCHS,
    callbacks=[clr],
    verbose=1)

# evaluate the network and show a classification report
print("[INFO] evaluating network...")
predictions = model.predict(X_test, batch_size=BATCH_SIZE)
print(classification_report(Y_test.argmax(axis=1),
predictions.argmax(axis=1), target_names=CLASSES))

# Plot results

In [None]:
# construct a plot that plots and saves the training history
N = np.arange(0, NUM_EPOCHS)
plt.style.use("ggplot")
plt.figure()
plt.plot(N, H.history["loss"], label="train_loss")
plt.plot(N, H.history["val_loss"], label="val_loss")
plt.plot(N, H.history["acc"], label="train_acc")
plt.plot(N, H.history["val_acc"], label="val_acc")
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
plt.savefig(TRAINING_PLOT_PATH)

# plot the learning rate history
N = np.arange(0, len(clr.history["lr"]))
plt.figure()
plt.plot(N, clr.history["lr"])
plt.title("Cyclical Learning Rate (CLR)")
plt.xlabel("Training Iterations")
plt.ylabel("Learning Rate")
plt.savefig(CLR_PLOT_PATH)