In [14]:
# import libraries
import keras
from keras.datasets import mnist
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD
from keras.callbacks import LearningRateScheduler

So far we have only used a fixed learning rate, but in practice, when working with larger datasets where you need to run about 200 to 300 epochs. Using a single learning rate would stop your accuracy from improving beyond certain points, the 
key is to gradually reduce your accuracy after certain number of epochs. If you started with a learning rate of 0.1, you might divide the learning rate by 10 after 
30 epochs, 60 epochs and 90 epochs, hence at epoch 90, you would have much lower learning rate. Keras provides a handy Learning Rate Scheduler to do this.

Below is our first example, modified to use a dynamic learning rate

In [15]:
(train_x, train_y), (test_x, test_y) = mnist.load_data()

In [16]:
#normalize the data
train_x = train_x.astype('float32')/255
test_x = test_x.astype('float32')/255

In [17]:
#flatten the image
train_x = train_x.reshape(60000, 784)
test_x = test_x.reshape(10000, 784)

In [18]:
#encode the labels to vectors
train_y = keras.utils.to_categorical(train_y, 10)
test_y = keras.utils.to_categorical(test_y, 10)

In [21]:
model = Sequential()
model.add(Dense(units= 128, activation= 'relu', input_shape = (784,)))
model.add(Dense(units = 128, activation= 'relu'))
model.add(Dense(units = 128, activation= 'relu'))
model.add(Dense(units = 128, activation= 'relu'))
model.add(Dense(units = 128, activation= 'relu'))
model.add(Dense(units = 128, activation= 'relu'))
model.add(Dense(units = 128, activation= 'relu'))
model.add(Dense(units = 10, activation= 'sigmoid'))

In [22]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_11 (Dense)            (None, 128)               100480    
                                                                 
 dense_12 (Dense)            (None, 128)               16512     
                                                                 
 dense_13 (Dense)            (None, 128)               16512     
                                                                 
 dense_14 (Dense)            (None, 128)               16512     
                                                                 
 dense_15 (Dense)            (None, 128)               16512     
                                                                 
 dense_16 (Dense)            (None, 128)               16512     
                                                                 
 dense_17 (Dense)            (None, 128)              

In [23]:
#define the learning rate schedule function
def lr_schedule(epoch):
  lr = 0.1

  if epoch > 15:
    lr = lr/100

  elif epoch > 10:
    lr = lr/10

  elif epoch >5:
    lr = lr / 5

  print ('Learning Rate: ', lr)

  return lr

In [24]:
#pass the scheduler function to the learning rate scheduler class
lr_scheduler = LearningRateScheduler(lr_schedule)

In [25]:
#specify the training components
model.compile(optimizer= SGD(lr_schedule(0)), loss = 'categorical_crossentropy', metrics = ['accuracy'])

Learning Rate:  0.1


In [26]:
model.fit(train_x, train_y, batch_size=  64, epochs= 30, verbose= 1, callbacks= [lr_scheduler])

Learning Rate:  0.1
Epoch 1/30
Learning Rate:  0.1
Epoch 2/30
Learning Rate:  0.1
Epoch 3/30
Learning Rate:  0.1
Epoch 4/30
Learning Rate:  0.1
Epoch 5/30
Learning Rate:  0.1
Epoch 6/30
Learning Rate:  0.02
Epoch 7/30
Learning Rate:  0.02
Epoch 8/30
Learning Rate:  0.02
Epoch 9/30
Learning Rate:  0.02
Epoch 10/30
Learning Rate:  0.02
Epoch 11/30
Learning Rate:  0.01
Epoch 12/30
Learning Rate:  0.01
Epoch 13/30
Learning Rate:  0.01
Epoch 14/30
Learning Rate:  0.01
Epoch 15/30
Learning Rate:  0.01
Epoch 16/30
Learning Rate:  0.001
Epoch 17/30
Learning Rate:  0.001
Epoch 18/30
Learning Rate:  0.001
Epoch 19/30
Learning Rate:  0.001
Epoch 20/30
Learning Rate:  0.001
Epoch 21/30
Learning Rate:  0.001
Epoch 22/30
Learning Rate:  0.001
Epoch 23/30
Learning Rate:  0.001
Epoch 24/30
Learning Rate:  0.001
Epoch 25/30
Learning Rate:  0.001
Epoch 26/30
Learning Rate:  0.001
Epoch 27/30
Learning Rate:  0.001
Epoch 28/30
Learning Rate:  0.001
Epoch 29/30
Learning Rate:  0.001
Epoch 30/30


<keras.src.callbacks.History at 0x236040b81d0>

In [27]:
accuracy = model.evaluate(x=test_x, y = test_y, batch_size= 64)
print('Accuracy: ', accuracy)

Accuracy:  [0.0730333924293518, 0.9818000197410583]


## MODEL CHECKPOINTS

In our examples so far, we only save the model after complete training. However, in practice, you would want to save your model after very N epochs. The reason is  that sometimes, our final epoch maybe less accurate than some epochs, usually we want the best, so by saving many, we can go back to a previously saved model that is better than our final model.

Keras provides the ModelCheckpoint utility to handle this.

In [30]:
# import libraries
import keras
from keras.datasets import mnist
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD
from keras.callbacks import LearningRateScheduler, ModelCheckpoint
import os

In [31]:
#load the mnist dataset
(train_x, train_y) , (test_x, test_y) = mnist.load_data()
#normalize the data
train_x = train_x.astype('float32') / 255
test_x = test_x.astype('float32') / 255

In [32]:
#Flatten the images
train_x = train_x.reshape(60000,784)
test_x = test_x.reshape(10000,784)


#Encode the labels to vectors
train_y = keras.utils.to_categorical(train_y,10)
test_y = keras.utils.to_categorical(test_y,10)


#Define the model
model = Sequential()
model.add(Dense(units=128,activation="relu",input_shape=(784,)))
model.add(Dense(units=128,activation="relu"))
model.add(Dense(units=128,activation="relu"))
model.add(Dense(units=10,activation="softmax"))


#Print a Summary of the model
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_19 (Dense)            (None, 128)               100480    
                                                                 
 dense_20 (Dense)            (None, 128)               16512     
                                                                 
 dense_21 (Dense)            (None, 128)               16512     
                                                                 
 dense_22 (Dense)            (None, 10)                1290      
                                                                 
Total params: 134794 (526.54 KB)
Trainable params: 134794 (526.54 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [33]:
#define the learning rate schedule function
def lr_schedule(epoch):
  lr = 0.1

  if epoch > 15:
    lr = lr/100

  elif epoch > 10:
    lr = lr/10

  elif epoch >5:
    lr = lr / 5

  print ('Learning Rate: ', lr)

  return lr

In [34]:
#Pass the scheduler function to the Learning Rate Scheduler class
lr_scheduler = LearningRateScheduler(lr_schedule)

#Directory in which to create models
save_direc = os.path.join(os.getcwd(), 'mnistsavedmodels')

In [43]:
#name of model files
model_name = 'mnistmodel. {epoch:03d}.h5'

In [44]:
#create a directory if it doesn't exist
if not os.path.isdir(save_direc):
  os.makedirs(save_direc)
  

In [47]:
#Join the directory with the model file
modelpath = os.path.join(save_direc, model_name)

In [48]:
checkpoint = ModelCheckpoint(filepath=modelpath,
                             monitor='val_accuracy',
                             verbose=1,
                             save_best_only=True,
                             save_weights_only=True,
                             mode='max',
                             save_freq='epoch')

In [50]:
#Specify the training components
model.compile(optimizer=SGD(lr_schedule(0)),loss="categorical_crossentropy",metrics=["accuracy"])


#Fit the model
model.fit(train_x,train_y,batch_size=32,epochs=20,shuffle=True,verbose=1,validation_data=(test_x, test_y),callbacks=[checkpoint, lr_scheduler])

Learning Rate:  0.1
Learning Rate:  0.1
Epoch 1/20
Epoch 1: val_accuracy improved from -inf to 0.97550, saving model to c:\Users\PC\Desktop\deep_learning\mnistsavedmodels\mnistmodel. 001.h5
Learning Rate:  0.1
Epoch 2/20
Epoch 2: val_accuracy improved from 0.97550 to 0.97860, saving model to c:\Users\PC\Desktop\deep_learning\mnistsavedmodels\mnistmodel. 002.h5
Learning Rate:  0.1
Epoch 3/20
Epoch 3: val_accuracy improved from 0.97860 to 0.97980, saving model to c:\Users\PC\Desktop\deep_learning\mnistsavedmodels\mnistmodel. 003.h5
Learning Rate:  0.1
Epoch 4/20
Epoch 4: val_accuracy improved from 0.97980 to 0.98100, saving model to c:\Users\PC\Desktop\deep_learning\mnistsavedmodels\mnistmodel. 004.h5
Learning Rate:  0.1
Epoch 5/20
Epoch 5: val_accuracy did not improve from 0.98100
Learning Rate:  0.1
Epoch 6/20
Epoch 6: val_accuracy improved from 0.98100 to 0.98140, saving model to c:\Users\PC\Desktop\deep_learning\mnistsavedmodels\mnistmodel. 006.h5
Learning Rate:  0.02
Epoch 7/20
Epoc

<keras.src.callbacks.History at 0x23607abb890>

In [51]:
model.fit(train_x,train_y,batch_size=32,epochs=20,shuffle=True,validation_split=0.1,verbose=1,callbacks=[checkpoint,lr_scheduler])

Learning Rate:  0.1
Epoch 1/20
Epoch 1: val_accuracy improved from 0.98350 to 1.00000, saving model to c:\Users\PC\Desktop\deep_learning\mnistsavedmodels\mnistmodel. 001.h5
Learning Rate:  0.1
Epoch 2/20
Epoch 2: val_accuracy did not improve from 1.00000
Learning Rate:  0.1
Epoch 3/20
Epoch 3: val_accuracy did not improve from 1.00000
Learning Rate:  0.1
Epoch 4/20
Epoch 4: val_accuracy did not improve from 1.00000
Learning Rate:  0.1
Epoch 5/20
Epoch 5: val_accuracy did not improve from 1.00000
Learning Rate:  0.1
Epoch 6/20
Epoch 6: val_accuracy did not improve from 1.00000
Learning Rate:  0.02
Epoch 7/20
Epoch 7: val_accuracy did not improve from 1.00000
Learning Rate:  0.02
Epoch 8/20
Epoch 8: val_accuracy did not improve from 1.00000
Learning Rate:  0.02
Epoch 9/20
Epoch 9: val_accuracy did not improve from 1.00000
Learning Rate:  0.02
Epoch 10/20
Epoch 10: val_accuracy did not improve from 1.00000
Learning Rate:  0.02
Epoch 11/20
Epoch 11: val_accuracy did not improve from 1.0000

<keras.src.callbacks.History at 0x23602af33d0>

### FUNCTIONAL API

Keras has two APIs for constructing models, the first is the Sequential API which we have used so far for simplicity sake, however, going forward we shall be using the more advanced functional API. The advantages might not seem obvious at this 
stage, but it is absolutely essential when designing more complex networks, as we shall do later

In [53]:
#IMPORT needed libraries
import keras
from keras.datasets import mnist
from keras.layers import Dense,Input
from keras.models import Model
from keras.optimizers import SGD
from keras.callbacks import LearningRateScheduler

In [54]:
#Define the model
def MiniModel(input_shape):
 images = Input(input_shape)
 net = Dense(units=128,activation="relu")(images)
 net = Dense(units=128, activation="relu")(net)
 net = Dense(units=128, activation="relu")(net)
 net = Dense(units=10,activation="softmax")(net)
 model = Model(inputs=images,outputs=net)
 return model

In [55]:
model = MiniModel((784,))

model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 784)]             0         
                                                                 
 dense_23 (Dense)            (None, 128)               100480    
                                                                 
 dense_24 (Dense)            (None, 128)               16512     
                                                                 
 dense_25 (Dense)            (None, 128)               16512     
                                                                 
 dense_26 (Dense)            (None, 10)                1290      
                                                                 
Total params: 134794 (526.54 KB)
Trainable params: 134794 (526.54 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [56]:
#define the learning rate schedule function
def lr_schedule(epoch):
  lr = 0.1

  if epoch > 15:
    lr = lr/100

  elif epoch > 10:
    lr = lr/10

  elif epoch >5:
    lr = lr / 5

  print ('Learning Rate: ', lr)

  return lr

In [57]:
#Pass the scheduler function to the Learning Rate Scheduler class
lr_scheduler = LearningRateScheduler(lr_schedule)


#Directory in which to create models
save_direc = os.path.join(os.getcwd(), 'mnistsavedmodels')


#Name of model files
model_name = 'mnistmodel.{epoch:03d}.h5'


#Create Directory if it doesn't exist
if not os.path.isdir(save_direc):
 os.makedirs(save_direc)


#Join the directory with the model file
modelpath = os.path.join(save_direc, model_name)
checkpoint = ModelCheckpoint(filepath=modelpath,
 monitor='val_acc',
 verbose=1,
 period=1)


#Specify the training components
model.compile(optimizer=SGD(lr_schedule(0)),loss="categorical_crossentropy",metrics=["accuracy"])

Learning Rate:  0.1


In [58]:
model.fit(train_x,train_y,batch_size=32,epochs=20,shuffle=True,validation_split=0.1,verbose=1,callbacks=[checkpoint,lr_scheduler])

Learning Rate:  0.1
Epoch 1/20
Epoch 1: saving model to c:\Users\PC\Desktop\deep_learning\mnistsavedmodels\mnistmodel.001.h5
Learning Rate:  0.1
Epoch 2/20
  29/1688 [..............................] - ETA: 6s - loss: 0.1343 - accuracy: 0.9558

  saving_api.save_model(


Epoch 2: saving model to c:\Users\PC\Desktop\deep_learning\mnistsavedmodels\mnistmodel.002.h5
Learning Rate:  0.1
Epoch 3/20
Epoch 3: saving model to c:\Users\PC\Desktop\deep_learning\mnistsavedmodels\mnistmodel.003.h5
Learning Rate:  0.1
Epoch 4/20
Epoch 4: saving model to c:\Users\PC\Desktop\deep_learning\mnistsavedmodels\mnistmodel.004.h5
Learning Rate:  0.1
Epoch 5/20
Epoch 5: saving model to c:\Users\PC\Desktop\deep_learning\mnistsavedmodels\mnistmodel.005.h5
Learning Rate:  0.1
Epoch 6/20
Epoch 6: saving model to c:\Users\PC\Desktop\deep_learning\mnistsavedmodels\mnistmodel.006.h5
Learning Rate:  0.02
Epoch 7/20
Epoch 7: saving model to c:\Users\PC\Desktop\deep_learning\mnistsavedmodels\mnistmodel.007.h5
Learning Rate:  0.02
Epoch 8/20
Epoch 8: saving model to c:\Users\PC\Desktop\deep_learning\mnistsavedmodels\mnistmodel.008.h5
Learning Rate:  0.02
Epoch 9/20
Epoch 9: saving model to c:\Users\PC\Desktop\deep_learning\mnistsavedmodels\mnistmodel.009.h5
Learning Rate:  0.02
Epoch 1

<keras.src.callbacks.History at 0x23626e30f10>

In [59]:
#Evaluate the accuracy of the test dataset
accuracy = model.evaluate(x=test_x,y=test_y,batch_size=32)
print("Accuracy: ",accuracy[1])

Accuracy:  0.982699990272522
