# In this tutorial we will build basic CNN for image classification.
Author :- Ankur Mali
* We will define our model and learn how to use keras module to build custom layers
* We will also design our own training loop, that is identical to model.fit in Keras.
* The aim of this excercise is to teach, how to use exisiting Tensorflow API to construct our own module and integrate it with tf.keras API.

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import math
np.random.seed(1234)

#Things to do
* Remember to Normalize your data and create validation split from train set.
* Learn about tf.data, tf.slices and also tf.records

In [2]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
x_val = x_train[50000:60000]
x_train = x_train[0:50000]
y_val = y_train[50000:60000]
y_train = y_train[0:50000]
x_train = x_train.astype(np.float32).reshape(-1,28,28,1) / 255.0
x_val = x_val.astype(np.float32).reshape(-1,28,28,1) / 255.0
x_test = x_test.astype(np.float32).reshape(-1,28,28,1) / 255.0
y_train = tf.one_hot(y_train, depth=10)
y_val = tf.one_hot(y_val, depth=10)
y_test = tf.one_hot(y_test, depth=10)
print(x_train.shape)
print(x_test.shape)
print(x_val.shape)
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(128)
train_dataset_full = train_dataset.shuffle(buffer_size=1024).batch(len(train_dataset))
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val_dataset = val_dataset.batch(128)
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_dataset = test_dataset.batch(128)
print(len(train_dataset))
print(len(test_dataset))

(50000, 28, 28, 1)
(10000, 28, 28, 1)
(10000, 28, 28, 1)
391
79


# Create your custom CNN class
* Convolution layers has 4D weights of size (h,w,input_feature, output_feature), where h=height of your kernel and w = width of our kernel. If you add batches then it is 5D.
* Now your model will convolve across your input feature map with kernel and create output feature map, that is then passed to next layer.
* As we have learned in our prior class, to initialize your weights, we use tf.Variable(weight_init(size)), tf.keras.layers.Conv2D will do this for you. Play with the function and see how it works for your problem.
* Few important concepts, learn to save your model after every k epochs and start re-training from last checkpoint. This is very useful, and you don't need to retrain your model from scratch.


In [3]:
class ImageRecognitionCNN(tf.keras.Model):
    
    def __init__(self, num_classes, device='cpu:0', checkpoint_directory=None, method="pre"):
        ''' Define the parameterized layers used during forward-pass, the device
            where you would like to run the computation (GPU, TPU, CPU) on and the checkpoint
            directory.
            
            Args:
                num_classes: the number of labels in the network.
                device: string, 'cpu:n' or 'gpu:n' (n can vary). Default, 'cpu:0'.
                checkpoint_directory: the directory where you would like to save or 
                                      restore a model.
        ''' 
        super(ImageRecognitionCNN, self).__init__()
        
        # Initialize layers
        self.conv1 = tf.keras.layers.Conv2D(64, 3, padding='same', activation=None)
        self.conv2 = tf.keras.layers.Conv2D(64, 3,padding='same', activation=None)
        self.pool1 = tf.keras.layers.MaxPool2D()
        self.conv3 = tf.keras.layers.Conv2D(64, 3, padding='same', activation=None)
        self.conv4 = tf.keras.layers.Conv2D(64, 3, padding='same', activation=None)
        # self.pool2 = tf.keras.layers.MaxPool2D()
        # self.conv5 = tf.keras.layers.Conv2D(64, 3, padding='same', activation=None)
        # self.pool2 = tf.keras.layers.MaxPool2D()
        # self.conv6 = tf.keras.layers.Conv2D(64, 3, 2, padding='same', activation=None)
        # self.conv7 = tf.keras.layers.Conv2D(64, 1, padding='same', activation=None)
        self.conv8 = tf.keras.layers.Conv2D(num_classes, 1, padding='same', activation=None)
        self.val_stat = []
        self.test_stat = []
        self.gamma = tf.Variable(tf.ones([1,1]))
        self.beta = tf.Variable(tf.zeros([1,1]))
        self.variables.append(self.gamma)
        self.variables.append(self.beta)
        self.test_mean = tf.Variable(0.)
        self.test_std = tf.Variable(1.)
        # Define the device 
        self.device = device
        self.method = method
        # Define the checkpoint directory
        self.checkpoint_directory = checkpoint_directory
        self.acc = tf.keras.metrics.Accuracy()


    def predict_pre(self, images, training):
        """ Predicts the probability of each class, based on the input sample.
            
            Args:
                images: 4D tensor. Either an image or a batch of images.
                training: Boolean. Either the network is predicting in
                          training mode or not.
        """
        images = tf.cast(images, dtype=tf.float32)
        x = self.conv1(images)
        x = self.bn(x, training)
        x = tf.nn.relu(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.bn(x, training)
        x = tf.nn.relu(x)
        x = self.pool1(x)
        x = self.conv3(x)
        x = self.bn(x, training)
        x = tf.nn.relu(x)
        x = self.pool1(x)
        x = self.conv4(x)
        x = self.bn(x, training)
        x = tf.nn.relu(x)
        x = self.pool1(x)
        x = self.conv8(x)
        #x = tf.nn.relu(x)
        #print(x.shape)
        x = tf.reshape(x, (-1, 1, 10))
        #x = tf.keras.layers.Flatten(x)
        return x

    def predict_post(self, images, training):
        """ Predicts the probability of each class, based on the input sample.
            
            Args:
                images: 4D tensor. Either an image or a batch of images.
                training: Boolean. Either the network is predicting in
                          training mode or not.
        """
        images = tf.cast(images, dtype=tf.float32)
        x = self.conv1(images)
        x = tf.nn.relu(x)
        x = self.bn(x, training)
        x = self.pool1(x)
        x = self.conv2(x)
        x = tf.nn.relu(x)
        x = self.bn(x, training)
        x = self.pool1(x)
        x = self.conv3(x)
        x = tf.nn.relu(x)
        x = self.bn(x, training)
        x = self.pool1(x)
        x = self.conv4(x)
        x = tf.nn.relu(x)
        x = self.bn(x, training)
        x = self.pool1(x)
        x = self.conv8(x)
        #x = tf.nn.relu(x)
        #print(x.shape)
        x = tf.reshape(x, (-1, 1, 10))
        #x = tf.keras.layers.Flatten(x)
        return x

    def bn(self, X, training):
      decay =0.999
      eps = 0.0001
      # if training:
      mu = tf.reduce_mean(X, [0])
      sigma = tf.reduce_mean(tf.square(X-mu),[0])
        # tf.compat.v1.assign(self.test_mean, self.test_mean * decay + mu * (1 - decay))
        # tf.compat.v1.assign(self.test_std, self.test_std * decay + sigma * (1 - decay))
      h1_hat = (X-mu)/(tf.sqrt(sigma+eps))
      # else:
      #   h1_hat = (X-self.test_mean)/(tf.sqrt(self.test_std+eps))
      h1_out = self.gamma * h1_hat + self.beta
      return h1_out

    def loss_fn(self, images, target, training, method):
        """ Defines the loss function used during 
            training.         
        """
        if method == "pre":

          preds = self.predict_pre(images, training)
        else:
          preds = self.predict_post(images, training)
        #print(preds.shape)
        #print(target.shape)
        loss = tf.nn.softmax_cross_entropy_with_logits(labels=target, logits=preds)
        return loss


    def grads_fn(self, images, target, training, method):
        """ Dynamically computes the gradients of the loss value
            with respect to the parameters of the model, in each
            forward pass.
        """
        with tf.GradientTape() as tape:
            loss = self.loss_fn(images, target, training, method)
        return tape.gradient(loss, self.variables)
     

    def compute_accuracy_2(self, images, targets, training, method):
        """ Compute the accuracy on the input data.
        """
        with tf.device(self.device):
            
            # Predict the probability of each class
            if method == "pre":
              logits = self.predict_pre(images, training=False)
            else:
              logits = self.predict_post(images, training=False)
            # Select the class with the highest probability
            
            logits = tf.nn.softmax(logits)
            logits = tf.reshape(logits, [-1, 10])
            targets = tf.reshape(targets, [-1,10])
            preds = tf.argmax(logits, axis=1)
            goal = tf.argmax(targets, axis=1)
            self.acc.update_state(goal, preds)
            # Compute the accuracy
            result = self.acc.result().numpy()
        return result

  
    def fit_fc(self, training_data, eval_data, test_data, optimizer, num_epochs=500, 
            early_stopping_rounds=10, verbose=10, train_from_scratch=False):
        """ Function to train the model, using the selected optimizer and
            for the desired number of epochs. You can either train from scratch
            or load the latest model trained. Early stopping is used in order to
            mitigate the risk of overfitting the network.
            
            Args:
                training_data: the data you would like to train the model on.
                                Must be in the tf.data.Dataset format.
                eval_data: the data you would like to evaluate the model on.
                            Must be in the tf.data.Dataset format.
                optimizer: the optimizer used during training.
                num_epochs: the maximum number of iterations you would like to 
                            train the model.
                early_stopping_rounds: stop training if the loss on the eval 
                                       dataset does not decrease after n epochs.
                verbose: int. Specify how often to print the loss value of the network.
                train_from_scratch: boolean. Whether to initialize variables of the
                                    the last trained model or initialize them
                                    randomly.
        """ 
    
        # if train_from_scratch==False:
        #     self.restore_model()
        
        # Initialize best loss. This variable will store the lowest loss on the
        # eval dataset.
        best_loss = 999
        
        # Initialize classes to update the mean loss of train and eval
        train_loss = tf.keras.metrics.Mean('train_loss')
        eval_loss = tf.keras.metrics.Mean('eval_loss')
        test_loss = tf.keras.metrics.Mean('test_loss')
        acc_train = tf.keras.metrics.Mean('train_acc')
        acc_val = tf.keras.metrics.Mean('val_acc')
        acc_test = tf.keras.metrics.Mean('test_acc')
        # Initialize dictionary to store the loss history
        self.history = {}
        self.history['train_loss'] = []
        self.history['eval_loss'] = []
        self.history['test_loss'] = []
        self.history['train_acc'] = []
        self.history['val_acc'] = []
        self.history['test_acc'] = []
        
        # calculate stat for val and test

        # Begin training
        with tf.device(self.device):
            for i in tqdm(range(num_epochs)):
                # Training with gradient descent
                #training_data_x = training_data.shuffle(buffer_size=1024).batch(128)
                for step, (images, target) in enumerate(training_data):
                    grads = self.grads_fn(images, target, True, self.method)
                    optimizer.apply_gradients(zip(grads, self.variables))
                    
                # Compute the loss on the training data after one epoch
                for step, (images, target) in enumerate(training_data):
                    loss = self.loss_fn(images, target, True, self.method)
                    accuracy = self.compute_accuracy_2(images,target, True, self.method)
                    acc_train(accuracy)
                    train_loss(loss)
                self.history['train_loss'].append(train_loss.result().numpy())
                self.history['train_acc'].append(acc_train.result().numpy())
                # Reset metrics
                train_loss.reset_states()
                acc_train.reset_states()
                
                # Compute the loss on the eval data after one epoch
                for step, (images, target) in enumerate(eval_data):
                    loss = self.loss_fn(images, target, False, self.method)
                    accuracy = self.compute_accuracy_2(images,target, False, self.method)
                    acc_val(accuracy)
                    eval_loss(loss)
                self.history['eval_loss'].append(eval_loss.result().numpy())
                self.history['val_acc'].append(acc_val.result().numpy())
                # Reset metrics
                eval_loss.reset_states()
                acc_val.reset_states()
                
                # Print train and eval losses
                if (i==0) | ((i+1)%verbose==0):
                    print('Train loss at epoch %d: ' %(i+1), self.history['train_loss'][-1])
                    print('Train Acc at epoch %d: ' %(i+1), self.history['train_acc'][-1])
                    
                    print('Eval loss at epoch %d: ' %(i+1), self.history['eval_loss'][-1])
                    print('Eval Acc at epoch %d: ' %(i+1), self.history['val_acc'][-1])

                # Check for early stopping
                if self.history['eval_loss'][-1]<best_loss:
                    best_loss = self.history['eval_loss'][-1]
                    count = early_stopping_rounds
                else:
                    count -= 1
                if count==0:
                    break


            #test_loss_total = 0.0
            for step, (images, target) in enumerate(test_data):
                loss = self.loss_fn(images, target, False, self.method)
                accuracy = self.compute_accuracy_2(images,target, False, self.method)
                acc_test(accuracy)
                test_loss(loss)
            self.history['test_loss'].append(test_loss.result().numpy())
            self.history['test_acc'].append(acc_test.result().numpy())
            # Reset metrics
            test_loss.reset_states()
            acc_test.reset_states()
            print('Train loss: ', self.history['train_loss'][-1])
            print('Train Acc: ' , self.history['train_acc'][-1])
            
            print('Eval loss: ', self.history['eval_loss'][-1])
            print('Eval Acc: ' , self.history['val_acc'][-1])
            print('Test loss: ', self.history['test_loss'][-1])
            print('Test Acc: ', self.history['test_acc'][-1])            

In [4]:
# Specify the path where you want to save/restore the trained variables.
checkpoint_directory = 'models_checkpoints/mnist/'

# Use the GPU if available.
device = 'gpu:0'

# Define optimizer.
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=0.01)

# Instantiate model. This doesn't initialize the variables yet.
model = ImageRecognitionCNN(num_classes=10, device=device, 
                              checkpoint_directory=checkpoint_directory)

#model = ImageRecognitionCNN(num_classes=7, device=device)

In [5]:
# Train model
model.fit_fc(train_dataset, val_dataset, test_dataset, optimizer, num_epochs=10, 
          early_stopping_rounds=2, verbose=1, train_from_scratch=True)

 10%|█         | 1/10 [00:32<04:50, 32.28s/it]

Train loss at epoch 1:  0.3373233
Train Acc at epoch 1:  0.87015057
Eval loss at epoch 1:  0.36896893
Eval Acc at epoch 1:  0.8763949


 20%|██        | 2/10 [00:53<03:25, 25.69s/it]

Train loss at epoch 2:  0.26057258
Train Acc at epoch 2:  0.8826627
Eval loss at epoch 2:  0.3134951
Eval Acc at epoch 2:  0.8888966


 30%|███       | 3/10 [01:15<02:48, 24.11s/it]

Train loss at epoch 3:  0.24571805
Train Acc at epoch 3:  0.891399
Eval loss at epoch 3:  0.30624536
Eval Acc at epoch 3:  0.89476


 40%|████      | 4/10 [01:36<02:17, 22.90s/it]

Train loss at epoch 4:  0.20075168
Train Acc at epoch 4:  0.8978977
Eval loss at epoch 4:  0.27119428
Eval Acc at epoch 4:  0.9016436


 50%|█████     | 5/10 [01:57<01:51, 22.28s/it]

Train loss at epoch 5:  0.2000532
Train Acc at epoch 5:  0.903661
Eval loss at epoch 5:  0.28234324
Eval Acc at epoch 5:  0.90612346


 50%|█████     | 5/10 [02:20<02:20, 28.08s/it]

Train loss at epoch 6:  0.18031551
Train Acc at epoch 6:  0.90784
Eval loss at epoch 6:  0.28074485
Eval Acc at epoch 6:  0.9099714





Train loss:  0.18031551
Train Acc:  0.90784
Eval loss:  0.28074485
Eval Acc:  0.9099714
Test loss:  0.2957353
Test Acc:  0.9097773
