Group Details:

*   Janusz Feigel
*   Bhavana Malla
*   Brinda Rao

In [None]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
# Switch the working directory to the course folder on the mounted Drive.
os.chdir('/content/drive/MyDrive/Colab Notebooks/Introduction to Deep Learning')
# Bare last expression: the notebook displays the resulting working directory.
os.getcwd()

'/content/drive/MyDrive/Colab Notebooks/Introduction to Deep Learning'

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from datasets import MNISTDataset
from tensorflow.keras.utils import to_categorical
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras import backend as k
from tensorflow.keras.layers import Dropout
from tensorflow.keras.optimizers import SGD

In [None]:
def load_dataset_mnist():
  """Load MNIST and add an explicit single-channel axis to the images.

  The image height/width are taken from the loaded arrays and the loader is
  referenced through ``tf.keras.datasets``, so the function does not depend
  on notebook globals (``mnist``, ``img_rows``, ``img_cols``) being defined
  by some other cell before it is called.

  Returns:
    Tuple ``(inpx, trainX, trainY, testX, testY)``. ``inpx`` is the per-image
    input shape in the backend's image data format. Labels are returned as
    loaded (no one-hot encoding is applied).
  """
  (trainX, trainY), (testX, testY) = tf.keras.datasets.mnist.load_data()
  rows, cols = trainX.shape[1], trainX.shape[2]
  # Put the channel axis where the active Keras backend expects it.
  if k.image_data_format() == 'channels_first':
    inpx = (1, rows, cols)
  else:
    inpx = (rows, cols, 1)
  trainX = trainX.reshape((trainX.shape[0],) + inpx)
  testX = testX.reshape((testX.shape[0],) + inpx)
  return inpx, trainX, trainY, testX, testY

def prepare_dataset(trainX, trainY, testX, testY, shuffle_buffer=1024, batch_size=128):
  """Wrap the train/test arrays in batched ``tf.data`` pipelines.

  Args:
    trainX, trainY: training images and labels.
    testX, testY: test images and labels.
    shuffle_buffer: shuffle buffer size for the training set. Defaults to
      1024, the value that used to be hard-coded.
    batch_size: examples per batch for both datasets. Defaults to 128.

  Returns:
    ``(train_dataset, test_dataset)``; only the training dataset is shuffled.
  """
  train_dataset = (
      tf.data.Dataset.from_tensor_slices((trainX, trainY))
      .shuffle(buffer_size=shuffle_buffer)
      .batch(batch_size)
  )
  test_dataset = tf.data.Dataset.from_tensor_slices((testX, testY)).batch(batch_size)
  return train_dataset, test_dataset

def prep_pixels(train, test):
    """Return float32 copies of both image arrays divided by 255.0.

    For 0-255 pixel data this maps the values into the 0-1 range. The input
    arrays are left untouched.

    Returns:
        ``(train_norm, test_norm)``
    """
    train_norm, test_norm = (
        images.astype('float32') / 255.0 for images in (train, test)
    )
    return train_norm, test_norm

def define_model_mnist():
  """Build the (uncompiled) CNN used for MNIST.

  Three stages of Conv2D (3x3, ReLU, 'same' padding, he_uniform) followed by
  2x2 max-pooling, with 32, 64 and 128 filters. Then Flatten, Dropout(0.4),
  Dense(128, ReLU) and a 10-unit softmax output. The input shape is fixed to
  (28, 28, 1).
  """
  conv_kwargs = dict(activation='relu', kernel_initializer='he_uniform', padding='same')
  return Sequential([
      Conv2D(32, (3, 3), input_shape=(28, 28, 1), **conv_kwargs),
      MaxPooling2D((2, 2), padding='same'),
      Conv2D(64, (3, 3), **conv_kwargs),
      MaxPooling2D((2, 2), padding='same'),
      Conv2D(128, (3, 3), **conv_kwargs),
      MaxPooling2D((2, 2), padding='same'),
      Flatten(),
      Dropout(0.4),
      Dense(128, activation='relu', kernel_initializer='he_uniform'),
      Dense(10, activation='softmax'),
  ])

In [None]:
	# compile model
	opt = keras.optimizers.Adam(learning_rate=1e-3)
	loss_fn = keras.losses.SparseCategoricalCrossentropy()

# MNIST **Dataset**

In [None]:
# Load MNIST. img_rows / img_cols / mnist are kept as module-level names for
# code that reads them as globals.
img_rows, img_cols = 28, 28
mnist = tf.keras.datasets.mnist
inpx, trainX, trainY, testX, testY = load_dataset_mnist()
# Scale pixels to 0-1 *before* building the tf.data pipelines: the datasets
# hold the arrays they were created from, so normalising afterwards would
# leave the model training and evaluating on raw 0-255 values.
trainX, testX = prep_pixels(trainX, testX)
# Build the shuffled/batched training set and the batched test set.
train_dataset, test_dataset = prepare_dataset(trainX, trainY, testX, testY)
# Define the model.
model = define_model_mnist()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Prepare the metrics.
# test_step() and the training loop use the name `test_acc_metric`, so the
# evaluation metric must be bound to it. `val_acc_metric` is kept as an alias
# of the same object for any code that still refers to the old name.
train_acc_metric = keras.metrics.SparseCategoricalAccuracy()
test_acc_metric = keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = test_acc_metric

In [None]:
@tf.function
def train_step(x, y):
    """Run one optimisation step on a batch and return that batch's loss.

    Relies on the notebook-level `model`, `loss_fn`, `opt` and
    `train_acc_metric`; the metric is updated with this batch's predictions.
    """
    with tf.GradientTape() as tape:
        # The forward pass (training mode) and the loss are recorded on the tape.
        predictions = model(x, training=True)
        batch_loss = loss_fn(y, predictions)
    weights = model.trainable_weights
    gradients = tape.gradient(batch_loss, weights)
    opt.apply_gradients(zip(gradients, weights))
    train_acc_metric.update_state(y, predictions)
    return batch_loss

@tf.function
def test_step(x, y):
    """Score one evaluation batch and fold it into `test_acc_metric`.

    The notebook-level `model` is called with training=False; nothing is
    returned.
    """
    predictions = model(x, training=False)
    test_acc_metric.update_state(y, predictions)

In [None]:
import time

epochs = 25
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))
    start_time = time.time()

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        loss_value = train_step(x_batch_train, y_batch_train)

        # Log every 200 batches.
        if step % 200 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )

    # Display metrics at the end of each epoch.
    train_acc = train_acc_metric.result()
    print("Training acc over epoch: %.4f" % (float(train_acc),))

    # Reset training metrics at the end of each epoch.
    # reset_state() replaces the deprecated reset_states() spelling.
    train_acc_metric.reset_state()

    # Run a validation loop at the end of each epoch.
    for x_batch_val, y_batch_val in test_dataset:
        test_step(x_batch_val, y_batch_val)

    test_acc = test_acc_metric.result()
    test_acc_metric.reset_state()
    # These two prints belong inside the epoch loop: placed after it, only the
    # final epoch's accuracy and duration would ever be reported.
    print("Validation acc: %.4f" % (float(test_acc),))
    print("Time taken: %.2fs" % (time.time() - start_time))


Start of epoch 0
Training loss (for one batch) at step 0: 0.0056
Training loss (for one batch) at step 200: 0.0805
Training loss (for one batch) at step 400: 0.0023
Training acc over epoch: 0.9930

Start of epoch 1
Training loss (for one batch) at step 0: 0.0102
Training loss (for one batch) at step 200: 0.0206
Training loss (for one batch) at step 400: 0.1133
Training acc over epoch: 0.9932

Start of epoch 2
Training loss (for one batch) at step 0: 0.0049
Training loss (for one batch) at step 200: 0.0045
Training loss (for one batch) at step 400: 0.0503
Training acc over epoch: 0.9921

Start of epoch 3
Training loss (for one batch) at step 0: 0.0397
Training loss (for one batch) at step 200: 0.0063
Training loss (for one batch) at step 400: 0.0030
Training acc over epoch: 0.9933

Start of epoch 4
Training loss (for one batch) at step 0: 0.0108
Training loss (for one batch) at step 200: 0.0290
Training loss (for one batch) at step 400: 0.0001
Training acc over epoch: 0.9933

Start of 

Using SGD, the output after two epochs was:

Training acc over epoch: 0.9381
Validation acc: 0.9495

# CIFAR **Dataset**

In [None]:
from keras.datasets import cifar10
def load_dataset_cifar():
  """Fetch CIFAR-10 through keras and return it as a flat 4-tuple.

  Returns:
    ``(trainX, trainY, testX, testY)``. Labels are returned exactly as loaded
    (no one-hot encoding is applied).
  """
  train_split, test_split = cifar10.load_data()
  trainX, trainY = train_split
  testX, testY = test_split
  return trainX, trainY, testX, testY


def prepare_dataset(trainX, trainY, testX, testY, shuffle_buffer=50000, batch_size=128):
  """Wrap the train/test arrays in batched ``tf.data`` pipelines.

  Args:
    trainX, trainY: training images and labels.
    testX, testY: test images and labels.
    shuffle_buffer: shuffle buffer size for the training set. Defaults to
      50000, the value that used to be hard-coded.
    batch_size: examples per batch for both datasets. Defaults to 128.

  Returns:
    ``(train_dataset, test_dataset)``; only the training dataset is shuffled.
  """
  train_dataset = (
      tf.data.Dataset.from_tensor_slices((trainX, trainY))
      .shuffle(buffer_size=shuffle_buffer)
      .batch(batch_size)
  )
  test_dataset = tf.data.Dataset.from_tensor_slices((testX, testY)).batch(batch_size)
  return train_dataset, test_dataset

def prep_pixels(train, test):
    """Cast both image arrays to float32 and divide them by 255.0.

    For 0-255 pixel data the results lie in the 0-1 range. New arrays are
    returned; the inputs are not modified.

    Returns:
        ``(train_norm, test_norm)``
    """
    def _to_unit_float(images):
        # float32 cast first, then scale.
        return images.astype('float32') / 255.0

    return _to_unit_float(train), _to_unit_float(test)

def define_model_cifar():
  """Build the (uncompiled) CNN used for CIFAR-10.

  Three blocks of two Conv2D layers (3x3, ReLU, 'same' padding, he_uniform)
  plus a 2x2 max-pool, with 32, 64 and 128 filters. Then Flatten,
  Dropout(0.4), Dense(128, ReLU) and a 10-unit softmax output. The input
  shape is fixed to (32, 32, 3).
  """
  conv_kwargs = dict(activation='relu', kernel_initializer='he_uniform', padding='same')

  model = Sequential()
  for block_index, filters in enumerate((32, 64, 128)):
    # Only the very first layer declares the input shape.
    first_layer_kwargs = {'input_shape': (32, 32, 3)} if block_index == 0 else {}
    model.add(Conv2D(filters, (3, 3), **conv_kwargs, **first_layer_kwargs))
    model.add(Conv2D(filters, (3, 3), **conv_kwargs))
    model.add(MaxPooling2D((2, 2)))

  for head_layer in (
      Flatten(),
      Dropout(0.4),
      Dense(128, activation='relu', kernel_initializer='he_uniform'),
      Dense(10, activation='softmax'),
  ):
    model.add(head_layer)
  return model

def define_model2():
  """Build a deeper CIFAR-10 CNN with data augmentation as its first stage.

  The augmentation sub-model (random horizontal flip, rotation and zoom)
  declares the (32, 32, 3) input shape and is the first layer of the network.
  It is followed by seven Conv2D + max-pool stages (kernel sizes 7, 5, 5,
  3, 3, 3, 3), then Flatten, Dropout(0.2), Dense(512, ReLU) and a 10-unit
  softmax output.

  Two fixes compared with the earlier version:
    * the stray ``layers.Input`` that sat *after* the augmentation block has
      been removed, since an input spec is only valid at the start of a
      Sequential model and the augmentation block already provides it;
    * the output layer now applies softmax, matching the other model builders
      in this notebook and the ``SparseCategoricalCrossentropy()`` loss, which
      by default expects probabilities rather than logits.
  """
  data_augmentation = keras.Sequential(
    [layers.RandomFlip("horizontal", input_shape=(32, 32, 3)),
     layers.RandomRotation(0.1),
     layers.RandomZoom(0.1)]
  )

  model = tf.keras.Sequential(
    [data_augmentation,
     layers.Conv2D(64, 7, padding='same', activation='relu'),
     layers.MaxPooling2D(padding='same'),
     layers.Conv2D(128, 5, padding='same', activation='relu'),
     layers.MaxPooling2D(padding='same'),
     layers.Conv2D(128, 5, padding='same', activation='relu'),
     layers.MaxPooling2D(padding='same'),
     layers.Conv2D(256, 3, padding='same', activation='relu'),
     layers.MaxPooling2D(padding='same'),
     layers.Conv2D(256, 3, padding='same', activation='relu'),
     layers.MaxPooling2D(padding='same'),
     layers.Conv2D(512, 3, padding='same', activation='relu'),
     layers.MaxPooling2D(padding='same'),
     layers.Conv2D(512, 3, padding='same', activation='relu'),
     layers.MaxPooling2D(padding='same'),
     layers.Flatten(),
     layers.Dropout(.2),
     layers.Dense(512, activation='relu'),
     layers.Dense(10, activation='softmax')
    ]
  )
  return model

@tf.function
def train_step(x, y):
    """Apply one gradient update for the batch ``(x, y)`` and return its loss.

    Operates on the notebook-level `model`, `loss_fn`, `opt` and
    `train_acc_metric`.
    """
    trainable = model.trainable_weights
    with tf.GradientTape() as tape:
        outputs = model(x, training=True)
        step_loss = loss_fn(y, outputs)
    # Differentiate the batch loss w.r.t. every trainable weight and apply.
    step_grads = tape.gradient(step_loss, trainable)
    opt.apply_gradients(zip(step_grads, trainable))
    train_acc_metric.update_state(y, outputs)
    return step_loss
    
@tf.function
def test_step(x, y):
    """Update `test_acc_metric` with the model's inference-mode output for a batch.

    Uses the notebook-level `model`; returns nothing.
    """
    outputs = model(x, training=False)
    test_acc_metric.update_state(y, outputs)

In [None]:
# Load CIFAR-10.
trainX, trainY, testX, testY = load_dataset_cifar()
# Scale pixels to 0-1 *before* building the tf.data pipelines: the datasets
# hold the arrays they were created from, so normalising afterwards would
# leave the model training and evaluating on raw 0-255 values.
trainX, testX = prep_pixels(trainX, testX)
# Build the shuffled/batched training set and the batched test set.
train_dataset, test_dataset = prepare_dataset(trainX, trainY, testX, testY)
# Define the model.
model = define_model_cifar()
# Optimizer and loss used by train_step (fresh optimizer state for this model).
opt = keras.optimizers.Adam(learning_rate=1e-3)
loss_fn = keras.losses.SparseCategoricalCrossentropy()

In [None]:
# One sparse-categorical accuracy tracker each for the training and test passes.
train_acc_metric, test_acc_metric = (
    keras.metrics.SparseCategoricalAccuracy() for _ in range(2)
)

In [None]:
import time

epochs = 50
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))
    start_time = time.time()

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        loss_value = train_step(x_batch_train, y_batch_train)

        # Log every 200 batches.
        if step % 200 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )

    # Display metrics at the end of each epoch.
    train_acc = train_acc_metric.result()
    print("Training acc over epoch: %.4f" % (float(train_acc),))

    # Reset training metrics at the end of each epoch.
    # reset_state() replaces the deprecated reset_states() spelling.
    train_acc_metric.reset_state()

    # Run a validation loop at the end of each epoch.
    for x_batch_val, y_batch_val in test_dataset:
        test_step(x_batch_val, y_batch_val)

    val_acc = test_acc_metric.result()
    test_acc_metric.reset_state()
    # These two prints belong inside the epoch loop: placed after it, only the
    # final epoch's accuracy and duration would ever be reported.
    print("Test acc: %.4f" % (float(val_acc),))
    print("Time taken: %.2fs" % (time.time() - start_time))


Start of epoch 0
Training loss (for one batch) at step 0: 575.9200
Training loss (for one batch) at step 200: 1.8255
Training acc over epoch: 0.2700

Start of epoch 1
Training loss (for one batch) at step 0: 1.5836
Training loss (for one batch) at step 200: 1.7260
Training acc over epoch: 0.4183

Start of epoch 2
Training loss (for one batch) at step 0: 1.5391
Training loss (for one batch) at step 200: 1.3596
Training acc over epoch: 0.4869

Start of epoch 3
Training loss (for one batch) at step 0: 1.3958
Training loss (for one batch) at step 200: 1.3891
Training acc over epoch: 0.5327

Start of epoch 4
Training loss (for one batch) at step 0: 1.1006
Training loss (for one batch) at step 200: 1.1597
Training acc over epoch: 0.5678

Start of epoch 5
Training loss (for one batch) at step 0: 1.0242
Training loss (for one batch) at step 200: 1.0455
Training acc over epoch: 0.5955

Start of epoch 6
Training loss (for one batch) at step 0: 1.1376
Training loss (for one batch) at step 200: 1

## **Experiments**

## **For CIFAR Dataset**

Accuracy using different dropout values
**dropout -0.2**
Training acc over epoch: **0.8738**
Validation acc: **0.7098**

**dropout-0.3**
Training acc over epoch: **0.8369**
Test acc: **0.7333**

**dropout-0.4**---model with 3 convolutional layer
Training acc over epoch: **0.9114**
Test acc: **0.7613**



1.   When using dropout after maxpooling, the accuracy is very low, as low as 20%

2.   We saw substantial overfitting, with train accuracy around 30% higher than test accuracy, so we added dropout and data augmentation. Train accuracy became slightly worse, but test accuracy improved significantly and was around 20% higher than before.  

1.   We tried different layer sizes and came to the conclusion that more layers help a little bit.  

2.   We tried different output channel sizes and came to the conclusion that more output channels help a little bit.

1.   We tried different kernel sizes and got good results using a kernel size of 7 first, then 5, and then 3 for the remaining layers.








# **For MNIST Dataset**
1.   For a (1,1) filter size, the train accuracy is 86% and the test accuracy is 88%.
2.   For a (28,28) filter size, the train accuracy is 98% and the test accuracy is around 98%.





**Questions**



1. Where in the network should dropout be placed?
2. What is the best dropout rate to use?


