### Mounting drive on Colab

In [1]:
from google.colab import drive
drive.mount('/gdrive', force_remount = True)

Mounted at /gdrive


###Libraries

In [2]:
!pip install tensorflow_addons

Collecting tensorflow_addons
  Downloading tensorflow_addons-0.16.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 4.2 MB/s 
Installing collected packages: tensorflow-addons
Successfully installed tensorflow-addons-0.16.1


In [3]:
import os
import cv2
import os
import h5py
import numpy as np
import pickle
from tqdm import tqdm
from PIL import Image
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow_addons.metrics import HammingLoss
from tensorflow_addons.metrics import F1Score
os.chdir('/gdrive/My Drive/')
from numpy import savez_compressed
import matplotlib.pyplot as plt
from PIL import Image, ImageEnhance
import tensorflow_datasets as tfds
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras import layers, models
import tensorflow.keras as keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras import Model

## Effects of changing Batch Size



---


Experimental Setup:

Following are the details of the experimental setup for the above-mentioned experiment:

Model used: ResNet50

Note: In order to make sure even though a range of batch sizes were used for this experiment all the models were trained for the same number of steps the epochs were changed accordingly.

For Ex: 

        Batch sizes used: [12, 24, 36, 48, 60, 72, 84, 96, 108, 120]

        Number of steps: 2000

        Given the relationship:

        Number of steps = (Dataset Size/ Batch Size) * Number of Epochs

        Therefore, if: Dataset Size = 24,000

        Batch Size = 12

        Number of Steps = 24,000/12 * epochs = 2,000 * epochs

        Number of Epochs = 1

        Dataset Size = 24,000

        Batch Size = 24

        Number of Steps = 24,000/24 * epochs = 1000 * epochs

        Number of Epochs = 2

        Therefore, Epochs: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

### For ImageNet as a source

In [5]:
from tensorflow.keras.applications.resnet50 import ResNet50

from timeit import default_timer as timer

class TimingCallback(keras.callbacks.Callback):
    def __init__(self, logs={}):
        self.logs=[]
    def on_epoch_begin(self, epoch, logs={}):
        self.starttime = timer()
    def on_epoch_end(self, epoch, logs={}):
        self.logs.append(timer()-self.starttime)

cb = TimingCallback()


pre_trained_model = ResNet50(input_shape = (96, 96, 3), 
                                include_top = False, 
                                weights = None)

#for layer in pre_trained_model.layers:
#  layer.trainable = False

In [6]:
def compile_model():
  from tensorflow.keras.optimizers import RMSprop

  # Flatten the output layer to 1 dimension
  x = layers.Flatten()(pre_trained_model.output)
  # Add a fully connected layer with 2000 hidden units and ReLU activation
  x = layers.Dense(2000, activation='relu')(x)
  # Add a fully connected layer with 2000 hidden units and ReLU activation
  x = layers.Dense(1000, activation='relu')(x)
  # Add a fully connected layer with 2000 hidden units and ReLU activation
  x = layers.Dense(500, activation='relu')(x)                  
  # Add a final softmax layer for classification
  x = layers.Dense(15, activation='softmax')(x)           

  model = Model( pre_trained_model.input, x) 

  model.compile(optimizer = RMSprop(lr=0.0001), 
              loss = 'categorical_crossentropy', 
              metrics = ['acc', tf.keras.metrics.AUC(), tf.keras.metrics.Recall(), tf.keras.metrics.Precision(), HammingLoss(threshold=0.5, mode='multiclass'), F1Score(num_classes= 15, threshold=0.5)])
  return model

In [7]:
data = np.load('Xtrain.npy', allow_pickle=True)
labels = np.load('ytrain.npy', allow_pickle=True)

X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size = 0.2, random_state = 42, stratify = labels)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size = 0.5, random_state = 42, stratify = y_test)

In [8]:
y_train = to_categorical(y_train, 15)
y_val = to_categorical(y_val, 15)
y_test = to_categorical(y_test, 15)

In [9]:
print("*********** Fitting model for 12 batch size and for 1 epochs")
model = compile_model()
model.fit(X_train, y_train, batch_size = 12, epochs=1, validation_data=(X_val, y_val), callbacks = [cb])
test_acc = model.evaluate(X_test, y_test)
print("***********test accuracy is", test_acc,"***************")
print("******************* Train time is", sum(cb.logs), "***********")
print("f1 score is", sum(test_acc[6])/15)

*********** Fitting model for 12 batch size and for 1 epochs


  super(RMSprop, self).__init__(name, **kwargs)


***********test accuracy is [2.271965265274048, 0.2150000035762787, 0.7888017892837524, 0.024666666984558105, 0.4327485263347626, 0.9753333330154419, array([0.06666667, 0.        , 0.        , 0.08064517, 0.00985222,
       0.09433962, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.07373272, 0.2413793 , 0.        , 0.08372093],
      dtype=float32)] ***************
******************* Train time is 500.4122308640001 ***********
f1 score is 0.04335577543824911


In [10]:
print("*********** Fitting model for 24 batch size and for 2 epochs")
model = compile_model()
model.fit(X_train, y_train, batch_size = 24, epochs=2, validation_data=(X_val, y_val), callbacks = [cb])
test_acc = model.evaluate(X_test, y_test)
print("***********test accuracy is", test_acc,"***************")
print("******************* Train time is", sum(cb.logs), "***********")
print("f1 score is", sum(test_acc[6])/15)

*********** Fitting model for 24 batch size and for 2 epochs


  super(RMSprop, self).__init__(name, **kwargs)


Epoch 1/2
Epoch 2/2
***********test accuracy is [1.0103740692138672, 0.6606666445732117, 0.9628415703773499, 0.5889999866485596, 0.7538396120071411, 0.41100001335144043, array([0.4042553 , 0.60714287, 0.6466166 , 0.7027027 , 0.77157354,
       0.8463612 , 0.50167227, 0.60377353, 0.6138614 , 0.7235142 ,
       0.379845  , 0.77134985, 0.884058  , 0.32520327, 0.7888889 ],
      dtype=float32)] ***************
******************* Train time is 1084.627522502 ***********
f1 score is 0.6380545715490977


In [11]:
print("*********** Fitting model for 36 batch size and for 3 epochs")
model = compile_model()
model.fit(X_train, y_train, batch_size = 36, epochs=3, validation_data=(X_val, y_val), callbacks = [cb])
test_acc = model.evaluate(X_test, y_test)
print("***********test accuracy is", test_acc,"***************")
print("******************* Train time is", sum(cb.logs), "***********")
print("f1 score is", sum(test_acc[6])/15)

*********** Fitting model for 36 batch size and for 3 epochs


  super(RMSprop, self).__init__(name, **kwargs)


Epoch 1/3
Epoch 2/3
Epoch 3/3
***********test accuracy is [4.017659664154053, 0.3840000033378601, 0.7652795314788818, 0.37033334374427795, 0.4077064096927643, 0.6296666860580444, array([0.4090909 , 0.56779665, 0.36567163, 0.36947793, 0.38345864,
       0.29835212, 0.41221377, 0.25514406, 0.41575494, 0.2682927 ,
       0.2903226 , 0.46879753, 0.4367816 , 0.31199998, 0.49417248],
      dtype=float32)] ***************
******************* Train time is 1757.8767792060003 ***********
f1 score is 0.3831551690896352


In [12]:
print("*********** Fitting model for 48 batch size and for 4 epochs")
model = compile_model()
model.fit(X_train, y_train, batch_size = 48, epochs=4, validation_data=(X_val, y_val), callbacks = [cb])
test_acc = model.evaluate(X_test, y_test)
print("***********test accuracy is", test_acc,"***************")
print("******************* Train time is", sum(cb.logs), "***********")
print("f1 score is", sum(test_acc[6])/15)

*********** Fitting model for 48 batch size and for 4 epochs


  super(RMSprop, self).__init__(name, **kwargs)


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
***********test accuracy is [1.5368033647537231, 0.6626666784286499, 0.9263513088226318, 0.6349999904632568, 0.712415874004364, 0.36500000953674316, array([0.668693  , 0.70940167, 0.5559106 , 0.7431694 , 0.77472526,
       0.62454873, 0.735955  , 0.58709675, 0.5822102 , 0.64819944,
       0.514658  , 0.7726161 , 0.7880597 , 0.65045595, 0.67330676],
      dtype=float32)] ***************
******************* Train time is 2520.1151498780005 ***********
f1 score is 0.6686004360516866


In [13]:
print("*********** Fitting model for 60 batch size and for 5 epochs")
model = compile_model()
model.fit(X_train, y_train, batch_size = 60, epochs=5, validation_data=(X_val, y_val), callbacks = [cb])
test_acc = model.evaluate(X_test, y_test)
print("***********test accuracy is", test_acc,"***************")
print("******************* Train time is", sum(cb.logs), "***********")
print("f1 score is", sum(test_acc[6])/15)

*********** Fitting model for 60 batch size and for 5 epochs


  super(RMSprop, self).__init__(name, **kwargs)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
***********test accuracy is [0.6785600185394287, 0.8529999852180481, 0.9711548686027527, 0.8420000076293945, 0.8632946014404297, 0.15800000727176666, array([0.79017854, 0.83425415, 0.8418231 , 0.8733031 , 0.8571429 ,
       0.9347259 , 0.79069763, 0.81994456, 0.84705883, 0.8787879 ,
       0.8105263 , 0.9164557 , 0.90452266, 0.7710145 , 0.9226803 ],
      dtype=float32)] ***************
******************* Train time is 3363.146255004 ***********
f1 score is 0.8528744061787923


In [14]:
print("*********** Fitting model for 72 batch size and for 6 epochs")
model = compile_model()
model.fit(X_train, y_train, batch_size = 72, epochs=6, validation_data=(X_val, y_val), callbacks = [cb])
test_acc = model.evaluate(X_test, y_test)
print("***********test accuracy is", test_acc,"***************")
print("******************* Train time is", sum(cb.logs), "***********")
print("f1 score is", sum(test_acc[6])/15)

*********** Fitting model for 72 batch size and for 6 epochs


  super(RMSprop, self).__init__(name, **kwargs)


Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
***********test accuracy is [1.4021908044815063, 0.7630000114440918, 0.9398919939994812, 0.7519999742507935, 0.7790055274963379, 0.24799999594688416, array([0.8144045 , 0.79139787, 0.6875    , 0.8222811 , 0.8       ,
       0.70289856, 0.7732697 , 0.68085104, 0.6941431 , 0.8019802 ,
       0.6433121 , 0.86734694, 0.88220555, 0.74233127, 0.7597254 ],
      dtype=float32)] ***************
******************* Train time is 4303.073661699002 ***********
f1 score is 0.7642431577046712


In [15]:
print("*********** Fitting model for 84 batch size and for 7 epochs")
model = compile_model()
model.fit(X_train, y_train, batch_size = 84, epochs=7, validation_data=(X_val, y_val), callbacks = [cb])
test_acc = model.evaluate(X_test, y_test)
print("***********test accuracy is", test_acc,"***************")
print("******************* Train time is", sum(cb.logs), "***********")
print("f1 score is", sum(test_acc[6])/15)

*********** Fitting model for 84 batch size and for 7 epochs


  super(RMSprop, self).__init__(name, **kwargs)


Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
***********test accuracy is [0.6951754689216614, 0.8556666374206543, 0.9717326164245605, 0.8483333587646484, 0.869490921497345, 0.15166667103767395, array([0.86352354, 0.86233765, 0.8190955 , 0.79999995, 0.89247316,
       0.9414758 , 0.87027025, 0.7888889 , 0.8716706 , 0.9023747 ,
       0.8000001 , 0.8946015 , 0.8872549 , 0.79444444, 0.9017632 ],
      dtype=float32)] ***************
******************* Train time is 5327.043100336003 ***********
f1 score is 0.8593449433644612


In [16]:
print("*********** Fitting model for 96 batch size and for 8 epochs")
model = compile_model()
model.fit(X_train, y_train, batch_size = 96, epochs=8, validation_data=(X_val, y_val), callbacks = [cb])
test_acc = model.evaluate(X_test, y_test)
print("***********test accuracy is", test_acc,"***************")
print("******************* Train time is", sum(cb.logs), "***********")
print("f1 score is", sum(test_acc[6])/15)

*********** Fitting model for 96 batch size and for 8 epochs


  super(RMSprop, self).__init__(name, **kwargs)


Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
***********test accuracy is [0.6648156046867371, 0.8533333539962769, 0.9743264317512512, 0.8420000076293945, 0.8728403449058533, 0.15800000727176666, array([0.8508288 , 0.859155  , 0.7799443 , 0.8173077 , 0.8624339 ,
       0.9275362 , 0.8585131 , 0.8179419 , 0.85238093, 0.8729282 ,
       0.8146341 , 0.87362635, 0.9381443 , 0.8071066 , 0.91851854],
      dtype=float32)] ***************
******************* Train time is 6454.468905022001 ***********
f1 score is 0.8567333340644836


In [17]:
print("*********** Fitting model for 108 batch size and for 9 epochs")
model = compile_model()
model.fit(X_train, y_train, batch_size = 108, epochs=9, validation_data=(X_val, y_val), callbacks = [cb])
test_acc = model.evaluate(X_test, y_test)
print("***********test accuracy is", test_acc,"***************")
print("******************* Train time is", sum(cb.logs), "***********")
print("f1 score is", sum(test_acc[6])/15)

*********** Fitting model for 108 batch size and for 9 epochs


  super(RMSprop, self).__init__(name, **kwargs)


Epoch 1/9
Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
***********test accuracy is [8.64410400390625, 0.4663333296775818, 0.7474796175956726, 0.46266666054725647, 0.47146740555763245, 0.5373333096504211, array([0.40800002, 0.72727275, 0.3809524 , 0.6089552 , 0.61077845,
       0.32233742, 0.5810056 , 0.2672414 , 0.44124705, 0.29661018,
       0.4876325 , 0.38434985, 0.6688103 , 0.59078586, 0.5182482 ],
      dtype=float32)] ***************
******************* Train time is 7654.863783961001 ***********
f1 score is 0.48628181020418804


In [18]:
print("*********** Fitting model for 120 batch size and for 10 epochs")
model = compile_model()
model.fit(X_train, y_train, batch_size = 120, epochs=10, validation_data=(X_val, y_val), callbacks = [cb])
test_acc = model.evaluate(X_test, y_test)
print("***********test accuracy is", test_acc,"***************")
print("******************* Train time is", sum(cb.logs), "***********")
print("f1 score is", sum(test_acc[6])/15)

*********** Fitting model for 120 batch size and for 10 epochs


  super(RMSprop, self).__init__(name, **kwargs)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
***********test accuracy is [0.6716787815093994, 0.8939999938011169, 0.9720393419265747, 0.8923333287239075, 0.8995295763015747, 0.10766666382551193, array([0.8743169 , 0.8779221 , 0.87677723, 0.84412473, 0.8817204 ,
       0.9722922 , 0.8811189 , 0.9017632 , 0.91183877, 0.9363868 ,
       0.86699504, 0.9349398 , 0.9316456 , 0.849642  , 0.9016393 ],
      dtype=float32)] ***************
******************* Train time is 8959.172264169 ***********
f1 score is 0.8962081948916117
