###  Students
- ####  Tony James (Wednesday batch)
- ####  Mutum Jagatchandra (Wednesday batch)

In [1]:
#!pip install tqdm

In [2]:
!sudo nvidia-persistenced
!sudo nvidia-smi -ac 877,1530

nvidia-persistenced failed to initialize. Check syslog for more details.
Applications clocks set to "(MEM 877, SM 1530)" for GPU 00000000:00:04.0
All done.


##  What changes we did here

- batch size  = 4096
- lr          = 0.8
- Conv --> BN --> Relu --> MaxPool changed to Conv --> MaxPool --> BN --> Relu
    - is computation will be reduced
- converted input tf.float16 type
- before entorpy calculation h is converted to th.float32
- removed val_acc calculation for every epoch
    - this is not part of training

### Result

- val acc: 0.9005
- time taken for 24 epochs - 202.61555981636047

-  'epoch: 24 lr: 0.0 train loss: 0.07802369079589844 train acc: 0.97688 val loss: 0.31441468505859377 val acc: 0.9005 time: 202.61555981636047'

In [3]:
import numpy as np
import time, math
from tqdm import tqdm_notebook as tqdm

import tensorflow as tf
import tensorflow.contrib.eager as tfe
import os

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [4]:
os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '1'

In [5]:
tf.keras.backend.set_floatx('float16')

In [6]:
tf.__version__

'1.14.0'

In [7]:
tf.enable_eager_execution()

In [8]:
device_name = tf.test.gpu_device_name()
if not tf.test.is_gpu_available():
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [9]:
# Load the TensorBoard notebook extension.
%load_ext tensorboard

In [10]:
BATCH_SIZE = 4096 #@param {type:"integer"}
MOMENTUM = 0.9 #@param {type:"number"}
LEARNING_RATE = 0.8 #@param {type:"number"}
WEIGHT_DECAY = 5e-4 #@param {type:"number"}
EPOCHS = 24 #@param {type:"integer"}

In [11]:
def init_pytorch(shape, dtype=tf.float32, partition_info=None):
  fan = np.prod(shape[:-1])
  bound = 1 / math.sqrt(fan)
  return tf.random.uniform(shape, minval=-bound, maxval=bound, dtype=dtype)

In [12]:
class ConvBN(tf.keras.Model):
  def __init__(self, c_out):
    super().__init__()
    self.conv = tf.keras.layers.Conv2D(filters=c_out, kernel_size=3, padding="SAME", kernel_initializer=init_pytorch, use_bias=False)
    self.bn = tf.keras.layers.BatchNormalization(momentum=0.9, epsilon=1e-5)

  def call(self, inputs):
    return tf.nn.relu(self.bn(self.conv(inputs)))

In [13]:
class ConvBNPool(tf.keras.Model):
  def __init__(self, c_out):
    super().__init__()
    self.conv = tf.keras.layers.Conv2D(filters=c_out, kernel_size=3, padding="SAME", kernel_initializer=init_pytorch, use_bias=False)
    self.bn = tf.keras.layers.BatchNormalization(momentum=0.9, epsilon=1e-5)
    self.pool = tf.keras.layers.MaxPooling2D()
  def call(self, inputs):
    return tf.nn.relu(self.bn(self.pool(self.conv(inputs))))

In [14]:
class ResBlk(tf.keras.Model):
  def __init__(self, c_out, pool, res = False):
    super().__init__()
    #self.conv_bn = ConvBN(c_out)
    #self.pool = pool
    self.pool_out = ConvBNPool(c_out)
    self.res = res
    if self.res:
      self.res1 = ConvBN(c_out)
      self.res2 = ConvBN(c_out)

  def call(self, inputs):
    #h = self.pool(self.conv_bn(inputs))
    h = self.pool_out(inputs)
    if self.res:
      h = h + self.res2(self.res1(h))
    return h

In [15]:
class DavidNet(tf.keras.Model):
  def __init__(self, c=64, weight=0.125):
    super().__init__()
    pool = tf.keras.layers.MaxPooling2D()
    self.init_conv_bn = ConvBN(c)
    self.blk1 = ResBlk(c*2, pool, res = True)
    self.blk2 = ResBlk(c*4, pool)
    self.blk3 = ResBlk(c*8, pool, res = True)
    self.pool = tf.keras.layers.GlobalMaxPool2D()
    self.linear = tf.keras.layers.Dense(10, kernel_initializer=init_pytorch, use_bias=False)
    self.weight = weight

  def call(self, x, y):
    #x = tf.cast(x, tf.float16)
    h = self.pool(self.blk3(self.blk2(self.blk1(self.init_conv_bn(x)))))
    h = tf.cast(h, tf.float32)
    h = self.linear(h) * self.weight
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=h, labels=y)
    loss = tf.reduce_sum(ce)
    correct = tf.reduce_sum(tf.cast(tf.math.equal(tf.argmax(h, axis = 1), y), tf.float32))
    return loss, correct

In [16]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
len_train, len_test = len(x_train), len(x_test)
y_train = y_train.astype('int64').reshape(len_train)
y_test = y_test.astype('int64').reshape(len_test)

train_mean = np.mean(x_train, axis=(0,1,2))
train_std = np.std(x_train, axis=(0,1,2))

normalize = lambda x: ((x - train_mean) / train_std).astype('float32') # todo: check here
pad4 = lambda x: np.pad(x, [(0, 0), (4, 4), (4, 4), (0, 0)], mode='reflect')

x_train = normalize(pad4(x_train))
x_test = normalize(x_test)

x_train = tf.cast(x_train, tf.float16)
x_test  = tf.cast(x_test, tf.float16)

In [17]:
model = DavidNet()
batches_per_epoch = len_train//BATCH_SIZE + 1

lr_schedule = lambda t: np.interp([t], [0, (EPOCHS+1)//5, EPOCHS], [0, LEARNING_RATE, 0])[0]
global_step = tf.train.get_or_create_global_step()
lr_func = lambda: lr_schedule(global_step/batches_per_epoch)/BATCH_SIZE
opt = tf.train.MomentumOptimizer(lr_func, momentum=MOMENTUM, use_nesterov=True)
data_aug = lambda x, y: (tf.image.random_flip_left_right(tf.random_crop(x, [32, 32, 3])), y)

In [18]:
t = time.time()
test_set = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(BATCH_SIZE)

for epoch in range(EPOCHS):
  train_loss = test_loss = train_acc = test_acc = 0.0
  train_set = tf.data.Dataset.from_tensor_slices((x_train, y_train)).map(data_aug).shuffle(len_train).batch(BATCH_SIZE).prefetch(1)

  tf.keras.backend.set_learning_phase(1)
  for (x, y) in tqdm(train_set):
    with tf.GradientTape() as tape:
      loss, correct = model(x, y)

    var = model.trainable_variables
    grads = tape.gradient(loss, var)
    for g, v in zip(grads, var):
      g += v * WEIGHT_DECAY * BATCH_SIZE
    opt.apply_gradients(zip(grads, var), global_step=global_step)

    train_loss += loss.numpy()
    train_acc += correct.numpy()

  tf.keras.backend.set_learning_phase(0)
  print('epoch:', epoch+1, 'lr:', lr_schedule(epoch+1), 'train loss:', train_loss / len_train, 'train acc:', train_acc / len_train, 'val loss:', test_loss / len_test, 'val acc:', test_acc / len_test, 'time:', time.time() - t)


for (x, y) in test_set:
  loss, correct = model(x, y)
  test_loss += loss.numpy()
  test_acc += correct.numpy()
    
print('epoch:', epoch+1, 'lr:', lr_schedule(epoch+1), 'train loss:', train_loss / len_train, 'train acc:', train_acc / len_train, 'val loss:', test_loss / len_test, 'val acc:', test_acc / len_test, 'time:', time.time() - t)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 1 lr: 0.16 train loss: 2.1309899194335937 train acc: 0.22496 val loss: 0.0 val acc: 0.0 time: 16.86810064315796


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 2 lr: 0.32 train loss: 1.7568363208007813 train acc: 0.35554 val loss: 0.0 val acc: 0.0 time: 25.11972427368164


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 3 lr: 0.48 train loss: 1.5638998559570312 train acc: 0.42562 val loss: 0.0 val acc: 0.0 time: 33.21082854270935


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 4 lr: 0.64 train loss: 1.3604082153320312 train acc: 0.51726 val loss: 0.0 val acc: 0.0 time: 41.21425819396973


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 5 lr: 0.8 train loss: 1.242136307373047 train acc: 0.5604 val loss: 0.0 val acc: 0.0 time: 49.23864722251892


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 6 lr: 0.7578947368421053 train loss: 0.9826478857421875 train acc: 0.65204 val loss: 0.0 val acc: 0.0 time: 57.29617238044739


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 7 lr: 0.7157894736842105 train loss: 0.7251234948730468 train acc: 0.7431 val loss: 0.0 val acc: 0.0 time: 65.24879026412964


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 8 lr: 0.6736842105263159 train loss: 0.614000869140625 train acc: 0.78426 val loss: 0.0 val acc: 0.0 time: 73.19326257705688


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 9 lr: 0.6315789473684211 train loss: 0.5015859729003906 train acc: 0.82526 val loss: 0.0 val acc: 0.0 time: 81.24234247207642


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 10 lr: 0.5894736842105264 train loss: 0.43268719177246096 train acc: 0.84986 val loss: 0.0 val acc: 0.0 time: 89.31560897827148


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 11 lr: 0.5473684210526316 train loss: 0.37281493774414065 train acc: 0.87078 val loss: 0.0 val acc: 0.0 time: 97.31055736541748


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 12 lr: 0.5052631578947369 train loss: 0.31778092041015626 train acc: 0.89006 val loss: 0.0 val acc: 0.0 time: 105.43039011955261


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 13 lr: 0.46315789473684216 train loss: 0.27875078369140627 train acc: 0.90488 val loss: 0.0 val acc: 0.0 time: 113.37890958786011


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 14 lr: 0.4210526315789474 train loss: 0.24332721618652345 train acc: 0.9165 val loss: 0.0 val acc: 0.0 time: 121.52454972267151


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 15 lr: 0.3789473684210527 train loss: 0.22020941772460936 train acc: 0.92496 val loss: 0.0 val acc: 0.0 time: 129.4773359298706


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 16 lr: 0.33684210526315794 train loss: 0.1922840231323242 train acc: 0.9345 val loss: 0.0 val acc: 0.0 time: 137.43982458114624


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 17 lr: 0.2947368421052632 train loss: 0.16713086364746094 train acc: 0.94384 val loss: 0.0 val acc: 0.0 time: 145.43990921974182


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 18 lr: 0.25263157894736843 train loss: 0.14942048217773438 train acc: 0.94922 val loss: 0.0 val acc: 0.0 time: 153.35033512115479


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 19 lr: 0.21052631578947378 train loss: 0.1270901643371582 train acc: 0.95778 val loss: 0.0 val acc: 0.0 time: 161.48028326034546


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 20 lr: 0.16842105263157903 train loss: 0.11443641479492188 train acc: 0.96282 val loss: 0.0 val acc: 0.0 time: 169.56586956977844


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 21 lr: 0.12631578947368427 train loss: 0.10127901947021484 train acc: 0.96776 val loss: 0.0 val acc: 0.0 time: 177.67206120491028


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 22 lr: 0.08421052631578951 train loss: 0.09050645858764648 train acc: 0.97122 val loss: 0.0 val acc: 0.0 time: 185.67810797691345


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 23 lr: 0.04210526315789476 train loss: 0.08261994720458984 train acc: 0.97506 val loss: 0.0 val acc: 0.0 time: 193.67231845855713


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 24 lr: 0.0 train loss: 0.07802369079589844 train acc: 0.97688 val loss: 0.0 val acc: 0.0 time: 201.59216713905334
epoch: 24 lr: 0.0 train loss: 0.07802369079589844 train acc: 0.97688 val loss: 0.31441468505859377 val acc: 0.9005 time: 202.61555981636047
