# CIFAR-10 using VGGNet

### Learning Activation Function used: https://arxiv.org/abs/1412.6830

#### Load dependencies

In [0]:
import numpy as np
np.random.seed(42)
import tensorflow as tf
tf.set_random_seed(42)
import matplotlib.pyplot as plt
from keras.utils import to_categorical

#### Load data

In [0]:
from tensorflow.examples.tutorials.mnist import input_data
# Load CIFAR-10 as (images, integer labels) pairs for the train and test splits.
(train_x, train_y), (test_x, test_y) = tf.keras.datasets.cifar10.load_data()

# Convert the integer class labels to one-hot rows.
train_y, test_y = to_categorical(train_y), to_categorical(test_y)

# Samples 40000..49999 of the training split are held out for validation;
# the first 40000 remain as training data.
val_x, val_y = train_x[40000:50000], train_y[40000:50000]
train_x, train_y = train_x[:40000], train_y[:40000]

# Scale every pixel value by 1/255.
train_x, test_x, val_x = train_x/255, test_x/255, val_x/255

#### Set parameters for each layer

In [0]:
# input layer:
# NOTE(review): 784 (= 28*28) looks carried over from an MNIST notebook; the
# input placeholder in this notebook takes 32x32x3 images. Confirm before
# relying on this value.
n_input = 784

# kernel size (k_size) of each of the 13 convolutional layers
k_conv_1 = k_conv_2 = k_conv_3 = k_conv_4 = 3
k_conv_5 = k_conv_6 = k_conv_7 = 3
k_conv_8 = k_conv_9 = k_conv_10 = 3
k_conv_11 = k_conv_12 = k_conv_13 = 3

# number of filters of each convolutional layer
n_conv_1 = n_conv_2 = 64
n_conv_3 = n_conv_4 = 128
n_conv_5 = n_conv_6 = n_conv_7 = 256
n_conv_8 = n_conv_9 = n_conv_10 = 512
n_conv_11 = n_conv_12 = n_conv_13 = 512

# width of the fully connected layer
n_dense = 512

# max pooling window
pool_size = 2

# dropout rates
dropout_1, dropout_2, dropout_3 = 0.3, 0.4, 0.5

# output layer:
n_classes = 10

In [0]:
batch_size = 128 # batch size; also multiplies the step counter in the learning-rate schedule
display_progress = 40 # after this many batches, output progress to screen
wt_init = tf.contrib.layers.xavier_initializer() # weight initializer
epoch_no_change = 400 # stops training if validation loss doesn't decrease in the last "epoch_no_change" epochs
restart_training = 1 # 1: create freshly initialised weights/biases and save them; any other value: reload them from the saved .npy files

#### Define placeholder Tensors for inputs and labels

In [0]:
# Input images: any batch size, 32x32 pixels, 3 channels.
x = tf.placeholder(tf.float32, [None, 32,32,3])
# Target labels: n_classes values per sample (compared via argmax in the accuracy metric).
y = tf.placeholder(tf.float32, [None, n_classes])
# Multiplier applied to every dropout rate inside network(): the keep probability
# is computed as 1 - rate * do_dropout, so feeding 0 turns dropout off.
do_dropout = tf.placeholder(tf.float32)

In [0]:
# Initial piecewise-linear curve shared by all 14 activation layers: seven knots
# whose values are 0 for x <= 0 and equal to x for x > 0.
_INIT_KNOT_XS = [-2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0]
_INIT_KNOT_YS = [0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 2.0]
_LAYER_KEYS = [str(_layer_no) for _layer_no in range(1, 15)]

# Every layer gets its own independent float32 copy of the knot positions/values.
(xs_arr1, xs_arr2, xs_arr3, xs_arr4, xs_arr5, xs_arr6, xs_arr7,
 xs_arr8, xs_arr9, xs_arr10, xs_arr11, xs_arr12, xs_arr13, xs_arr14) = [
    np.array(_INIT_KNOT_XS, dtype=np.float32) for _ in _LAYER_KEYS]
(ys_arr1, ys_arr2, ys_arr3, ys_arr4, ys_arr5, ys_arr6, ys_arr7,
 ys_arr8, ys_arr9, ys_arr10, ys_arr11, ys_arr12, ys_arr13, ys_arr14) = [
    np.array(_INIT_KNOT_YS, dtype=np.float32) for _ in _LAYER_KEYS]

# The "n"-prefixed arrays are a second, separate set of copies (nys_dict is the
# accumulator that pls() adds updated knot values into).
(nxs_arr1, nxs_arr2, nxs_arr3, nxs_arr4, nxs_arr5, nxs_arr6, nxs_arr7,
 nxs_arr8, nxs_arr9, nxs_arr10, nxs_arr11, nxs_arr12, nxs_arr13, nxs_arr14) = [
    np.array(_INIT_KNOT_XS, dtype=np.float32) for _ in _LAYER_KEYS]
(nys_arr1, nys_arr2, nys_arr3, nys_arr4, nys_arr5, nys_arr6, nys_arr7,
 nys_arr8, nys_arr9, nys_arr10, nys_arr11, nys_arr12, nys_arr13, nys_arr14) = [
    np.array(_INIT_KNOT_YS, dtype=np.float32) for _ in _LAYER_KEYS]

_xs_arrays = (xs_arr1, xs_arr2, xs_arr3, xs_arr4, xs_arr5, xs_arr6, xs_arr7,
              xs_arr8, xs_arr9, xs_arr10, xs_arr11, xs_arr12, xs_arr13, xs_arr14)
_ys_arrays = (ys_arr1, ys_arr2, ys_arr3, ys_arr4, ys_arr5, ys_arr6, ys_arr7,
              ys_arr8, ys_arr9, ys_arr10, ys_arr11, ys_arr12, ys_arr13, ys_arr14)
_nxs_arrays = (nxs_arr1, nxs_arr2, nxs_arr3, nxs_arr4, nxs_arr5, nxs_arr6, nxs_arr7,
               nxs_arr8, nxs_arr9, nxs_arr10, nxs_arr11, nxs_arr12, nxs_arr13, nxs_arr14)
_nys_arrays = (nys_arr1, nys_arr2, nys_arr3, nys_arr4, nys_arr5, nys_arr6, nys_arr7,
               nys_arr8, nys_arr9, nys_arr10, nys_arr11, nys_arr12, nys_arr13, nys_arr14)

# Lookup tables keyed by the layer id as a string ('1' .. '14').
xs_dict = dict(zip(_LAYER_KEYS, _xs_arrays))
ys_dict = dict(zip(_LAYER_KEYS, _ys_arrays))
nxs_dict = dict(zip(_LAYER_KEYS, _nxs_arrays))
nys_dict = dict(zip(_LAYER_KEYS, _nys_arrays))

# Per-segment slopes (m) and intercepts (c). Both tables start out pointing at
# the very same array objects as xs_dict.
# NOTE(review): presumably these are placeholders that get overwritten with real
# slopes/intercepts before the activation is evaluated — confirm.
m_dict = dict(zip(_LAYER_KEYS, _xs_arrays))
c_dict = dict(zip(_LAYER_KEYS, _xs_arrays))


#### Define types of layers

In [0]:
# dense layer followed by the learnable piecewise-linear activation:
def dense(x, W, b, s, debug=False):
    """Fully connected layer: custom_activation(x @ W + b, s).

    Args:
        x: input tensor accepted by tf.matmul together with W.
        W: weight matrix.
        b: bias added to the matrix product.
        s: layer id as a numeric string (e.g. '14'); selects which activation
            curve custom_activation uses.
        debug: when True, wrap the input in a tf.Print op that logs it with
            the prefix "DENSE X " each time the layer is evaluated. Off by
            default because it emits output on every forward pass.

    Returns:
        The activated output tensor.
    """
    if debug:
        x = tf.Print(x, [x], "DENSE X ")
    z = tf.add(tf.matmul(x, W), b)
    return custom_activation(z, s)

# convolutional layer followed by the learnable piecewise-linear activation:
def conv2d(x, W, b, s,stride_length=1, debug=False):
    """2-D convolution with 'SAME' padding, bias add and custom activation.

    Args:
        x: input tensor passed to tf.nn.conv2d.
        W: convolution filter tensor.
        b: bias added with tf.nn.bias_add.
        s: layer id as a numeric string (e.g. '1'); selects which activation
            curve custom_activation uses.
        stride_length: stride used for both spatial dimensions.
        debug: when True, wrap the input in a tf.Print op that logs it with
            the prefix "CONV2D X " each time the layer is evaluated. Off by
            default because it emits output on every forward pass.

    Returns:
        The activated feature map.
    """
    if debug:
        x = tf.Print(x, [x], "CONV2D X ")
    strides = [1, stride_length, stride_length, 1]
    xW = tf.nn.conv2d(x, W, strides=strides, padding='SAME')
    z = tf.nn.bias_add(xW, b)
    return custom_activation(z, s)

# max-pooling layer:
def maxpooling2d(x, p_size):
    """Max-pool `x` with a p_size x p_size window, equal stride and 'SAME' padding."""
    window = [1, p_size, p_size, 1]
    return tf.nn.max_pool(x, ksize=window, strides=list(window), padding='SAME')

#### Design neural network architecture

In [0]:
def network(x, weights, biases, n_in, mp_psize, dropout_1, dropout_2, dropout_3, do_dropout):
    """Build the 13-conv-layer + 1-dense-layer classifier and return its logits.

    Args:
        x: input image tensor.
        weights: dict with keys 'W_c1'..'W_c13', 'W_d1' and 'W_out'.
        biases: dict with keys 'b_c1'..'b_c13', 'b_d1' and 'b_out'.
        n_in: not used by this function.
        mp_psize: not used by this function (pooling is fixed to 2x2, stride 2).
        dropout_1, dropout_2, dropout_3: dropout rates for the first conv
            layer, the remaining un-pooled conv layers, and the last conv
            block / dense layer respectively.
        do_dropout: multiplier on the rates; keep_prob = 1 - rate * do_dropout.

    Returns:
        The un-normalised output-layer tensor (logits).
    """
    def _keep_prob(rate):
        return 1 - tf.multiply(rate, do_dropout)

    def _pool(tensor):
        return tf.nn.max_pool(tensor, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

    # (layer id, max-pool after the conv?, dropout rate applied last or None)
    conv_plan = [
        ('1', False, dropout_1),
        ('2', True, None),
        ('3', False, dropout_2),
        ('4', True, None),
        ('5', False, dropout_2),
        ('6', False, dropout_2),
        ('7', True, None),
        ('8', False, dropout_2),
        ('9', False, dropout_2),
        ('10', True, None),
        ('11', False, dropout_2),
        ('12', False, dropout_2),
        ('13', True, dropout_3),
    ]

    features = x
    for layer_id, pooled, rate in conv_plan:
        features = conv2d(features, weights['W_c' + layer_id], biases['b_c' + layer_id], layer_id)
        if pooled:
            features = _pool(features)
        if rate is not None:
            features = tf.nn.dropout(features, _keep_prob(rate))

    # Flatten to the number of rows of the dense weight matrix.
    dense_width = weights['W_d1'].get_shape().as_list()[0]
    flat = tf.reshape(features, [-1, dense_width])
    hidden = dense(flat, weights['W_d1'], biases['b_d1'], '14')
    hidden = tf.nn.dropout(hidden, _keep_prob(dropout_3))

    # output layer: plain affine map, no activation
    return tf.add(tf.matmul(hidden, weights['W_out']), biases['b_out'])

#### Define dictionaries for storing weights and biases for each layer -- and initialize

In [0]:
# calculate number of inputs to dense layer: 
# NOTE(review): these three values are not read again in this cell (W_d1 below
# uses a literal 512 rows); they are kept only so the names stay defined.
full_square_length = np.sqrt(n_input)
pooled_square_length = int(full_square_length / pool_size)
dense_inputs = (pooled_square_length**2 * n_conv_2)/4

# Per-layer filter counts / kernel sizes, in layer order 1..13.
_conv_filters = [n_conv_1, n_conv_2, n_conv_3, n_conv_4, n_conv_5, n_conv_6, n_conv_7,
                 n_conv_8, n_conv_9, n_conv_10, n_conv_11, n_conv_12, n_conv_13]
_conv_kernels = [k_conv_1, k_conv_2, k_conv_3, k_conv_4, k_conv_5, k_conv_6, k_conv_7,
                 k_conv_8, k_conv_9, k_conv_10, k_conv_11, k_conv_12, k_conv_13]
_weight_names = ['W_c%d' % _layer_no for _layer_no in range(1, 14)] + ['W_d1', 'W_out']
_bias_names = ['b_c%d' % _layer_no for _layer_no in range(1, 14)] + ['b_d1', 'b_out']
# L2 penalty attached to every weight except the output layer's.
_l2_reg = tf.contrib.layers.l2_regularizer(scale=0.0005)

if(restart_training==1):
  # Fresh start: zero biases, Xavier-initialised weights.
  bias_dict = {}
  for _name, _size in zip(_bias_names, _conv_filters + [n_dense, n_classes]):
    bias_dict[_name] = tf.Variable(tf.zeros([_size]))

  # Each conv filter is [k, k, in_channels, out_channels]; the first layer reads
  # the 3 image channels, every later one reads the previous layer's filters.
  _in_channels = [3] + _conv_filters[:-1]
  _weight_shapes = [[_k, _k, _c_in, _c_out]
                    for _k, _c_in, _c_out in zip(_conv_kernels, _in_channels, _conv_filters)]
  _weight_shapes += [[512, n_dense], [n_dense, n_classes]]

  weight_dict = {}
  for _name, _shape in zip(_weight_names, _weight_shapes):
    weight_dict[_name] = tf.get_variable(
        _name, _shape, initializer=wt_init,
        regularizer=None if _name == 'W_out' else _l2_reg)

else:
  # Resume: every parameter is read back from "<name>.npy".
  _saved_weights = {_name: np.load(_name + '.npy') for _name in _weight_names}
  _saved_biases = {_name: np.load(_name + '.npy') for _name in _bias_names}

  bias_dict = {}
  for _name in _bias_names:
    bias_dict[_name] = tf.Variable(_saved_biases[_name])

  weight_dict = {}
  for _name in _weight_names[:-1]:
    _value = _saved_weights[_name]
    # tf.get_variable needs an explicit shape whenever the initializer is a
    # callable such as tf.constant_initializer; without it variable creation
    # fails with a ValueError.
    weight_dict[_name] = tf.get_variable(
        _name, shape=_value.shape,
        initializer=tf.constant_initializer(_value),
        regularizer=_l2_reg)
  weight_dict['W_out'] = tf.Variable(_saved_weights['W_out'])


#### Learning Activation Function

In [0]:
from tensorflow.python.framework import ops
# Create the session kept in the module-level name `session` and initialise the
# variables that exist at this point. The initializer is built and run again
# later in the notebook, after the model, optimizer and gradient ops have been added.
initializer_op = tf.global_variables_initializer()
session = tf.Session()
session.run(initializer_op)


def pls(xs,ys,m,c,ota,otb,a2):
  """Apply one update to a layer's segment parameters and accumulate new knot values.

  Called through tf.py_func from the activation's gradient. The slopes `m` and
  intercepts `c` of the first `len(xs) - 1` segments are decreased by `ota` /
  `otb`, the curve is re-evaluated at every knot in `xs`, and the resulting
  values are added in place to `nys_dict[str(a2)]`.

  Knot values are computed as:
    * first knot: value of the first segment,
    * interior knots: mean of the two adjacent segments evaluated at the knot,
    * last knot: value of segment `len(xs) - 2`.

  Args:
    xs: 1-D array of knot positions.
    ys: 1-D array of current knot values (only its shape/dtype matter when it
      has the same length as `xs`; every entry is recomputed).
    m, c: per-segment slopes and intercepts.
    ota, otb: per-segment amounts subtracted from `m` and `c`.
    a2: layer id; `str(a2)` is the key into `nys_dict`.

  Returns:
    A one-element float32 array `[0.0]`; the caller adds it to the gradient so
    that this function is executed as part of the backward pass.
  """
  last = xs.shape[0] - 1

  # Work on private copies: the arguments are buffers handed over by
  # tf.py_func and must not be modified in place.
  m = np.array(m, copy=True)
  c = np.array(c, copy=True)
  ys2 = np.array(ys, copy=True)

  m[:last] -= ota[:last]
  c[:last] -= otb[:last]

  ys2[0] = m[0]*xs[0] + c[0]

  if(np.isnan(ys2[0])):
    # Diagnostics only use values that are in scope here (the previous version
    # referenced undefined names and raised instead of reporting).
    print()
    print("NaN in activation curve ",a2,
          ": nan(m) = ",np.sum(np.isnan(m))," nan(c) = ",np.sum(np.isnan(c)),
          " nan(ota) = ",np.sum(np.isnan(ota))," nan(otb) = ",np.sum(np.isnan(otb)))
    print("shape = ",xs.shape)
    print()

  for i in range(1,last):
    ys2[i] = ( (m[i-1]*xs[i] + c[i-1]) + (m[i]*xs[i] + c[i]) )/2.0
  ys2[last] = m[last-1]*xs[last] + c[last-1]

  nys_dict[str(a2)] += ys2
  return np.array([0.0],dtype=np.float32)



def np_custom_activation_grad(x,s,i2,grad):
  """Build the backward pass for the AddN op overridden in `custom_activation`.

  Besides producing the gradient with respect to the activation input, this
  graph fragment accumulates per-segment update amounts for the layer's slopes
  and intercepts and hands them to `pls` through `tf.py_func`.

  Args:
    x: first input of the overridden AddN op (the pre-activation tensor).
    s: second input; a tensor holding the layer id in every element.
    i2: third input; the per-element segment index stored as float.
    grad: gradient flowing back into the AddN output.

  Returns:
    The same tensor three times (one entry per AddN input): `grad` multiplied
    by the slope of each element's segment, plus the `[0.0]` returned by `pls`.
  """
  i = tf.cast(i2,tf.int32)  # segment index back to an integer for gather/scatter
  a2 = tf.cast(tf.reduce_max(s),dtype=tf.int32)  # layer id recovered from `s`
  # Run-time lookups of this layer's knot positions and knot values.
  tf_xs = tf.cast(tf.py_func(lambda val: np.float32(xs_dict[str(val)]), [a2], tf.float32),dtype=tf.float32)
  tf_ys = tf.cast(tf.py_func(lambda val: np.float32(ys_dict[str(val)]), [a2], tf.float32),dtype=tf.float32)
  
  # Run-time lookups of this layer's per-segment slopes and intercepts.
  tf_m2 = tf.cast(tf.py_func(lambda val: np.float32(m_dict[str(val)]), [a2], tf.float32),dtype=tf.float32)
  tf_c2 = tf.cast(tf.py_func(lambda val: np.float32(c_dict[str(val)]), [a2], tf.float32),dtype=tf.float32)
  # The forward pass is y = slope * x + c, so the input gradient is grad * slope.
  slope = tf.gather(tf_m2,i)
  y = tf.multiply(grad,slope)
  
  # Zero vectors with one entry per segment (number of knots minus one).
  az = tf.zeros([tf.shape(tf_xs)[0]-1],dtype=tf.float32)
  bz = tf.zeros([tf.shape(tf_xs)[0]-1],dtype=tf.float32)
  az0 = tf.zeros([1],dtype=tf.float32)
  bz0 = tf.zeros([1],dtype=tf.float32)
  
  # Scratch variables are created with a dummy shape and then re-assigned with
  # validate_shape=False so they take the per-segment shape at run time.
  # NOTE(review): these variables are created whenever this gradient function is
  # invoked, so they need initialising after the gradients are built — confirm
  # the later global initializer run covers them.
  tf_az = tf.Variable(az0,dtype=tf.float32)
  tf_bz = tf.Variable(bz0,dtype=tf.float32)
  tf_az = tf.assign(tf_az, az, validate_shape=False)
  tf_bz = tf.assign(tf_bz, bz, validate_shape=False)
  
  # Step size for the activation-curve parameters.
  curve_lr = 0.001
  # Accumulate, per segment, curve_lr * grad * x (slope) and curve_lr * grad (intercept).
  tf_az = tf.scatter_add(tf_az,i,tf.multiply(tf.multiply(grad,x),curve_lr))
  tf_bz = tf.scatter_add(tf_bz,i,tf.multiply(grad,curve_lr))
  # pls applies the accumulated amounts and returns [0.0]; adding that result to
  # y makes the py_func part of the gradient computation.
  ad = tf.py_func(pls,[tf_xs,tf_ys,tf_m2,tf_c2,tf_az,tf_bz,a2],[tf.float32])
  y = tf.add(y,ad)
  
  return y,y,y
  

    
    
@tf.RegisterGradient("np_custom_activation")
def custom_activation_grad(op,grad):
  """Gradient registered as "np_custom_activation".

  Unpacks the three inputs of the overridden op and delegates to
  `np_custom_activation_grad`.
  """
  pre_activation = op.inputs[0]
  layer_id = op.inputs[1]
  segment_index = op.inputs[2]
  return np_custom_activation_grad(pre_activation, layer_id, segment_index, grad)



def custom_activation(tf_x,s):
  """Apply the learnable piecewise-linear activation of layer `s` to `tf_x`.

  The curve is described by knot positions in `xs_dict[s]` and per-segment
  slopes / intercepts in `m_dict[s]` / `c_dict[s]`, all read at run time through
  `tf.py_func`. Every element of `tf_x` is assigned a segment index and mapped
  to `slope * x + intercept` of that segment.

  Args:
    tf_x: tensor of pre-activations; its rank must be statically known because
      it is read with `get_shape()`.
    s: layer id as a numeric string (e.g. '1'); used as key into the
      module-level dictionaries.

  Returns:
    The activated tensor. The gradient with respect to `tf_x` is produced by
    the gradient registered under "np_custom_activation".
  """
  s2 = tf.string_to_number(s)  # layer id as a number
  zeros = tf.subtract(tf_x,tf_x)
  s3 = tf.add(zeros,tf.cast(s2,tf.float32))  # tensor shaped like tf_x, filled with the layer id
  g = tf.get_default_graph()
  a2 = tf.cast(tf.reduce_max(s2),dtype=tf.int32)  # layer id as int, used for the dict lookups
  
  # Knot positions / values of this layer, fetched when the graph runs.
  # (tf_ys and x_sh are not used further down in this function.)
  tf_xs = tf.cast(tf.py_func(lambda val: np.float32(xs_dict[str(val)]), [a2], tf.float32),dtype=tf.float32)
  tf_ys = tf.cast(tf.py_func(lambda val: np.float32(ys_dict[str(val)]), [a2], tf.float32),dtype=tf.float32)
  x_sh = tf_x.get_shape().as_list()
  tf_x_len = len(tf_x.get_shape().as_list())  # rank of tf_x
  sh = tf.ones(tf_x_len)

  # Append a trailing axis and repeat every element once per knot, so each
  # input value can be compared against all knot positions at once.
  tf_xc = tf.expand_dims(tf_x,tf_x_len)

  # Tile multiples: 1 for every original axis, number of knots for the new axis.
  sh = tf.concat([sh,tf.cast(tf.shape(tf_xs),dtype=tf.float32)],axis=0)
  sh = tf.cast(sh,tf.int32)

  tf_x2 = tf.tile(tf_xc, sh)

  # Index of the nearest knot, then step one knot down when that knot lies
  # above the input, giving the index of the segment used for the element.
  # NOTE(review): for inputs below the first knot the nearest knot is index 0 and
  # the subtraction yields -1 — confirm tf.gather treats that as intended.
  xdf = tf.abs(tf.subtract(tf_x2, tf_xs))
  i = tf.cast(tf.argmin(xdf,axis=tf_x_len),tf.int32)
  tf_l = tf.cast(tf.greater(tf.gather(tf_xs,i), tf_x),dtype=tf.int32)
  i = tf.subtract(i,tf_l)
  i2 = tf.cast(i,tf.float32)
  # Route x, the layer id and the segment index through one AddN op whose
  # gradient is replaced by "np_custom_activation"; the backward pass thereby
  # receives all three as op inputs. The two subtractions undo the addition so
  # the forward value equals tf_x again.
  with g.gradient_override_map({"AddN": "np_custom_activation"}):
    tf_x21 = tf.add_n([tf_x,s3,i2])
  tf_x22 = tf.subtract(tf_x21,s3)
  tf_x22 = tf.subtract(tf_x22,i2)
  # Slope and intercept of each element's segment, then the linear map itself.
  slope = tf.gather(tf.cast(tf.py_func(lambda val: np.float32(m_dict[str(val)]), [a2], tf.float32),dtype=tf.float32),i)
  c = tf.gather(tf.cast(tf.py_func(lambda val: np.float32(c_dict[str(val)]), [a2], tf.float32),dtype=tf.float32),i)
  y = tf.add(tf.multiply(tf_x22,slope),c)
  return y

#### Build model

In [0]:
# Logits of the network for the input placeholder `x`; dropout strength is
# controlled at run time through the `do_dropout` placeholder.
predictions = network(x, weight_dict, bias_dict, n_input, 
                      pool_size, dropout_1, dropout_2, dropout_3, do_dropout)

#### Define model's loss and its optimizer

In [0]:
# Data term: mean softmax cross-entropy between the logits and the labels.
# `cost` stays the pure data term so that reported losses are unaffected.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predictions, labels=y))

# The L2 regularizers attached to the weights via tf.get_variable only register
# their penalties in the REGULARIZATION_LOSSES collection; they influence
# training only when they are part of the loss that is minimised.
regularization_cost = tf.losses.get_regularization_loss()

# Step counter incremented by the optimizer on every update.
batch = tf.Variable(0)

learning_rate = tf.train.exponential_decay(
  0.01,                # Base learning rate.
  batch * batch_size,  # Current index into the dataset.
  500,          # Decay step.
  0.999999,                # Decay rate.
  staircase=True)
# Use simple momentum for the optimization.
optimizer = tf.train.MomentumOptimizer(learning_rate,
                                     0.9).minimize(cost + regularization_cost,
                                                   global_step=batch)

#### Define evaluation metrics

In [0]:
# A sample counts as correct when the index of the largest logit equals the
# index of the largest label entry.
correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(y, 1))
# Share of correct samples in the batch, expressed in percent.
accuracy_pct = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) * 100

#### Create op for variable initialization

In [0]:
# Rebuild the initializer now that the whole graph (model, optimizer state,
# gradient scratch variables) exists, and run it in the existing session.
# This (re-)initialises every global variable, including the weights and biases.
initializer_op = tf.global_variables_initializer()
session.run(initializer_op)

#### Initialize Variables

In [0]:
if(restart_training==1):
  # On a fresh start, write the initial state to disk: current weights and
  # biases, the initial activation knots and one-element history arrays.
  wd,bd = session.run([weight_dict,bias_dict])

  epoch_arr = np.zeros(1)
  cost_arr = np.zeros(1)
  accuracy_arr = np.zeros(1)
  train_cost_arr = np.zeros(1)
  train_accuracy_arr = np.zeros(1)

  # Network parameters: "<name>.npy" for every weight, then every bias.
  for _param_name in ["W_c%d" % _layer_no for _layer_no in range(1, 14)] + ["W_d1", "W_out"]:
    np.save(_param_name + ".npy", wd[_param_name])
  for _param_name in ["b_c%d" % _layer_no for _layer_no in range(1, 14)] + ["b_d1", "b_out"]:
    np.save(_param_name + ".npy", bd[_param_name])

  # Activation knots: xs_arr<k>.npy / ys_arr<k>.npy for k = 1..14.
  _knot_pairs = zip(
      (xs_arr1, xs_arr2, xs_arr3, xs_arr4, xs_arr5, xs_arr6, xs_arr7,
       xs_arr8, xs_arr9, xs_arr10, xs_arr11, xs_arr12, xs_arr13, xs_arr14),
      (ys_arr1, ys_arr2, ys_arr3, ys_arr4, ys_arr5, ys_arr6, ys_arr7,
       ys_arr8, ys_arr9, ys_arr10, ys_arr11, ys_arr12, ys_arr13, ys_arr14))
  for _layer_no, (_knot_xs, _knot_ys) in enumerate(_knot_pairs, start=1):
    np.save("xs_arr%d.npy" % _layer_no, _knot_xs)
    np.save("ys_arr%d.npy" % _layer_no, _knot_ys)

  # Training history files.
  for _file_name, _history in (("epochs.npy", epoch_arr),
                               ("cost.npy", cost_arr),
                               ("accuracy.npy", accuracy_arr),
                               ("train_cost.npy", train_cost_arr),
                               ("train_accuracy.npy", train_accuracy_arr)):
    np.save(_file_name, _history)

#### Next Batch (Helper Function)

In [0]:
#Return a total of `num` random samples and labels.
def next_batch(num, data, labels):
  """Draw up to `num` matching (sample, label) pairs without replacement.

  A shuffled index over `data` is built with the global NumPy RNG and its first
  `num` entries select the rows; both results are returned as NumPy arrays.
  """
  order = np.arange(0 , len(data))
  np.random.shuffle(order)
  chosen = order[:num]

  batch_data, batch_labels = [], []
  for position in chosen:
    batch_data.append(data[position])
    batch_labels.append(labels[position])
  return np.asarray(batch_data), np.asarray(batch_labels)


#### Augment Images (Helper Functions)

In [0]:
from math import ceil, floor, pi
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt


def rotate_images(X_imgs, max_angle):
    """Rotate every image by its own random angle.

    Angles are drawn with the global NumPy RNG as whole degrees in
    [-max_angle, max_angle) and converted to radians.

    The rotation op is built and run in a private graph and session. The
    default graph is deliberately left alone: resetting it while the
    notebook's training session is alive is documented by TensorFlow as
    undefined behaviour.

    Args:
        X_imgs: batch of images matching the placeholder shape (N, 32, 32, 3).
        max_angle: bound of the rotation angle in degrees.

    Returns:
        The rotated images as returned by the session run.
    """
    n_images = len(X_imgs)
    angles = np.random.randint(0,2*max_angle,n_images) - max_angle
    angles = angles*np.pi/180

    with tf.Graph().as_default():
        X = tf.placeholder(tf.float32, shape=(n_images, 32, 32, 3))
        radian = tf.placeholder(tf.float32, shape=(n_images))
        tf_img = tf.contrib.image.rotate(X, radian)
        with tf.Session() as sess:
            rotated_imgs = sess.run(tf_img, feed_dict={X: X_imgs, radian: angles})

    return rotated_imgs

  
def get_translate_parameters(X_imgs, index, amount):
    """Translate a batch of 32x32x3 images along one axis.

    A glimpse of the images is extracted with tf.image.extract_glimpse and
    pasted into a canvas filled with 1.0, at a position that depends on `index`.

    The glimpse op runs in a private graph and session. The default graph is
    deliberately left alone: resetting it while the notebook's training
    session is alive is documented by TensorFlow as undefined behaviour.

    Args:
        X_imgs: float image batch of shape (N, 32, 32, 3).
        index: selects the case. 0 and 1 act on the width axis, 2 and any other
            value act on the height axis. May be a scalar or a 1-element array.
        amount: translation as a fraction of the image size; may be a scalar or
            a 1-element array. Cases 0 and 2 shrink the glimpse by `amount`;
            the other two grow it by `amount` (callers pass a negative value
            there) and shift the paste position.

    Returns:
        float32 array of shape (N, 32, 32, 3) with the translated images.
    """
    IMAGE_SIZE = 32
    # Callers pass 1-element arrays; reduce them to plain Python scalars so the
    # comparisons, ceil/floor calls and np.array constructions below are unambiguous.
    index = float(np.asarray(index).reshape(-1)[0])
    amount = float(np.asarray(amount).reshape(-1)[0])

    w_start = 0
    h_start = 0
    if index == 0: # Width axis, glimpse narrowed by `amount`
        offset = np.array([0.0, amount], dtype = np.float32)
        size = np.array([IMAGE_SIZE, ceil((1-amount) * IMAGE_SIZE)], dtype = np.int32)
    elif index == 1: # Width axis, glimpse pasted `-amount` of the width further right
        offset = np.array([0.0, amount], dtype = np.float32)
        size = np.array([IMAGE_SIZE, ceil((1+amount) * IMAGE_SIZE)], dtype = np.int32)
        w_start = int(floor((1 - (1+amount)) * IMAGE_SIZE))
    elif index == 2: # Height axis, glimpse shortened by `amount`
        offset = np.array([amount, 0.0], dtype = np.float32)
        size = np.array([ceil((1-amount) * IMAGE_SIZE), IMAGE_SIZE], dtype = np.int32)
    else: # Height axis, glimpse pasted `-amount` of the height further down
        offset = np.array([amount, 0.0], dtype = np.float32)
        size = np.array([ceil((1+amount) * IMAGE_SIZE), IMAGE_SIZE], dtype = np.int32)
        h_start = int(floor((1 - (1+amount)) * IMAGE_SIZE))

    # Same offset for every image in the batch.
    offsets = np.zeros((len(X_imgs), 2), dtype = np.float32)
    offsets[:, :] = offset

    with tf.Graph().as_default():
        glimpse_op = tf.image.extract_glimpse(X_imgs, size, offsets)
        with tf.Session() as sess:
            glimpses = sess.run(glimpse_op)

    X_translated = np.zeros((len(X_imgs), 32, 32, 3), dtype = np.float32)
    X_translated.fill(1.0) # Filling background color
    X_translated[:, h_start: h_start + size[0], w_start: w_start + size[1], :] = glimpses
    return X_translated

    
def translate_images(X_imgs,max_translate):
    """Shift a batch of images by one random horizontal and one random vertical amount.

    For each axis a single shift is drawn (whole percent in
    [-max_translate, max_translate), nudged by 0.00001 so it is never zero).
    Its sign selects the case passed to `get_translate_parameters`
    (0/1 for the first call, 2/3 for the second) and shift/100 is the amount.
    """
    X_imgs = np.array(X_imgs,np.float32)

    def _draw_shift():
        shift = np.random.randint(0,2*max_translate,1) - max_translate
        return shift + 0.00001

    # Horizontal pass: positive shift -> case 0, negative shift -> case 1.
    x_shift = _draw_shift()
    x_sign = x_shift/np.abs(x_shift)
    x_case = 1 - max(x_sign,0)
    shifted_x = get_translate_parameters(X_imgs, x_case, x_shift/100)

    # Vertical pass: positive shift -> case 2, negative shift -> case 3.
    y_shift = _draw_shift()
    y_sign = y_shift/np.abs(y_shift)
    y_case = 5 - (2 + max(y_sign,0))
    return get_translate_parameters(shifted_x, y_case, y_shift/100)

  
def flip_images(X_imgs):
    """Apply tf.image.random_flip_left_right to a batch of 32x32x3 images.

    The op is built and run in a private graph and session. The default graph
    is deliberately left alone: resetting it while the notebook's training
    session is alive is documented by TensorFlow as undefined behaviour.

    Args:
        X_imgs: image batch convertible to a float32 array of shape (N, 32, 32, 3).

    Returns:
        The result of the random-flip op for the batch.
    """
    X_imgs = np.array(X_imgs,np.float32)

    with tf.Graph().as_default():
        X = tf.placeholder(tf.float32, shape=(len(X_imgs), 32, 32, 3))
        random_flip = tf.image.random_flip_left_right(X)
        with tf.Session() as sess:
            random_flipped_imgs = sess.run(random_flip, feed_dict={X: X_imgs})

    return random_flipped_imgs

  
def augment(images):
  """Augment a batch of images: random rotation, random translation, random flip.

  Rotation uses angles within 15 degrees. Translation (up to 10 percent per
  axis) is applied in chunks of 200 images, each chunk sharing one random
  shift; the trailing chunk may be shorter, so every image is translated even
  when the batch size is not a multiple of 200.

  Args:
    images: image batch of shape (N, 32, 32, 3).

  Returns:
    The augmented image batch.
  """
  chunk_size = 200

  rotated_imgs = rotate_images(images, 15)
  translated_imgs = np.array(rotated_imgs)
  for start in range(0, len(rotated_imgs), chunk_size):
    stop = start + chunk_size
    translated_imgs[start:stop] = translate_images(rotated_imgs[start:stop],10)

  return flip_images(translated_imgs)

#### Train the network

In [0]:

import datetime

# Keys of the per-activation dicts (xs_dict, ys_dict, nxs_dict, nys_dict,
# m_dict, c_dict) and the entries of the fetched weight / bias dicts that are
# written to disk.
ACTIVATION_KEYS = [str(k) for k in range(1, 15)]
WEIGHT_NAMES = ['W_c' + str(k) for k in range(1, 14)] + ['W_d1', 'W_out']
BIAS_NAMES = ['b_c' + str(k) for k in range(1, 14)] + ['b_d1', 'b_out']


def _print_timestamp():
    """Print the current wall-clock time."""
    print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))


def _refresh_segment_params():
    """Recompute slope (m_dict) and intercept (c_dict) of each linear segment.

    For every activation, consecutive (xs, ys) points define one segment:
    m = dy / dx and c = y - m * x, both stored as float32 arrays with one
    entry per segment.
    """
    for key in ACTIVATION_KEYS:
        xs, ys = xs_dict[key], ys_dict[key]
        n_seg = xs.shape[0] - 1
        slopes = ((ys[1:n_seg + 1] - ys[:n_seg]) / (xs[1:n_seg + 1] - xs[:n_seg])).astype(np.float32)
        m_dict[key] = slopes
        c_dict[key] = (ys[:n_seg] - slopes * xs[:n_seg]).astype(np.float32)


def _save_snapshot(prefix, weights, biases, history):
    """Write weights, biases, activation points and history arrays as .npy files.

    Args:
        prefix: string prepended to every file name ("" for the resume files).
        weights: dict holding the arrays listed in WEIGHT_NAMES.
        biases: dict holding the arrays listed in BIAS_NAMES.
        history: sequence of (file stem, array) pairs saved as prefix + stem + ".npy".
    """
    for name in WEIGHT_NAMES:
        np.save(prefix + name + ".npy", weights[name])
    for name in BIAS_NAMES:
        np.save(prefix + name + ".npy", biases[name])
    for key in ACTIVATION_KEYS:
        np.save(prefix + "xs_arr" + key + ".npy", xs_dict[key])
        np.save(prefix + "ys_arr" + key + ".npy", ys_dict[key])
    for stem, values in history:
        np.save(prefix + stem + ".npy", values)


def _plot_activations():
    """Plot every activation's (xs, ys) curve, four per figure in a 2x2 grid."""
    for first in range(0, len(ACTIVATION_KEYS), 4):
        for slot, key in enumerate(ACTIVATION_KEYS[first:first + 4], start=1):
            plt.subplot(2, 2, slot)
            plt.title("Activation " + key)
            plt.xlabel("X")
            plt.ylabel("Y")
            plt.plot(xs_dict[key], ys_dict[key], 'b')
        plt.show()


min_cost = 100000.0
max_accuracy = 0.0
best_val_cost = float('inf')  # lowest validation cost seen so far (drives early stopping)

# loop over epochs:
epoch = 0
last_best_epoch = -1

vary_it = 0

while True:

    # Reload the state saved at the end of the previous epoch (the files must
    # already exist before the first iteration).
    for key in ACTIVATION_KEYS:
        xs_dict[key] = np.load('xs_arr' + key + '.npy')
        ys_dict[key] = np.load('ys_arr' + key + '.npy')
    epoch_arr = np.load('epochs.npy')
    cost_arr = np.load('cost.npy')
    accuracy_arr = np.load('accuracy.npy')
    train_cost_arr = np.load('train_cost.npy')
    train_accuracy_arr = np.load('train_accuracy.npy')

    for key in ACTIVATION_KEYS:
        nxs_dict[key] = np.array(xs_dict[key])
        nys_dict[key] = np.array(ys_dict[key])

    _print_timestamp()

    avg_cost = 0.0 # track cost to monitor performance during training
    avg_accuracy_pct = 0.0

    # loop over all batches of the epoch:
    n_batches = int(train_x.shape[0] / batch_size)

    # Augment a copy and keep the result in its own variable so train_x is
    # never overwritten: interrupting the cell mid-epoch can no longer leave
    # the notebook holding augmented data in train_x.
    train_x_aug = augment(np.array(train_x))

    for batch_idx in range(n_batches):

        # nys_dict is zeroed before the step and copied into ys_dict after it.
        # NOTE(review): presumably the graph fills nys_dict during
        # session.run; that code is not visible here - confirm.
        for key in ACTIVATION_KEYS:
            nys_dict[key] = np.zeros(ys_dict[key].shape[0], dtype=np.float32)

        _refresh_segment_params()

        batch_x, batch_y = next_batch(batch_size, train_x_aug, train_y)
        _, batch_cost, batch_acc, wd, bd = session.run(
            [optimizer, cost, accuracy_pct, weight_dict, bias_dict],
            feed_dict={x: batch_x, y: batch_y, do_dropout: np.array([1.0], dtype=np.float32)})
        avg_cost += batch_cost / n_batches
        avg_accuracy_pct += batch_acc / n_batches

        for key in ACTIVATION_KEYS:
            ys_dict[key] = nys_dict[key]

    # output logs at end of each epoch of training:
    print("Epoch ", '%03d' % (epoch+1),
          ": cost = ", '{:.3f}'.format(avg_cost),
          ", accuracy = ", '{:.2f}'.format(avg_accuracy_pct), "%",
          sep='')
    _print_timestamp()

    # Segment parameters must match the final ys_dict before validating.
    _refresh_segment_params()

    t_batches = int(val_x.shape[0] / 100)

    test_cost = 0
    test_accuracy_pct = 0

    for val_idx in range(t_batches):
        img, lbl = next_batch(100, val_x, val_y)
        # One graph execution yields both metrics (same pattern as the test cell).
        interim_test_cost, interim_test_accuracy_pct = session.run(
            [cost, accuracy_pct],
            feed_dict={x: img, y: lbl, do_dropout: np.array([0.0], dtype=np.float32)})

        test_cost += (interim_test_cost / t_batches)
        test_accuracy_pct += (interim_test_accuracy_pct / t_batches)

    epoch_arr = np.append(epoch_arr, epoch)
    cost_arr = np.append(cost_arr, test_cost)
    accuracy_arr = np.append(accuracy_arr, test_accuracy_pct)

    train_cost_arr = np.append(train_cost_arr, avg_cost)
    train_accuracy_arr = np.append(train_accuracy_arr, avg_accuracy_pct)

    print("Validation Cost:", '{:.3f}'.format(test_cost),"  Validation Accuracy: ", '{:.2f}'.format(test_accuracy_pct), "%", sep='')
    _print_timestamp()

    # Per-epoch snapshot; the file prefix is built from the validation accuracy.
    val_prefix = "VAL" + str(test_accuracy_pct*100)[0:4]
    _save_snapshot(val_prefix, wd, bd, [
        ("epochs", epoch_arr),
        ("cost", cost_arr),
        ("accuracy", accuracy_arr),
        ("traincost", train_cost_arr),
        ("trainaccuracy", train_accuracy_arr),
    ])

    min_cost = min(min_cost, avg_cost)
    max_accuracy = max(max_accuracy, test_accuracy_pct)
    print("Min Train Cost:", '{:.3f}'.format(min_cost),"  Max Validation Accuracy: ", '{:.2f}'.format(max_accuracy), "%", sep='')

    # Remember the last epoch that improved the validation cost. Without this
    # update the stop condition below only ever fires at a fixed epoch count.
    if test_cost < best_val_cost:
        best_val_cost = test_cost
        last_best_epoch = epoch

    if epoch % 10 == 0:
        _plot_activations()

    # Early stopping: no validation improvement for more than epoch_no_change epochs.
    if (epoch - last_best_epoch) > epoch_no_change:
        break

    # Resume files, reloaded at the top of the next iteration.
    _save_snapshot("", wd, bd, [
        ("epochs", epoch_arr),
        ("cost", cost_arr),
        ("accuracy", accuracy_arr),
        ("train_cost", train_cost_arr),
        ("train_accuracy", train_accuracy_arr),
    ])

    epoch = epoch+1

#### Visualize Loss and Accuracy

In [0]:
  # One panel per metric: training curve in blue, validation curve in green.
  # The first entry of every history array is skipped.
  metric_panels = [
      ("Cost", train_cost_arr, cost_arr),
      ("Accuracy", train_accuracy_arr, accuracy_arr),
  ]
  for panel_no, (metric_name, train_curve, val_curve) in enumerate(metric_panels, start=1):
      plt.subplot(1,2,panel_no)
      plt.title(metric_name)
      plt.xlabel("Epochs")
      plt.ylabel(metric_name)
      plt.plot(epoch_arr[1:],train_curve[1:],'b')
      plt.plot(epoch_arr[1:],val_curve[1:],'g')

  plt.show()

#### Test Model

In [0]:
# Final evaluation on the test split, averaged over batches of 100 images.
# do_dropout is fed 0.0 here (the training loop feeds 1.0).
print("Training Complete. Testing Model.\n")

test_x = np.array(test_x)
t_batches = test_x.shape[0] // 100
eval_dropout = np.array([0.0],dtype=np.float32)

test_cost = 0
test_accuracy_pct = 0

for _ in range(t_batches):
  images, labels = next_batch(100,test_x,test_y)
  batch_metrics = session.run([cost, accuracy_pct],
                              feed_dict={x: images, y: labels, do_dropout: eval_dropout})
  # Accumulate each batch's share of the mean.
  test_cost += batch_metrics[0]/t_batches
  test_accuracy_pct += batch_metrics[1]/t_batches

print("Test Cost:", '{:.3f}'.format(test_cost))
print("Test Accuracy: ", '{:.2f}'.format(test_accuracy_pct), "%", sep='')

# Release the session's resources; the graph cannot be run through it afterwards.
session.close()