# Deep Learning Basics

In [None]:
import tensorflow as tf

## Activation function

In [None]:
# NOTE(review): x, w1, bias1, w2 and bias2 are not defined in this cell;
# presumably they come from a model-definition cell — confirm.

# First layer: affine transform of the input, then ReLU.
first_affine = tf.matmul(x, w1) + bias1
a = tf.nn.relu(first_affine)

# Second layer: same pattern, fed by the first layer's activation.
second_affine = tf.matmul(a, w2) + bias2
b = tf.nn.relu(second_affine)

## Loss function

In [None]:
# Loss defined in TensorFlow

# NOTE(review): `y_` (labels) and `y` (predictions) are not defined in this
# cell; presumably they come from a model-building cell — confirm.

# Hand-written cross entropy. Predictions are clipped into [1e-10, 1.0] so
# that tf.log never receives 0.
cross_entropy = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))

# Small constants used to illustrate the operators appearing in the loss above.
v1 = tf.constant([[1.0, 2.0], [3.0, 4.0]])
v2 = tf.constant([[5.0, 6.0], [7.0, 8.0]])
v = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])

# Tensor.eval() requires a default session. A scoped session is used here so
# the demo works on a fresh kernel and the session is closed afterwards.
with tf.Session():
    print((v1 * v2).eval())          # `*` is the element-wise product
    print(tf.matmul(v1, v2).eval())  # tf.matmul is the matrix product
    print(tf.reduce_mean(v).eval())  # mean over every element of the tensor

# Built-in softmax + cross entropy. The keyword argument is `labels`
# (plural); `label` raises a TypeError.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels = y_, logits = y)

# Mean squared error.
mse = tf.reduce_mean(tf.square(y_ - y))

In [None]:
# Loss defined by users
#
# Building blocks for a custom loss: an element-wise comparison and an
# element-wise selection driven by that comparison.

v1 = tf.constant([1.0, 2.0, 3.0, 4.0])
v2 = tf.constant([4.0, 3.0, 2.0, 1.0])

# InteractiveSession installs itself as the default session, which is what
# allows the bare .eval() calls below.
sess = tf.InteractiveSession()

# Boolean tensor: True where v1 > v2.
v1_is_greater = tf.greater(v1, v2)
print(v1_is_greater.eval())

# Pick from v1 where the condition holds, otherwise from v2.
selected = tf.where(v1_is_greater, v1, v2)
print(selected.eval())

## Optimizer

In [None]:
# Training-loop skeleton; the `...` lines stand for code omitted here.
# NOTE(review): `loss`, `x`, `y_`, `STEPS`, `current_X` and `current_Y` are
# not defined in this cell — confirm they are provided by the omitted parts.
train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

...

with tf.Session() as sess:
    
    ...
    
    for i in range(STEPS):
        
        ...
        
        # Feed the batch targets into the label placeholder `y_`. Elsewhere in
        # this notebook `y` is the model output, so feeding `y` would override
        # the prediction instead of supplying the labels.
        sess.run(train_step, feed_dict = {x: current_X, y_: current_Y})

## Learning rate

In [None]:
# Exponential decay
#
# Formula implemented by tf.train.exponential_decay (exponentiation is `**`
# in Python; `^` is bitwise XOR):
#
#     decayed_learning_rate = learning_rate * decay_rate ** (global_step / decay_steps)
#
# tf.train.exponential_decay(learning_rate, global_step, decay_steps, decay_rate, staircase = True/False)

# Step counter driving the decay; it is bookkeeping, not a model parameter,
# so it is excluded from the trainable variables.
global_step = tf.Variable(0, trainable = False)
...
learning_rate = tf.train.exponential_decay(0.1, global_step, 100, 0.96, staircase = True)
...
# Pass global_step to minimize() so it is incremented on every update;
# otherwise the counter stays at 0 and the learning rate never decays.
# The train op gets its own name so it does not overwrite `learning_rate`.
# NOTE(review): `loss` is not defined in this cell — confirm it is provided
# by the omitted code.
learning_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step = global_step)

## Regularization

In [None]:
# NOTE(review): `x`, `y_` and `lambda_` are not defined in this cell;
# presumably they are provided elsewhere — confirm.

# Single weight matrix of shape [2, 1], drawn from a seeded normal distribution.
w = tf.Variable(tf.random_normal([2, 1], stddev = 1, seed = 1))
y = tf.matmul(x, w)
...
# Total loss = mean squared error + L2 penalty on the weights.
squared_error = tf.reduce_mean(tf.square(y_ - y))
l2_penalty = tf.contrib.layers.l2_regularizer(lambda_)(w)
loss = squared_error + l2_penalty

In [None]:
def get_weight(shape, lambda_):
    """Create a weight variable and register its L2 penalty.

    Args:
        shape: Shape passed to ``tf.random_normal`` for the initial value.
        lambda_: Scale passed to ``tf.contrib.layers.l2_regularizer``.

    Returns:
        The newly created float32 ``tf.Variable``.

    Side effect:
        Adds the L2 regularization term for the new variable to the
        ``'losses'`` graph collection.
    """
    initial_value = tf.random_normal(shape)
    weights = tf.Variable(initial_value, dtype = tf.float32)

    # Register the penalty in the 'losses' collection.
    l2_term = tf.contrib.layers.l2_regularizer(lambda_)(weights)
    tf.add_to_collection('losses', l2_term)

    return weights

# Build a fully connected network whose loss is the MSE plus the L2 penalties
# that get_weight() registers in the 'losses' collection.

x = tf.placeholder(tf.float32, shape = (None, 2))
y_ = tf.placeholder(tf.float32, shape = (None, 1))

batch_size = 8

# Number of units per layer, input first and output last.
layer_dimension = [2, 10, 10, 10, 1]

n_layers = len(layer_dimension)

# The layer currently being extended; starts as the input placeholder.
# The same name must be used inside the loop and in the loss below.
cur_layer = x

in_dimension = layer_dimension[0]

for i in range(1, n_layers):
    out_dimension = layer_dimension[i]
    
    # get_weight also adds this layer's L2 term to the 'losses' collection.
    weight = get_weight([in_dimension, out_dimension], 0.001)
    bias = tf.Variable(tf.constant(0.1, shape = [out_dimension]))
    
    cur_layer = tf.nn.relu(tf.matmul(cur_layer, weight) + bias)
    
    # This layer's output width is the next layer's input width.
    in_dimension = layer_dimension[i]
    
mse_loss = tf.reduce_mean(tf.square(y_ - cur_layer))

# The function is tf.add_to_collection; tf.add_collection does not exist.
tf.add_to_collection('losses', mse_loss)

# Total loss: sum of the data term and every registered L2 term.
loss = tf.add_n(tf.get_collection('losses'))

## Moving average

In [None]:
# Variable whose moving average is tracked, and the step counter passed to
# the averager as its second argument.
v1 = tf.Variable(0, dtype = tf.float32)
step = tf.Variable(0, trainable = False)

# Per the TensorFlow documentation, when a step counter is supplied the
# effective decay is min(decay, (1 + step) / (10 + step)).
ema = tf.train.ExponentialMovingAverage(0.99, step)
maintain_averages_op = ema.apply([v1])

with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    
    # Each stage lists the ops to run, in order, before printing the pair
    # [v1, moving average of v1].
    stages = [
        # Initial state: nothing to run.
        [],
        # Set v1 to 5, then update the average.
        [tf.assign(v1, 5), maintain_averages_op],
        # Jump the step counter to 10000, set v1 to 10, then update.
        [tf.assign(step, 10000), tf.assign(v1, 10), maintain_averages_op],
        # Update once more with v1 unchanged.
        [maintain_averages_op],
    ]
    
    for ops in stages:
        for op in ops:
            sess.run(op)
        print(sess.run([v1, ema.average(v1)]))