# Import libraries

In [12]:
from __future__ import print_function 

import numpy as np
import tensorflow as tf

## Topics
> ## 1. Build a model
>> ## 1.1 Create placeholder
>>> ## Coding 1: Dealing with tensors and dimensions

>> ## 1.2 One-hot encoding
>> ## 1.3 Define the weight of CNN
>> ## 1.4 Understand the self-defined blocks and distinguish the functions of the layers
>>> ## Coding 2: Add some layers in the model


> ## 2. Optimization: Method of Gradient Descent
>> ## 2.1 Cost function and Learning rate
>>> ## Coding 3: Tune the learning rate on cost function


> ## 3. Look into Pre-trained model: YOLOv3 (jupyter_2)
>> ## Advanced topic: ResNet

> ## 4. Train on a small dataset and plot the results on TensorBoard (jupyter_3)

## 1. Build a model
## 1.1 Create placeholder: 
> ## Pre-allocate places for different inputs and output
![Placeholder](https://imgur.com/lTf4ehx.png)

> ## Discussion:
>> ## 1. What's the meaning of <span style="color:blue">None</span> in tf.placeholder( XX ,[<span style="color:blue">None</span>, XX]) ?
>> ## 2. What does it mean to create placeholder for input_picture and input_state ?

In [2]:
def create_placeholders(n_H0, n_W0, n_C0, n_y, n_state):
    """Create the graph placeholders for the picture, the action label and the state.

    Args:
        n_H0: height of the input picture.
        n_W0: width of the input picture.
        n_C0: number of channels of the input picture.
        n_y: size of the second dimension of the action placeholder.
        n_state: size of the input state; meant for the placeholder that the
            exercise line below has to create.

    Returns:
        Tuple ``(pic_placehold, action_placehold, state_placehold)``.
    """
    # Create float placeholder for input_picture
    # (leading dimension is None, the other three are the picture sizes)
    pic_placehold = tf.placeholder(tf.float32,[None,n_H0,n_W0,n_C0])
    # Create integer placeholder for output_class
    action_placehold = tf.placeholder(tf.int32,[None,n_y])
    
    ### Coding 1: Dealing with tensors and dimensions
    ### Step 1: Create integer placeholder for input_state
    ### START CODE HERE ### (1 line)
    state_placehold = 
    ### END CODE HERE ###
    
    return pic_placehold, action_placehold, state_placehold

In [3]:
### Step 2: Fill in the sizes below and observe how each one changes the placeholder shapes
### (these five values are passed, in this order, to create_placeholders in the next cell)
pic_height = 
pic_width = 
pic_channel = 
output_class = 
input_state = 
###

In [4]:
# Build the three placeholders from the sizes chosen in Step 2, then print each
# one so its static shape and dtype can be compared with the expected output.
placeholders = create_placeholders(pic_height, pic_width, pic_channel, output_class, input_state)
pic_placehold, action_placehold, state_placehold = placeholders
for label, tensor in zip(("pic_placehold", "action_placehold", "state_placehold"), placeholders):
    print(label + " = " + str(tensor))

pic_placehold = Tensor("Placeholder:0", shape=(?, 256, 256, 3), dtype=float32)
action_placehold = Tensor("Placeholder_1:0", shape=(?, 6), dtype=int32)
state_placehold = Tensor("Placeholder_2:0", shape=(?, 4), dtype=int32)


**Expected Output**

<table> 
<tr>
<td>
    pic_placehold = Tensor("Placeholder:0", shape=(?, 256, 256, 3), dtype=float32)

</td>
</tr>
<tr>
<td>
    action_placehold  = Tensor("Placeholder_1:0", shape=(?, 6), dtype=int32)

</td>
</tr>
<tr>
<th>
    state_placehold  = Tensor("Placeholder_2:0", shape=(?, 4), dtype=int32)
        
</th>
</tr>
</table>

## 1.2 One-hot encoding
> ## Expressions of probability on feature type
> ## ![one-hot](https://imgur.com/jCjGQgi.jpg)

In [5]:
# Number of discrete action classes; used as the one-hot depth in the next cell.
# Index-to-action mapping (kept as a comment only, the code does not use it):
# action = {0:"right", 1:"left", 2:"front", 3:"back", 4:"up", 5:"down"}
action_type = 6

In [6]:
# One-hot encode the action indices; tf.one_hot appends a new last axis of size action_type.
# NOTE(review): action_placehold is declared as [None, n_y] in create_placeholders, so the
# result here is rank 3 ([None, n_y, action_type]) - confirm that this is intended.
action_onehot = tf.one_hot(action_placehold , action_type)

## 1.3 Define the weight of CNN: 
> ## Filters and channels: Place to keep parameters stored with its special meaning
> ## Create parameters which can be stored and updated for back propagation
>> ## Filter example
![Sobel filter](https://imgur.com/YaAq4q7.png)
>> ## After filter
![Human](https://imgur.com/C8XqlHm.png)
>> ## Channel
![RGB](https://imgur.com/ygKlPRO.png)

In [7]:
def initialize_parameters(filter_height, filter_width, channel_in, channel_out):
    """Create the trainable weight and bias variables of one convolution filter bank.

    Args:
        filter_height: height of each filter.
        filter_width: width of each filter.
        channel_in: number of input channels.
        channel_out: number of output channels (one bias per output channel).

    Returns:
        Tuple ``(weights, biases)``: the weights have shape
        ``[filter_height, filter_width, channel_in, channel_out]`` and start from
        a normal distribution with stddev 0.1; the biases have shape
        ``[channel_out]`` and start at 0.1.
    """
    kernel_shape = [filter_height, filter_width, channel_in, channel_out]
    kernel_init = tf.random_normal(kernel_shape, stddev=0.1)
    weights = tf.Variable(kernel_init)

    bias_init = tf.constant(0.1, shape=[channel_out])
    biases = tf.Variable(bias_init)

    return weights, biases

In [13]:
### Tune the filter size if you want
filter_height = 5
filter_width = 5
channel_in = 3
channel_out = 8
###

# Start from an empty default graph so that only the variables created below exist.
# NOTE: this also discards the placeholders built in section 1.1; they are
# re-created in a later cell before the model is assembled.
tf.reset_default_graph()
with tf.Session() as sess_test:
    w,b = initialize_parameters(filter_height, filter_width, channel_in, channel_out)
    init = tf.global_variables_initializer()
    sess_test.run(init)
    ### Watch for specific channel
    c_in = 0 # 0 ~ channel_in-1
    c_out = 0 # 0 ~ channel_out-1
    ###
    # Fetch the whole filter bank once as a numpy array. Slicing the variable and
    # calling .eval() per element would add a new op to the graph and trigger a
    # separate session run for every printed number.
    w_value = sess_test.run(w)
    print("Your filter (at channel_in: %d channel_out: %d) looks like this\n" %(c_in,c_out) )
    for filter_row in w_value[:, :, c_in, c_out]:
        for weight in filter_row:
            # print(..., end=...) works on Python 2 as well thanks to the
            # `from __future__ import print_function` at the top of the notebook
            print(weight, end='\t')
        print("\n")

Your filter (at channel_in: 0 channel_out: 0) looks like this

0.0230391	-0.0697294	-0.113742	-0.028562	-3.05606e-05	

0.118471	-0.121192	-0.0223764	0.170336	0.00057642	

-0.0319116	-0.0790702	0.0510799	0.051996	0.25189	

-0.0353221	0.00756408	0.0787909	-0.119117	0.00878545	

-0.140177	-0.0323252	0.0839279	-0.0188945	0.289834	



## 1.4 Understand the self-defined blocks and distinguish the functions of the layers
> ## CNN: strides, channel
> ## CNN to NN: Flatten()
> ## NN:  Fully_Connected()

> ## Some definitions of functions 
>> ## Check on Tensorflow for some function used: https://www.tensorflow.org/api_docs/python/tf

> ## Discussion:
>> ## 1. What is stride? What is filter?
>>> 

>> ## 2. How do "Step 1", "Step 2", and "Step 3" happen?
>>> ## CNN operation
>>> ![CNN](https://imgur.com/FIy5Ou4.gif)
>>> ## Max Pooling operation
>>> ![Max Pool](https://imgur.com/ec0zNkC.png)

>> ## 3. Problems in padding = 'SAME', and padding = 'VALID' ?
>>> ## tf.nn.conv2d(XX, XX, XX, padding='SAME') 

> ## Hint for 1.4
- ## <span style="color:red">Graphic operations: CNN</span>
- ## tf.Variable()
- ## tf.nn.conv2d()
- ## tf.nn.max_pool()
- ## tf.contrib.layers.flatten()
- ## tf.contrib.layers.fully_connected()

In [14]:
# Re-create the placeholders: the default graph was reset in the filter demo,
# so the tensors built in section 1.1 no longer belong to the current graph.
pic_height = 256
pic_width = 256
pic_channel = 3
output_class = 6
input_state = 4
pic_placehold, action_placehold, state_placehold = create_placeholders(
    n_H0=pic_height,
    n_W0=pic_width,
    n_C0=pic_channel,
    n_y=output_class,
    n_state=input_state,
)

In [15]:
# The max-pooling window defaults to 2x2; pass maxPool_height / maxPool_width to change it
def conv_block(tensor_in, channel_in, channel_out, filter_height, filter_width, strides_height, strides_width, maxPool_height=2, maxPool_width=2):
    """Build a convolution -> ReLU -> max-pool block and print the shape after each step.

    Args:
        tensor_in: 4-D input tensor (indexed as [batch, height, width, channel]
            by the shape printing below).
        channel_in: number of channels of ``tensor_in``.
        channel_out: number of filters, i.e. channels of the output.
        filter_height, filter_width: size of the convolution filter.
        strides_height, strides_width: convolution strides along height and width.
        maxPool_height, maxPool_width: pooling window size; the same values are
            used as the pooling strides.

    Returns:
        The max-pooled output tensor.
    """
    def _report(row_format, tensor):
        # One table row: height, width, channel and the full static shape.
        dims = tensor.shape
        print(row_format % (dims[1], dims[2], dims[3], dims))

    tf.set_random_seed(1)

    kernel_init = tf.truncated_normal([filter_height, filter_width, channel_in, channel_out], stddev=0.1)
    kernel = tf.Variable(kernel_init)
    bias = tf.Variable(tf.constant(0.1, shape=[channel_out]))

    conv = tf.nn.conv2d(tensor_in, kernel, strides=[1, strides_height, strides_width, 1], padding='SAME')
    _report("Conv\t\t%d\t%d\t%d\t\t| %s", conv)

    # add the per-channel bias, then activate each node
    activated = tf.nn.relu(conv + bias)
    _report("ReLU\t\t%d\t%d\t%d\t\t| %s", activated)

    # window and stride are identical, so pooling windows do not overlap
    pool_window = [1, maxPool_height, maxPool_width, 1]
    pooled = tf.nn.max_pool(activated, ksize=pool_window, strides=pool_window, padding='SAME')
    _report("max_pool\t%d\t%d\t%d\t\t| %s", pooled)
    return pooled

In [23]:
# Print the header of the layer-shape table, then the static shape of the input placeholder.
s = "---------------------------------------------------------------"
print(s)
print("Layer\t\tHeight\tWidth\tChannel\t Node\t| What you will see")
print(s)
print("Input\t\t%d\t%d\t%d\t\t| %s" %(pic_placehold.shape[1],pic_placehold.shape[2],pic_placehold.shape[3],pic_placehold.shape))

### Coding 2: Add some layers in the model
### Step 1: Tune the stride of the first CNN layer
### Tuning part: fill in the two strides so the printed shapes match the expected output
strides_height = 
strides_width = 
filter_height = 4
filter_width = 4
###
# First conv block: 3 input channels -> 8 output channels; conv_block prints one row per layer.
conv1 = conv_block(pic_placehold, 3, 8, filter_height, filter_width, strides_height, strides_width)

---------------------------------------------------------------
Layer		Height	Width	Channel	 Node	| What you will see
---------------------------------------------------------------
Input		256	256	3		| (?, 256, 256, 3)
Conv		128	128	8		| (?, 128, 128, 8)
ReLU		128	128	8		| (?, 128, 128, 8)
max_pool	64	64	8		| (?, 64, 64, 8)


**Step 1: Expected Output**
<table>
    <tr>
        <td>
            Input | (?, 256, 256, 3)
        </td>
    </tr>
    <tr>
        <td>
            Conv | (?, 128, 128, 8)
        </td>
    </tr>
    <tr>
        <td>
            ReLU | (?, 128, 128, 8)
        </td>
    </tr>
    <tr>
        <td>
            max_pool | (?, 64, 64, 8)
        </td>
    </tr>
</table>

In [24]:
### Step 2: Tune the stride and the channels of the second CNN layer
### Tuning part: divisible stride
### (presumably: strides that divide the input height/width evenly - compare with Step 3)
strides_height =  
strides_width = 
channel_in = 
channel_out = 
filter_height = 4
filter_width = 4
###
# Second conv block, stacked on the output of conv1.
conv2 = conv_block(conv1, channel_in, channel_out, filter_height, filter_width, strides_height, strides_width)

Conv		16	32	16		| (?, 16, 32, 16)
ReLU		16	32	16		| (?, 16, 32, 16)
max_pool	8	16	16		| (?, 8, 16, 16)


**Step 2: Expected Output**
<table>
    <tr>
        <td>
            Conv | (?, 16, 32, 16)
        </td>
    </tr>
    <tr>
        <td>
            ReLU | (?, 16, 32, 16)
        </td>
    </tr>
    <tr>
        <td>
            max_pool | (?, 8, 16, 16)
        </td>
    </tr>
</table>

In [83]:
### Step 3: Tune the stride and build the third CNN block
### Tuning part: non-divisible stride
### (presumably: strides that do not divide the input height/width evenly)
strides_height = 
strides_width = 
###
# Build the third block with conv_block so that the printed shapes match the expected output.
conv3 = 

Conv		4	6	32		| (?, 4, 6, 32)
ReLU		4	6	32		| (?, 4, 6, 32)
max_pool	2	3	32		| (?, 2, 3, 32)


**Step 3: Expected Output**
<table>
    <tr>
        <td>
            Conv | (?, 4, 6, 32)
        </td>
    </tr>
    <tr>
        <td>
            ReLU | (?, 4, 6, 32)
        </td>
    </tr>
    <tr>
        <td>
            max_pool | (?, 2, 3, 32)
        </td>
    </tr>
</table>

In [25]:
def fc_block(tensor_in, node_out, activate_func):
    """Build one fully connected layer and print its output shape.

    The layer computes ``activate_func(tensor_in @ w + b)``, or just the affine
    part when ``activate_func`` is None.

    Args:
        tensor_in: 2-D input tensor. Its second dimension must be statically
            known because it is read with ``int(tensor_in.shape[1])`` to size
            the weight matrix.
        node_out: number of output nodes.
        activate_func: None for a purely linear layer, or any callable that maps
            a tensor to a tensor (e.g. ``tf.nn.relu``, ``tf.nn.softmax``).

    Returns:
        The output tensor of the layer (after the activation, if one is given).
    """
    node_in = int(tensor_in.shape[1])
    w = tf.Variable(tf.random_normal([node_in, node_out], stddev=0.35))
    b = tf.Variable(tf.zeros([node_out]))

    # The affine part is the same for every activation, so build it once.
    # (Equivalent high-level API: tf.contrib.layers.fully_connected(
    #  tensor_in, num_outputs=node_out, activation_fn=activate_func).)
    fully_connect = tf.add(tf.matmul(tensor_in, w), b)
    print("fully_connect\t\t\t\t%d\t| %s" % (fully_connect.shape[1], fully_connect.shape))

    if activate_func is None:
        return fully_connect

    # Any callable activation is accepted; previously anything other than
    # relu/softmax left `fully_connect` unassigned and raised UnboundLocalError.
    fully_connect = activate_func(fully_connect)

    # Keep the original table labels for the two activations used in this notebook;
    # fall back to the callable's own name for everything else.
    known_labels = {tf.nn.relu: "ReLU", tf.nn.softmax: "Softmax"}
    label = known_labels.get(activate_func, getattr(activate_func, "__name__", "activation"))
    print("%s\t\t\t\t\t%d\t| %s" % (label, fully_connect.shape[1], fully_connect.shape))

    return fully_connect

In [26]:
# Summary: the full model, convolution blocks (CNN) followed by fully connected layers (NN)
# Print the header of the layer-shape table, then the static shape of the input placeholder.
s = "---------------------------------------------------------------"
print(s)
print("Layer\t\tHeight\tWidth\tChannel\t Node\t| What you will see")
print(s)
print("Input\t\t%d\t%d\t%d\t\t| %s" %(pic_placehold.shape[1],pic_placehold.shape[2],pic_placehold.shape[3],pic_placehold.shape))

### Step 4: Select max_pooling size, and build your layers from the beginning
### (conv_block takes the pooling size through maxPool_height / maxPool_width)
conv1 = 
conv2 = 
###

# Flatten the last conv output to [batch, height*width*channel] for the dense layers.
# Equivalent high-level call: flat = tf.contrib.layers.flatten(conv2)
flat = tf.reshape(conv2, [-1, int(conv2.shape[1])*int(conv2.shape[2])*int(conv2.shape[3])])
print("flatten\t\t\t\t\t%d\t| %s" %(flat.shape[1],flat.shape))

# Dense stack: 256 (linear) -> 128 (ReLU) -> output_class (softmax).
fc_layer1 = fc_block(flat, 256, None)
fc_layer2 = fc_block(fc_layer1, 128, tf.nn.relu)
# Feed the softmax layer from fc_layer2; it used to read fc_layer1, which left
# the 128-node ReLU layer disconnected from the prediction.
predicted_prob = fc_block(fc_layer2, output_class, tf.nn.softmax)

---------------------------------------------------------------
Layer		Height	Width	Channel	 Node	| What you will see
---------------------------------------------------------------
Input		256	256	3		| (?, 256, 256, 3)
Conv		128	128	16		| (?, 128, 128, 16)
ReLU		128	128	16		| (?, 128, 128, 16)
max_pool	32	128	16		| (?, 32, 128, 16)
Conv		32	128	64		| (?, 32, 128, 64)
ReLU		32	128	64		| (?, 32, 128, 64)
max_pool	16	64	64		| (?, 16, 64, 64)
flatten					65536	| (?, 65536)
fully_connect				256	| (?, 256)
fully_connect				128	| (?, 128)
ReLU					128	| (?, 128)
fully_connect				6	| (?, 6)
Softmax					6	| (?, 6)


**Step 4: Expected Output**
<table>
    <tr>
        <td>
            Input | (?, 256, 256, 3)
        </td>
    </tr>
    <tr>
        <td>
            Conv | (?, 128, 128, 16)
        </td>
    </tr>
    <tr>
        <td>
            ReLU | (?, 128, 128, 16)
        </td>
    </tr>
    <tr>
        <td>
            max_pool | (?, 32, 128, 16)
        </td>
    </tr>
    <tr>
        <td>
            Conv | (?, 32, 128, 64)
        </td>
    </tr>
    <tr>
        <td>
            ReLU | (?, 32, 128, 64)
        </td>
    </tr>
    <tr>
        <td>
            max_pool | (?, 16, 64, 64)
        </td>
    </tr>
</table>

# 2. Optimization: Method of gradient descent
## 2.1 Cost function and learning rate
> ## Method of gradient descent updates the weight with: learning_rate*slope
> ![Gradient Descent](https://imgur.com/YQzIBuB.png)
>> ## 1. Cost function:
![Optimization](https://imgur.com/LXBjfLb.png)
>> ## 2. Learning rate
![Learning rate](https://imgur.com/J8U8fu9.jpg)

> ## Meaning of cost function
>> ## Difference between "predict" and "reality"

> ## Optimization problems: Find global minimum
>> ## 1. Local minimum
>> ## 2. Saddle point

## Discussion:
> ## 1. Check what the cost function looks like on the Internet, and explain how the weight changes.
>> ## Link for mode = 2: [Wolfram Alpha](http://www.wolframalpha.com/input/?i=2*w%5E4+%2B+4*w%5E3+%2B+1*w%5E2)

> ## 2. What is "overfitting"?

In [27]:
tf.set_random_seed(1)
# Placeholder for the polynomial coefficients: one coefficient per row.
x = tf.placeholder(tf.float32,[None,1])

### Try w=0,random_uniform for mode = 1,2,3 
w = tf.Variable(0,dtype=tf.float32)
#w = tf.Variable(tf.random_uniform([1], seed=2),dtype=tf.float32)
###

### change different cost function
mode = 1
###

# Each mode defines `coefficients` (fed into x at training time) and the
# polynomial `cost` in w that gradient descent will minimise.
if mode == 1:
    # cost = w^2 - 10w + 25
    coefficients = np.array([[1.],[-10.],[25.]])
    # The line below relies on operator overloading; with these coefficients it
    # is equivalent to the explicit form
    # cost = tf.add(tf.add(w**2, tf.multiply(-10., w)), 25.)
    cost = x[0]*w**2 + x[1]*w + x[2]
elif mode == 2:
    # cost = 2w^4 + 4w^3 + w^2
    coefficients = np.array([[2.],[4.],[1.]])
    cost = x[0]*(w**4) + x[1]*(w**3) + x[2]*(w**2)
elif mode == 3:
    # cost = 5w^6 + 8w^5 + 3w^2 + w
    coefficients = np.array([[5.],[8.],[3.],[1.]])
    cost = x[0]*(w**6)+ x[1]*(w**5)+ x[2]*(w**2)+ x[3]*w
else:
    # Fail here with a clear message instead of a NameError on `cost` in the next cell.
    raise ValueError("mode must be 1, 2 or 3, got %r" % (mode,))

In [28]:
### Coding 3: tune the learning rate
### play for the value between: 0.9 ~ 0.01
learning_rate = 0.01
###
# One run of `train` applies a single gradient-descent update to w.
train = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

init = tf.global_variables_initializer()

print("--------------------------------------------")
print("lr=%f\tweight\t\tvalue update" %learning_rate)
print("--------------------------------------------")

with tf.Session() as session:
    session.run(init)
    # wf holds the weight before the current update
    wf = session.run(w)
    print("Initialize:\t%f" %wf)
    for i in range(100):
        # feed_dict={x:coefficients}, feed our data "coefficients" into our placeholder "x"
        session.run(train,feed_dict={x:coefficients})
        # Read the updated weight once per step and reuse it for both printed
        # columns and for the next step's baseline. It is fetched in its own
        # run so that the value is the one after the update.
        w_updated = session.run(w)
        print("Epoch %d:\t%f\t%f" %(i+1,w_updated,w_updated-wf))
        wf = w_updated

--------------------------------------------
lr=0.010000	weight		value update
--------------------------------------------
Initialize:	0.000000
Epoch 1:	0.100000	0.100000
Epoch 2:	0.198000	0.098000
Epoch 3:	0.294040	0.096040
Epoch 4:	0.388159	0.094119
Epoch 5:	0.480396	0.092237
Epoch 6:	0.570788	0.090392
Epoch 7:	0.659372	0.088584
Epoch 8:	0.746185	0.086813
Epoch 9:	0.831261	0.085076
Epoch 10:	0.914636	0.083375
Epoch 11:	0.996343	0.081707
Epoch 12:	1.076416	0.080073
Epoch 13:	1.154888	0.078472
Epoch 14:	1.231790	0.076902
Epoch 15:	1.307155	0.075364
Epoch 16:	1.381011	0.073857
Epoch 17:	1.453391	0.072380
Epoch 18:	1.524323	0.070932
Epoch 19:	1.593837	0.069514
Epoch 20:	1.661960	0.068123
Epoch 21:	1.728721	0.066761
Epoch 22:	1.794147	0.065426
Epoch 23:	1.858264	0.064117
Epoch 24:	1.921098	0.062835
Epoch 25:	1.982676	0.061578
Epoch 26:	2.043023	0.060346
Epoch 27:	2.102162	0.059139
Epoch 28:	2.160119	0.057957
Epoch 29:	2.216917	0.056798
Epoch 30:	2.272578	0.055662
Epoch 31:	2.327127	0.0545