## CH1 Tensorflow Convolution Layer

### padding

With **SAME padding**, the output height and width are computed as:

out_height = ceil(float(in_height) / float(strides[1]))

out_width = ceil(float(in_width) / float(strides[2]))

With **VALID padding**, the output height and width are computed as:

out_height = ceil(float(in_height - filter_height + 1) / float(strides[1]))

out_width = ceil(float(in_width - filter_width + 1) / float(strides[2]))

#### Import

In [1]:
import tensorflow as tf

  from ._conv import register_converters as _register_converters


#### Initialization

In [2]:
# Output depth of the convolution layer
k_output = 64

# Image properties: spatial size and number of colour channels
image_width, image_height = 10, 10
color_channels = 3

# Convolution filter: spatial size of the kernel
filter_size_width, filter_size_height = 5, 5

# Input/Image: float32 placeholder laid out as
# [batch, height, width, channels]; the batch dimension is left open (None).
# NOTE(review): the name `input` shadows Python's builtin `input()`; it is kept
# unchanged because later cells refer to it by this name.
input = tf.placeholder(
    dtype=tf.float32,
    shape=[None, image_height, image_width, color_channels])

# Weight and bias: the filter is laid out as
# [filter_height, filter_width, in_channels, out_channels]; there is one bias
# value per output channel, initialised to zero.
weight = tf.Variable(
    tf.truncated_normal(
        shape=[filter_size_height, filter_size_width,
               color_channels, k_output]))
bias = tf.Variable(tf.zeros(shape=k_output))

#### CNN layer
`tf.nn.conv2d()`: `strides=[batch, input_height, input_width, input_channels]`. The strides for `batch` and `input_channels` are almost always set to 1.

`tf.nn.bias_add()`

In [3]:
# Convolution (stride 2 along height and width, SAME padding), followed by
# the bias addition and finally the ReLU activation.
conv_layer = tf.nn.relu(
    tf.nn.bias_add(
        tf.nn.conv2d(input, weight, strides=[1, 2, 2, 1], padding='SAME'),
        bias))

### pooling

`tf.nn.max_pool()`

new_height = (input_height - filter_height)/S + 1

new_width = (input_width - filter_width)/S + 1

where `S` is the stride and no padding is applied (the division is rounded down).



### Flatten
`from tensorflow.contrib.layers import flatten`

`fc = flatten(conv_layer)`

## 1x1 Convolution

1*1卷积的主要作用有以下几点：
1. 降维（ dimensionality reduction ）。比如，一张500x500且厚度depth为100 的图片在20个filter上做1x1的卷积，那么结果的大小为500x500x20。
2. 加入非线性。卷积层之后经过激励层，1x1的卷积在前一层的学习表示上添加了非线性激励（ non-linear activation ），提升网络的表达能力。

source:
* https://zhuanlan.zhihu.com/p/30182988
* https://www.zhihu.com/question/56024942

## Inception model

傳統的ConvNet是將Convolution layer stack在一起。按論文裡面說就是，用Inception近似一個稀疏結構。將1x1，3x3，5x5的conv和3x3的pooling，stack在一起，一方面增加了網絡的width，另一方面增加了網絡對尺度的適應性。主要特點是提高了網絡內部計算資源的利用率。

![](http://img.blog.csdn.net/20161108152543838)

- - -
## mnist example

In [4]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Read the MNIST data set from the current directory, asking for one-hot
# labels (one_hot=True) and for the images not to be reshaped (reshape=False).
mnist = input_data.read_data_sets(
    ".",
    one_hot=True,
    reshape=False,
)

# Training hyper-parameters
learning_rate = 1e-5
epochs, batch_size = 10, 128

# How many validation/test samples are used when measuring accuracy.
# Lower this value if the accuracy computation runs out of memory.
test_valid_size = 256

# Network parameters
n_classes = 10  # MNIST total classes (digits 0-9)
dropout = 0.75  # Probability of keeping a unit during dropout

Extracting ./train-images-idx3-ubyte.gz
Extracting ./train-labels-idx1-ubyte.gz
Extracting ./t10k-images-idx3-ubyte.gz
Extracting ./t10k-labels-idx1-ubyte.gz


In [5]:
# Layer weights, keyed by layer name. Every entry is a variable initialised
# from a normal distribution with the listed shape; entries are created in
# the order they are listed.
weights = {
    layer: tf.Variable(tf.random_normal(shape))
    for layer, shape in (
        ('wc1', [5, 5, 1, 32]),
        ('wc2', [5, 5, 32, 64]),
        ('wd1', [7*7*64, 1024]),
        ('out', [1024, n_classes]),
    )
}

# Layer biases, one vector per layer, initialised the same way.
biases = {
    layer: tf.Variable(tf.random_normal(shape))
    for layer, shape in (
        ('bc1', [32]),
        ('bc2', [64]),
        ('bd1', [1024]),
        ('out', [n_classes]),
    )
}

In [6]:
def conv2d(x, W, b, strides=1):
    """Apply a SAME-padded 2-D convolution, add the bias and apply ReLU.

    Args:
        x: Input tensor handed to ``tf.nn.conv2d``.
        W: Convolution filter tensor.
        b: Bias tensor added with ``tf.nn.bias_add``.
        strides: Stride used for both the height and the width dimension;
            the batch and channel strides are fixed to 1.

    Returns:
        The ReLU-activated convolution output.
    """
    stride_spec = [1, strides, strides, 1]
    biased = tf.nn.bias_add(
        tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME'), b)
    return tf.nn.relu(biased)

In [7]:
def maxpool2d(x, k=2):
    """Max-pool ``x`` with a k-by-k window and a matching stride of k.

    Args:
        x: Input tensor handed to ``tf.nn.max_pool``.
        k: Window size, also used as the stride, along height and width.

    Returns:
        The pooled tensor (SAME padding).
    """
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

In [8]:
def conv_net(x, weights, biases, dropout):
    """Build a two-convolution, one-hidden-dense-layer classifier graph.

    Args:
        x: Input image batch (28*28*1 per the layer notes below).
        weights: Mapping with the entries 'wc1', 'wc2', 'wd1' and 'out'.
        biases: Mapping with the entries 'bc1', 'bc2', 'bd1' and 'out'.
        dropout: Value forwarded as the second argument of ``tf.nn.dropout``.

    Returns:
        The un-normalised class scores produced by the output layer.
    """
    # Layer 1 - 28*28*1 to 14*14*32
    pooled1 = maxpool2d(conv2d(x, weights['wc1'], biases['bc1']), k=2)

    # Layer 2 - 14*14*32 to 7*7*64
    pooled2 = maxpool2d(conv2d(pooled1, weights['wc2'], biases['bc2']), k=2)

    # Fully connected layer - 7*7*64 to 1024
    # The flattened width is read from the first dimension of 'wd1' so the
    # reshape always matches the dense weight matrix.
    flat_width = weights['wd1'].get_shape().as_list()[0]
    flat = tf.reshape(pooled2, [-1, flat_width])
    hidden = tf.nn.relu(
        tf.add(tf.matmul(flat, weights['wd1']), biases['bd1']))
    hidden = tf.nn.dropout(hidden, dropout)

    # Output Layer - class prediction - 1024 to 10
    return tf.add(tf.matmul(hidden, weights['out']), biases['out'])

### session

In [None]:
# tf Graph input: image batch, label batch and the dropout keep probability
x = tf.placeholder(tf.float32, [None, 28, 28, 1])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32)

# Model
logits = conv_net(x, weights, biases, keep_prob)

# Define loss and optimizer
# (no backslash continuations needed: the parentheses already continue the line)
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.GradientDescentOptimizer(
    learning_rate=learning_rate).minimize(cost)

# Accuracy: fraction of samples whose highest-scoring class matches the label
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.global_variables_initializer()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)

    # The validation subset never changes, so its feed dict is built once.
    # Dropout is disabled (keep_prob = 1) whenever the model is evaluated.
    valid_feed = {
        x: mnist.validation.images[:test_valid_size],
        y: mnist.validation.labels[:test_valid_size],
        keep_prob: 1.}

    for epoch in range(epochs):
        for batch in range(mnist.train.num_examples//batch_size):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            sess.run(optimizer, feed_dict={
                x: batch_x,
                y: batch_y,
                keep_prob: dropout})

            # Calculate batch loss and accuracy
            loss = sess.run(cost, feed_dict={
                x: batch_x,
                y: batch_y,
                keep_prob: 1.})
            valid_acc = sess.run(accuracy, feed_dict=valid_feed)

            # The two literals are concatenated, so the first one must end
            # with a space to keep "-" and "Loss" apart in the output.
            print('Epoch {:>2}, Batch {:>3} - '
                  'Loss: {:>10.4f} Validation Accuracy: {:.6f}'.format(
                epoch + 1,
                batch + 1,
                loss,
                valid_acc))

    # Calculate Test Accuracy
    test_acc = sess.run(accuracy, feed_dict={
        x: mnist.test.images[:test_valid_size],
        y: mnist.test.labels[:test_valid_size],
        keep_prob: 1.})
    print('Testing Accuracy: {}'.format(test_acc))

## CH2 Tensorflow Convolution LeNet
![LeNet Architecture](./images/lenet.png)

In [11]:
from tensorflow.contrib.layers import flatten

def LeNet(x):
    """Build the LeNet graph on top of ``x`` and return the logits tensor.

    Args:
        x: Input image batch; per the layer notes below it is expected to be
            32x32x1 per image.

    Returns:
        The output of the last fully connected layer (10 values per sample),
        without a softmax applied.
    """
    # Arguments used for tf.truncated_normal, which randomly initialises the
    # weights of each layer (biases start at zero).
    mu = 0
    sigma = 0.1

    # Layer 1: Convolutional. Input = 32x32x1. Output = 28x28x6.
    conv_w1 = tf.Variable(tf.truncated_normal((5, 5, 1, 6), mean=mu, stddev=sigma))
    conv_b1 = tf.Variable(tf.zeros(6))
    conv_layer = tf.nn.conv2d(x, conv_w1, strides=[1, 1, 1, 1], padding='VALID') + conv_b1

    # Activation.
    conv_layer = tf.nn.relu(conv_layer)

    # Pooling. Input = 28x28x6. Output = 14x14x6.
    conv_layer = tf.nn.max_pool(conv_layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

    # Layer 2: Convolutional. Output = 10x10x16.
    conv_w2 = tf.Variable(tf.truncated_normal((5, 5, 6, 16), mean=mu, stddev=sigma))
    conv_b2 = tf.Variable(tf.zeros(16))
    # The bias is added here exactly as in layer 1; previously conv_b2 was
    # created but never used, leaving this layer without a bias term.
    conv_layer = tf.nn.conv2d(conv_layer, conv_w2, strides=[1, 1, 1, 1], padding='VALID') + conv_b2

    # Activation.
    conv_layer = tf.nn.relu(conv_layer)

    # Pooling. Input = 10x10x16. Output = 5x5x16.
    conv_layer = tf.nn.max_pool(conv_layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

    # Flatten. Input = 5x5x16. Output = 400.
    fc = flatten(conv_layer)

    # Layer 3: Fully Connected. Input = 400. Output = 120.
    dnn_w1 = tf.Variable(tf.truncated_normal([400, 120], mean=mu, stddev=sigma))
    dnn_b1 = tf.Variable(tf.zeros(120))
    dnn_layer = tf.add(tf.matmul(fc, dnn_w1), dnn_b1)

    # Activation.
    dnn_layer = tf.nn.relu(dnn_layer)

    # Layer 4: Fully Connected. Input = 120. Output = 84.
    dnn_w2 = tf.Variable(tf.truncated_normal([120, 84], mean=mu, stddev=sigma))
    dnn_b2 = tf.Variable(tf.zeros(84))
    dnn_layer = tf.add(tf.matmul(dnn_layer, dnn_w2), dnn_b2)

    # Activation.
    dnn_layer = tf.nn.relu(dnn_layer)

    # Layer 5: Fully Connected. Input = 84. Output = 10.
    dnn_w3 = tf.Variable(tf.truncated_normal([84, 10], mean=mu, stddev=sigma))
    dnn_b3 = tf.Variable(tf.zeros(10))
    logits = tf.add(tf.matmul(dnn_layer, dnn_w3), dnn_b3)

    return logits