# Convolutional Neural Network
---

论文[Multi-digit Number Recognition from Street View Imagery using Deep Convolutional Neural Networks](https://arxiv.org/abs/1312.6082)的convolutional neural network的实现，网络的结构与论文一致

In [1]:
%matplotlib inline

import os
import sys
import random
import pickle

import numpy as np
import tensorflow as tf
import PIL.Image as Image
import matplotlib.pyplot as plt

### 读取数据
---

In [4]:
with open('SVHN.pickle', 'rb') as f:
    save = pickle.load(f)
    train_data = save['train_data']
    train_labels = save['train_labels']
    test_data = save['test_data']
    test_labels = save['test_labels']
    valid_data = save['valid_data']
    valid_labels = save['valid_labels']
    del save
    print(train_data.shape, train_labels.shape)
    print(test_data.shape, test_labels.shape)
    print(valid_data.shape, valid_labels.shape)

((230070, 32, 32, 1), (230070, 6))
((13068, 32, 32, 1), (13068, 6))
((5684, 32, 32, 1), (5684, 6))


In [13]:
train_labels[100,:]

array([ 3,  4,  1,  3, 10, 10])

### 构建网络
---

网络结构

8 ConvNets Layers
2 FC hidden layers

##### ConvNets Layers

* units(means depth) in each layer

    [48, 64, 128, 160, 192, 192, 192, 192]

* neurons
  * layer 1: maxout
  * others: ReLU

* each layer includes max pooling and subtrative normalization
  * max pooling: 2x2 window with 2 and 1 at each layer
  * subtrative normalization: 3x3 window, perserves represetation size

* conv kernels
  * size: 5x5
  * padding: zero padding, preserve representation size
  * stride: 1
  

##### FC hidden layers

* units in each layer

    [3072, 3072]

* each layer with drop out

In [17]:
# define settings of the CNN

# depth of each conv layer
depth_1, depth_2, depth_3, depth_4, depth_5, depth_6, depth_7, depth_8 = [48, 64, 128, 160, 192, 192, 192, 192]

batch_size = 128
image_size = 32
pooling_width = 2
sub_norm_width = 3
patch_size = 5
stride = 1
num_channels = 1
num_lables = 11

num_hidden_neuron = 3072

In [19]:
# create CNN graph and define weights, biases, and other varible etc.

graph = tf.Graph()

with graph.as_default():
    # input data
    tf_train_data = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.int32, shape=(batch_size, 6))
    tf_test_data = tf.constant(test_data)
    tf_test_labls = tf.constant(test_labels)
    tf_valid_data = tf.constant(valid_data)
    tf_valid_labels = tf.constant(valid_labels)
    
    # define weights and biases
    # conv layer weights and biases
    conv_weights_1 = tf.Variable(tf.truncated_normal([patch_size, patch_size, num_channels, depth_1], stddev=0.1), name='c_w_1')
    conv_biases_1 = tf.Variable(tf.zeros(shape=[depth_1]), name='c_b_1')
    
    conv_weights_2 = tf.Variable(tf.truncated_normal([patch_size, patch_size, depth_1, depth_2], stddev=0.1), name='c_w_2')
    conv_biases_2 = tf.Variable(tf.zeros(shape=[depth_2]), name='c_b_2')
    
    conv_weights_3 = tf.Variable(tf.truncated_normal(shape=[patch_size, patch_size, depth_2, depth_3], stddev=0.1), name='c_w_3')
    conv_biases_3 = tf.Variable(tf.zeros(shape=[depth_3]), name='c_b_3')
    
    conv_weights_4 = tf.Variable(tf.truncated_normal(shape=[patch_size, patch_size, depth_3, depth_4], stddev=0.1), name='c_w_4')
    conv_biases_4 = tf.Variable(tf.zeros(shape=[depth_4]), name='c_b_4')
    
    conv_weights_5 = tf.Variable(tf.truncated_normal(shape=[patch_size, patch_size, depth_4, depth_5], stddev=0.1), name='c_w_5')
    conv_biases_5 = tf.Variable(tf.zeros(shape=[depth_5]), name='c_b_5')
    
    conv_weights_6 = tf.Variable(tf.truncated_normal(shape=[patch_size, patch_size, depth_5, depth_6], stddev=0.1), name='c_w_6')
    conv_biases_6 = tf.Variable(tf.zeros(shape=[depth_6]), name='c_b_6')
    
    conv_weights_7 = tf.Variable(tf.truncated_normal(shape=[patch_size, patch_size, depth_6, depth_7], stddev=0.1), name='c_w_7')
    conv_biases_7 = tf.Variable(tf.zeros(shape=[depth_7]), name='c_b_7')
    
    conv_weights_8 = tf.Variable(tf.truncated_normal(shape=[patch_size, patch_size, depth_7, depth_8], stddev=0.1), name='c_w_8')
    conv_biases_8 = tf.Variable(tf.zeros(shape=[depth_8]), name='c_b_8')
    # full connected layer weights and biases
    fc_weights_1 = tf.Variable(tf.truncated_normal([16 * 16 * 192, num_hidden_neuron], stddev=0.1), name='f_w_1') 
    fc_biases_1 = tf.Variable(tf.zeros([num_hidden_neuron]), name='f_b_1')
    fc_weights_2 = tf.Variable(tf.truncated_normal([num_hidden_neuron, num_hidden_neuron], stddev=0.1), name='f_w_2') 
    fc_biases_2 = tf.Variable(tf.zeros([num_hidden_neuron]), name='f_b_2')
    
    # output
    fc_weights_out_length = tf.get_variable('f_w_o_len', shape=[num_hidden_neuron, 6], dtype=tf.float32, 
                                            initializer=tf.contrib.layers.xavier_initializer_conv2d())
    fc_biases_out_length = tf.Variable(tf.zeros([6]), name='f_b_o_len')
    
    fc_weights_out_1 = tf.get_variable('f_w_o_1', shape=[num_hidden_neuron, num_lables], dtype=tf.float32, 
                                       initializer=tf.contrib.layers.xavier_initializer_conv2d())
    fc_biases_out_1 = tf.Variable(tf.zeros([num_lables]), name='f_b_o_1')
    
    fc_weights_out_2 = tf.get_variable('f_w_o_2', shape=[num_hidden_neuron, num_lables], dtype=tf.float32, 
                                       initializer=tf.contrib.layers.xavier_initializer_conv2d())
    fc_biases_out_2 = tf.Variable(tf.zeros([num_lables]), name='f_b_o_2')
    
    fc_weights_out_3 = tf.get_variable('f_w_o_3', shape=[num_hidden_neuron, num_lables], dtype=tf.float32, 
                                       initializer=tf.contrib.layers.xavier_initializer_conv2d())
    fc_biases_out_3 = tf.Variable(tf.zeros([num_lables]), name='f_b_o_3')
    
    fc_weights_out_4 = tf.get_variable('f_w_o_4', shape=[num_hidden_neuron, num_lables], dtype=tf.float32, 
                                       initializer=tf.contrib.layers.xavier_initializer_conv2d())
    fc_biases_out_4 = tf.Variable(tf.zeros([num_lables]), name='f_b_o_4')
    
    fc_weights_out_5 = tf.get_variable('f_w_o_5', shape=[num_hidden_neuron, num_lables], dtype=tf.float32, 
                                       initializer=tf.contrib.layers.xavier_initializer_conv2d())
    fc_biases_out_5 = tf.Variable(tf.zeros([num_lables]), name='f_b_o_5')
    
    def model(data, keep_prob):
        # conv layers
        conv_1 = tf.nn.conv2d(input=data, filter=conv_weights_1, strides=[1, 1, 1, 1], 
                              padding='SAME', name='conv_1') + conv_biases_1
        conv_1 = tf.nn.relu(conv_1, name='conv_1_relu')
        pool_1 = tf.nn.max_pool(conv_1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool_1')
        
        conv_2 = tf.nn.conv2d(input=pool_1, filter=conv_weights_2, strides=[1, 1, 1, 1], 
                              padding='SAME', name='conv_2') + conv_biases_2
        conv_2 = tf.nn.relu(conv_2, name='conv_2_relu')
        pool_2 = tf.nn.max_pool(conv_2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool_2')
        
        conv_3 = tf.nn.conv2d(input=pool_2, filter=conv_weights_3, strides=[1, 1, 1, 1], 
                              padding='SAME', name='conv_3') + conv_biases_3
        conv_3 = tf.nn.relu(conv_3, name='conv_3_relu')
        pool_3 = tf.nn.max_pool(conv_3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool_3')
        
        conv_4 = tf.nn.conv2d(input=pool_3, filter=conv_weights_4, strides=[1, 1, 1, 1], 
                              padding='SAME', name='conv_4') + conv_biases_4
        conv_4 = tf.nn.relu(conv_4, name='conv_4_relu')
        pool_4 = tf.nn.max_pool(conv_4, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool_4')
        
        conv_5 = tf.nn.conv2d(input=pool_4, filter=conv_weights_5, strides=[1, 1, 1, 1], 
                              padding='SAME', name='conv_5') + conv_biases_5
        conv_5 = tf.nn.relu(conv_5, name='conv_5_relu')
        pool_5 = tf.nn.max_pool(conv_5, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool_5')
        
        conv_6 = tf.nn.conv2d(input=pool_5, filter=conv_weights_6, strides=[1, 1, 1, 1], 
                              padding='SAME', name='conv_6') + conv_biases_6
        conv_6 = tf.nn.relu(conv_6, name='conv_6_relu')
        pool_6 = tf.nn.max_pool(conv_6, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool_6')
        
        conv_7 = tf.nn.conv2d(input=pool_6, filter=conv_weights_7, strides=[1, 1, 1, 1], 
                              padding='SAME', name='conv_7') + conv_biases_7
        conv_7 = tf.nn.relu(conv_7, name='conv_7_relu')
        pool_7 = tf.nn.max_pool(conv_7, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool_7')
        
        conv_8 = tf.nn.conv2d(input=pool_7, filter=conv_weights_8, strides=[1, 1, 1, 1], 
                              padding='SAME', name='conv_8') + conv_biases_8
        conv_8 = tf.nn.relu(conv_8, name='conv_8_relu')
        pool_8 = tf.nn.max_pool(conv_8, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool_8')
        # FC layers
        shapes = pool_8.get_shape().as_list()
        reshape = tf.reshape(pool_8, [shapes[0], shapes[1] * shapes[2]*shapes[3]])
        hidden_1 = tf.nn.relu(tf.matmul(reshape, fc_weights_1) + fc_biases_1)
        hidden_1 = tf.nn.dropout(hidden_1, keep_prob)
        hidden_2 = tf.nn.relu(tf.matmul(hidden_1, fc_weights_2) + fc_biases_2)
        hidden_2 = tf.nn.dropout(hidden_2, keep_prob)
        
        # six output layers
        logit_len = tf.matmul(hidden_2, fc_weights_out_length) + fc_biases_out_length
        logit_1 = tf.matmul(hidden_2, fc_weights_out_1) + fc_biases_out_1
        logit_2 = tf.matmul(hidden_2, fc_weights_out_2) + fc_biases_out_2
        logit_3 = tf.matmul(hidden_2, fc_weights_out_3) + fc_biases_out_3
        logit_4 = tf.matmul(hidden_2, fc_weights_out_4) + fc_biases_out_4
        logit_5 = tf.matmul(hidden_2, fc_weights_out_5) + fc_biases_out_5
        return logit_len, logit_1, logit_2, logit_3, logit_4, logit_5
        
    logit_len, logit_1, logit_2, logit_3, logit_4, logit_5 = model(tf_train_data, 0.8)
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logit_len, tf_train_labels[:, 0])) +\
        tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logit_1, tf_train_labels[:, 1])) +\
        tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logit_2, tf_train_labels[:, 2])) +\
        tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logit_3, tf_train_labels[:, 3])) +\
        tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logit_4, tf_train_labels[:, 4])) +\
        tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logit_5, tf_train_labels[:, 5]))
        
    global_step = tf.Variable(0)
    learning_rate = tf.train.exponential_decay(0.2, global_step, 10000, 0.95)
    optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(loss, global_step=global_step)
    
    train_prediction = tf.pack(list(map(tf.nn.softmax, model(tf_train_data, 1.0, shape))))
    valid_prediction = tf.pack(list(map(tf.nn.softmax, model(tf_valid_data, 1.0, shape))))
    test_prediction = tf.pack(list(map(tf.nn.softmax, model(tf_test_data, 1.0, shape))))
    
    saver = tf.train.Saver()

ValueError: Dimensions must be equal, but are 192 and 49152 for 'MatMul' (op: 'MatMul') with input shapes: [128,192], [49152,3072].