# 第7回講義 演習

## 課題1. 畳み込みニューラルネットワーク(Convolutional Neural Networks)の実装と学習

In [1]:
from __future__ import division
from collections import OrderedDict
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from theano.tensor.nnet import conv2d
from theano.tensor.signal import pool
from theano.tensor.shared_randomstreams import RandomStreams

import numpy as np
import theano
import theano.tensor as T

rng = np.random.RandomState(1234)

Using gpu device 0: GRID K520 (CNMeM is enabled with initial size: 95.0% of memory, cuDNN 4007)


### 1. MNISTデータセットの読み込み

In [2]:
# Fetch MNIST and cast features to float32 and labels to int32.
mnist = fetch_mldata('MNIST original')
# Seed the shuffle with the module-level `rng` so reruns see the same ordering.
mnist_X, mnist_y = shuffle(mnist.data.astype('float32'), mnist.target.astype('int32'), random_state=rng)

# Rescale pixel values by the 8-bit maximum (255).
mnist_X = mnist_X / 255.0

# Hold out 20% as the test set; seeded like the later train/validation split.
train_X, test_X, train_y, test_y = train_test_split(mnist_X, mnist_y, test_size=0.2, random_state=rng)

In [3]:
# One-hot encode the training labels (10 classes). Built as int32 so the
# targets already match the integer-matrix target input used for training,
# instead of relying on an implicit float64 -> int32 downcast.
# test_y is left as plain integer labels.
train_y = np.eye(10, dtype='int32')[train_y]

# Reshape the flat 784-pixel rows to (n_samples, channels=1, rows=28, cols=28),
# the 4-D layout expected by conv2d.
train_X = train_X.reshape((train_X.shape[0], 1, 28, 28))
test_X = test_X.reshape((test_X.shape[0], 1, 28, 28))

# Hold out 20% of the training data for validation (fixed seed).
train_X, valid_X, train_y, valid_y = train_test_split(train_X, train_y, test_size=0.2, random_state=42)

### 2. 畳み込みとプーリング in Theano

#### 畳み込み: theano.tensor.nnet.conv2d [[link]](http://deeplearning.net/software/theano/library/tensor/nnet/conv.html#theano.tensor.nnet.conv2d)

- 入力または隠れ層$X_{i,j}^{k}$
    - 次元数4$(n,k,i,j)$
        - $n$：バッチサイズ
        - $k$：チャネル数
        - $i$：入力の行数
        - $j$：入力の列数
- 畳み込みのフィルタ（重み）$W_{i,j}^{k,l}$
    - 次元数4$(k,l,i,j)$
        - $k$：フィルタ数（出力のチャネル数)
        - $l$：入力のチャネル数
        - $i$：フィルタの行数
        - $j$：フィルタの列数
    - ストライド：フィルタを適用する位置の間隔（theanoのsubsampleオプション）
    - ゼロパディング：入力の周りに値0の縁を加える（theanoのborder_modeオプション）
        - 入力のサイズを保つ為，フィルタの縦or横の次元が$F$のときパディング数を$(F-1)/2$とする．
        - ただしborder_mode="full"だと，$F-1$となることに注意
- フィルタ後のサイズは，入力の縦or横の次元数$N$，フィルタの縦or横の次元数$F$，ストライドの縦or横の量$S$で決まる．
    - $(N-F)/S+1$（パディングなしの場合．片側のパディング数を$P$とすると$(N-F+2P)/S+1$）
    - border_mode="full"の場合，S=1のとき$(N-F+2(F-1))+1=N+F-1$

sample image & filter

In [4]:
#- 入力 (4次元)
x = T.ftensor4('x')

#- サンプル画像
sample_image = np.array([[1, 1, 1, 0, 0], 
                         [0, 1, 1, 1, 0], 
                         [0, 0, 1, 1, 1], 
                         [0, 0, 1, 1, 0], 
                         [0, 1, 1, 0, 0]]).astype("float32").reshape(1, 1, 5, 5)

#- フィルタ
W = np.array([[1,0,1],
              [0,1,0],
              [1,0,1]]).astype('float32').reshape(1, 1, 3, 3)

print 'Sample image'
print sample_image
print ''
print 'Filter'
print W

Sample image
[[[[ 1.  1.  1.  0.  0.]
   [ 0.  1.  1.  1.  0.]
   [ 0.  0.  1.  1.  1.]
   [ 0.  0.  1.  1.  0.]
   [ 0.  1.  1.  0.  0.]]]]

Filter
[[[[ 1.  0.  1.]
   [ 0.  1.  0.]
   [ 1.  0.  1.]]]]


- ストライド(subsampleで指定): (1, 1)
- パディング(border_modeで指定): なし ('valid')
- 出力のサイズ: (5 - 3)/1 + 1 = 3

In [5]:
# 'valid' convolution (no padding) with stride (1, 1).
convoluted_image_st1x1 = conv2d(x, W, border_mode='valid', subsample=(1, 1))
# Compile the graph into a callable; the function name is spelled 'convolution'.
convolution = theano.function(inputs=[x], outputs=convoluted_image_st1x1, name='convolution')

# Reshape the 4-D result to 3x3 for display.
print(convolution(sample_image).reshape(3, 3))

[[ 4.  3.  4.]
 [ 2.  4.  3.]
 [ 2.  3.  4.]]


- ストライド: (2, 2)
- パディング: なし
- 出力のサイズ: (5 - 3)/2 + 1 = 2

In [6]:
# Same filter without padding, applied with a stride of 2 in each direction.
convoluted_image_st2x2 = conv2d(x, W, subsample=(2, 2), border_mode='valid')
convolution_stride = theano.function([x], convoluted_image_st2x2, name='convolution_stride')

# Reshape the 4-D result to 2x2 for display.
print(convolution_stride(sample_image).reshape(2, 2))

[[ 4.  4.]
 [ 2.  4.]]


- ストライド: (1, 1)
- パディング: full (2, 2)
- 出力のサイズ: 5 + 3 - 1 = 7

In [8]:
# 'full' border mode with stride (1, 1).
convoluted_image_st1x1_full = conv2d(x, W, subsample=(1, 1), border_mode='full')
convolution_fullpadding = theano.function([x], convoluted_image_st1x1_full, name='convolution_full')

# Reshape the 4-D result to 7x7 for display.
print(convolution_fullpadding(sample_image).reshape(7, 7))

[[ 1.  1.  2.  1.  1.  0.  0.]
 [ 0.  2.  2.  3.  1.  1.  0.]
 [ 1.  1.  4.  3.  4.  1.  1.]
 [ 0.  1.  2.  4.  3.  3.  0.]
 [ 0.  1.  2.  3.  4.  1.  1.]
 [ 0.  0.  2.  2.  1.  1.  0.]
 [ 0.  1.  1.  1.  1.  0.  0.]]


- ストライド: (1, 1)
- パディング: (1, 1)
- 出力のサイズ: (5 - 3 + 2)/1 + 1 = 5

In [16]:
# Explicit (1, 1) zero padding with stride (1, 1).
convoluted_image_st1x1_same = conv2d(x, W, subsample=(1, 1), border_mode=(1, 1))
convolution_samepadding = theano.function([x], convoluted_image_st1x1_same, name='convolution_same')

# Reshape the 4-D result to 5x5 for display.
print(convolution_samepadding(sample_image).reshape(5, 5))

[[ 2.  2.  3.  1.  1.]
 [ 1.  4.  3.  4.  1.]
 [ 1.  2.  4.  3.  3.]
 [ 1.  2.  3.  4.  1.]
 [ 0.  2.  2.  1.  1.]]


#### プーリング: theano.tensor.signal.pool [[link]](http://deeplearning.net/software/theano/library/tensor/signal/pool.html)

- プーリングには次の種類がある
    - Max pooling (theanoでは'max')
    - Sum pooling (theanoでは'sum')
    - Mean pooling (theanoでは'average_exc_pad')
    - その他Lpプーリングなど(theano未実装)
- Convと同様，ストライドやパディングを考えることもある．
    - ストライドはデフォルトではdsと同じ
- ignore_border=Falseにすると，ウィンドウが画像の端からはみ出す位置（ウィンドウサイズに満たない端の部分）もプーリングの対象になる（Trueの場合は無視される）

sample image

In [10]:
#- 入力
x = T.ftensor4('x')

sample_image = np.array([[77, 80, 82, 78, 70], 
                         [83, 78, 80, 83, 82], 
                         [87, 82, 81, 80, 74], 
                         [87, 87, 85, 77, 66], 
                         [84, 79, 77, 78, 76]]).astype("float32").reshape(1, 1, 5, 5)

print 'Sample image'
print sample_image

Sample image
[[[[ 77.  80.  82.  78.  70.]
   [ 83.  78.  80.  83.  82.]
   [ 87.  82.  81.  80.  74.]
   [ 87.  87.  85.  77.  66.]
   [ 84.  79.  77.  78.  76.]]]]


- ウィンドウサイズ(dsで指定): (2, 2)
- ストライド(stで指定. defaultではdsと同じ): (2, 2)
- プーリング(modeで指定): 'max'

In [11]:
# 2x2 max pooling; no stride (st) is passed, so the library default applies.
pooled_image = pool.pool_2d(x, ds=(2, 2), ignore_border=True, mode='max')
pooling = theano.function([x], pooled_image, name='pooling')

# Reshape the 4-D result to 2x2 for display.
print(pooling(sample_image).reshape(2, 2))

[[ 83.  83.]
 [ 87.  85.]]


- ウィンドウサイズ: (2, 2)
- ストライド: (1, 1)
- max pooling

In [13]:
# 2x2 max pooling with an explicit stride of (1, 1).
pooled_image_st1x1 = pool.pool_2d(x, ds=(2, 2), st=(1, 1), ignore_border=True, mode='max')
pooling_st1x1 = theano.function([x], pooled_image_st1x1, name='pooling_stride_1x1')

# Reshape the 4-D result to 4x4 for display.
print(pooling_st1x1(sample_image).reshape(4, 4))

[[ 83.  82.  83.  83.]
 [ 87.  82.  83.  83.]
 [ 87.  87.  85.  80.]
 [ 87.  87.  85.  78.]]


- ウィンドウサイズ: (2, 2)
- ストライド: (2, 2)
- mean pooling

In [14]:
# 2x2 mean pooling using the 'average_exc_pad' mode.
pooled_image_mean = pool.pool_2d(x, ds=(2, 2), ignore_border=True, mode='average_exc_pad')
pooling_mean = theano.function([x], pooled_image_mean, name='pooling_mean')

# Reshape the 4-D result to 2x2 for display.
print(pooling_mean(sample_image).reshape(2, 2))

[[ 79.5   80.75]
 [ 85.75  80.75]]


### 3. 畳み込み層(Convolutional Layer)の実装

In [118]:
class Conv:
    """Convolutional layer: 2-D convolution, per-filter bias, then an activation.

    Parameters
    ----------
    filter_shape : 4-tuple ``(k, l, i, j)``
        k: number of filters (output channels), l: number of input channels,
        i: filter rows, j: filter columns.
    function : callable
        Activation applied to the biased convolution output.
    border_mode, subsample :
        Forwarded unchanged to ``conv2d`` in ``f_prop``.

    Attributes
    ----------
    W, b : Theano shared variables (filters of shape ``(k, l, i, j)``, bias of
        length ``k``), both float32.
    params : list ``[W, b]``.
    z : output of the most recent ``f_prop`` call (set by ``f_prop``).
    """

    #- Constructor
    def __init__(self, filter_shape, function, border_mode="valid", subsample=(1, 1)):
        self.function = function
        self.border_mode = border_mode
        self.subsample = subsample

        # Unpack filter_shape for easy computing.
        k, l, i, j = filter_shape

        # Gaussian initialisation scaled by sqrt(2 / fan_in), fan_in = l*i*j.
        scale = np.sqrt(2.0 / (l * i * j))
        # Draw from the seeded module-level `rng` (as Layer does) so runs are
        # reproducible, and cast AFTER scaling so the stored weights are
        # float32 regardless of NumPy's scalar-promotion rules.
        W_init = (scale * rng.randn(k, l, i, j)).astype("float32")

        self.W = theano.shared(W_init, name='w')
        self.b = theano.shared(np.zeros(k).astype("float32"), name='b')

        self.params = [self.W, self.b]

    #- Forward Propagation
    def f_prop(self, x):
        """Return ``function(conv2d(x, W) + b)`` and cache it in ``self.z``."""
        conv_out = conv2d(x, self.W, border_mode=self.border_mode, subsample=self.subsample)
        # Reshape the bias to (1, k, 1, 1) so it lines up with the channel axis.
        self.z = self.function(conv_out + self.b.reshape((1, -1, 1, 1)))
        return self.z

### 4. プーリング層(Pooling Layer)の実装

In [119]:
class Pooling:
    """2-D pooling layer with no trainable parameters.

    pool_size is passed to ``pool.pool_2d`` as ``ds`` and mode as ``mode``.
    """

    #- Constructor
    def __init__(self, pool_size=(2, 2), mode='max'):
        # No trainable parameters; kept so every layer exposes `.params`.
        self.params = []
        self.mode = mode
        self.pool_size = pool_size

    #- Forward Propagation
    def f_prop(self, x):
        """Return x pooled with the configured window and mode (ignore_border=True)."""
        pooled = pool.pool_2d(input=x, ds=self.pool_size, mode=self.mode, ignore_border=True)
        return pooled

### 5. 平坦化層(Flatten Layer)

In [120]:
class Flatten:
    """Layer that flattens its input via ``T.flatten(x, outdim)``.

    It has no trainable parameters.
    """

    #- Constructor
    def __init__(self, outdim=2):
        # No trainable parameters; kept so every layer exposes `.params`.
        self.params = []
        self.outdim = outdim

    #- Forward Propagation
    def f_prop(self, x):
        """Return x flattened to ``self.outdim`` dimensions."""
        flat = T.flatten(x, self.outdim)
        return flat

### 6. 全結合層

In [121]:
class Layer:
    """Fully connected layer computing ``function(dot(x, W) + b)``.

    W has shape (in_dim, out_dim) and b has length out_dim; both are float32
    Theano shared variables listed in ``params``.
    """

    #- Constructor
    def __init__(self, in_dim, out_dim, function):
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.function = function

        # Weights are drawn uniformly from [-limit, limit] with
        # limit = sqrt(6 / (in_dim + out_dim)), using the module-level rng.
        limit = np.sqrt(6. / (in_dim + out_dim))
        W_init = rng.uniform(low=-limit, high=limit, size=(in_dim, out_dim)).astype("float32")
        b_init = np.zeros(out_dim).astype("float32")

        self.W = theano.shared(W_init, name="W")
        self.b = theano.shared(b_init, name="b")
        self.params = [self.W, self.b]

    #- Forward Propagation
    def f_prop(self, x):
        """Return the activated affine transform of x and cache it in ``self.z``."""
        pre_activation = T.dot(x, self.W) + self.b
        self.z = self.function(pre_activation)
        return self.z

### 7. 更新則

In [122]:
#--- Stochastic Gradient Descent
def sgd(params, g_params, eps=np.float32(0.1)):
    updates = OrderedDict()
    for param, g_param in zip(params, g_params):
        updates[param] = param - eps * g_param
    return updates

### 8. ネットワークの定義

In [123]:
# Hidden-layer non-linearity shared by the conv and dense layers.
activation = T.tanh

# Network definition; the trailing comments give the intended feature-map
# size (rows x cols x channels) before -> after each layer.
layers = [
    Conv(filter_shape=(20, 1, 5, 5), function=activation),   # 28x28x 1 -> 24x24x20
    Pooling(pool_size=(2, 2)),                               # 24x24x20 -> 12x12x20
    Conv(filter_shape=(50, 20, 5, 5), function=activation),  # 12x12x20 ->  8x 8x50
    Pooling(pool_size=(2, 2)),                               #  8x 8x50 ->  4x 4x50
    Flatten(outdim=2),
    Layer(in_dim=4*4*50, out_dim=500, function=activation),
    Layer(in_dim=500, out_dim=10, function=T.nnet.softmax),
]

### 9. train関数とtest関数とvalid関数

In [124]:
# Symbolic inputs: a 4-D float32 tensor and an int32 target matrix.
x = T.ftensor4('x')
t = T.imatrix('t')

# Chain the layers' forward passes and collect every trainable parameter.
params = []
layer_out = x
for layer in layers:
    params += layer.params
    layer_out = layer.f_prop(layer_out)

# Use the value returned by the final f_prop rather than `layers[-1].z`:
# only Conv and Layer set `.z`, so the attribute lookup would fail if the
# network ended in a Pooling or Flatten layer.
y = layer_out

# Mean categorical cross-entropy between the network output and the targets.
cost = T.mean(T.nnet.categorical_crossentropy(y, t))

g_params = T.grad(cost, params)
updates = sgd(params, g_params)

# train: one SGD step, returns the cost.
# valid: returns [cost, argmax predictions] without updating parameters.
# test:  returns argmax predictions only.
# All three allow input downcasting, so they accept the same array dtypes.
train = theano.function(inputs=[x, t], outputs=cost, updates=updates, allow_input_downcast=True, name='train')
valid = theano.function(inputs=[x, t], outputs=[cost, T.argmax(y, axis=1)], allow_input_downcast=True, name='valid')
test  = theano.function(inputs=[x], outputs=T.argmax(y, axis=1), allow_input_downcast=True, name='test')

### 10. 学習

In [126]:
batch_size = 100
n_epochs = 50
# Floor division: a trailing partial mini-batch (if any) is skipped.
n_batches = train_X.shape[0] // batch_size

# valid_y does not change during training, so convert it to integer class
# labels once instead of on every epoch.
valid_labels = np.argmax(valid_y, axis=1).astype('int32')

for epoch in xrange(n_epochs):
    # Reshuffle the training set each epoch, seeded by the module-level rng.
    train_X, train_y = shuffle(train_X, train_y, random_state=rng)
    for i in xrange(n_batches):
        start = i * batch_size
        end = start + batch_size
        train(train_X[start:end], train_y[start:end])

    # Evaluate on the whole validation set after each epoch.
    valid_cost, pred_y = valid(valid_X, valid_y)
    valid_f1 = f1_score(valid_labels, pred_y, average='macro')
    print('EPOCH:: %i, Validation cost: %.3f, Validation F1: %.3f' % (epoch + 1, valid_cost, valid_f1))

EPOCH:: 1, Validation cost: 0.046, Validation F1: 0.985
EPOCH:: 2, Validation cost: 0.042, Validation F1: 0.986
EPOCH:: 3, Validation cost: 0.044, Validation F1: 0.986
EPOCH:: 4, Validation cost: 0.041, Validation F1: 0.988
EPOCH:: 5, Validation cost: 0.040, Validation F1: 0.988
EPOCH:: 6, Validation cost: 0.039, Validation F1: 0.988
EPOCH:: 7, Validation cost: 0.039, Validation F1: 0.988
EPOCH:: 8, Validation cost: 0.038, Validation F1: 0.988
EPOCH:: 9, Validation cost: 0.037, Validation F1: 0.988
EPOCH:: 10, Validation cost: 0.038, Validation F1: 0.988
EPOCH:: 11, Validation cost: 0.038, Validation F1: 0.988
EPOCH:: 12, Validation cost: 0.043, Validation F1: 0.987
EPOCH:: 13, Validation cost: 0.038, Validation F1: 0.989
EPOCH:: 14, Validation cost: 0.038, Validation F1: 0.988
EPOCH:: 15, Validation cost: 0.038, Validation F1: 0.988
EPOCH:: 16, Validation cost: 0.037, Validation F1: 0.989
EPOCH:: 17, Validation cost: 0.037, Validation F1: 0.988
EPOCH:: 18, Validation cost: 0.037, Vali