# 第7回講義 宿題

## 課題. Theanoを用いて, MNISTを畳み込みニューラルネットワーク(CNN)で学習せよ

### 注意

- homework関数を完成させて提出してください
    - 訓練データはtrain_X, train_y, テストデータはtest_Xで与えられます
    - train_Xとtrain_yをtrain_X, train_yとvalid_X, valid_yに分けるなどしてモデルを学習させてください
    - test_Xに対して予想ラベルpred_yを作り, homework関数の戻り値としてください
- pred_yのtest_yに対する精度(F値)で評価します
- 全体の実行時間がiLect上で60分を超えないようにしてください
- homework関数の外には何も書かないでください

次のような内容のコードが**事前**に実行されます

```python
from __future__ import division
from collections import OrderedDict
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from theano.tensor.nnet import conv2d
from theano.tensor.signal import pool
from theano.tensor.shared_randomstreams import RandomStreams

import numpy as np
import theano
import theano.tensor as T

mnist = fetch_mldata('MNIST original')
mnist_X, mnist_y = shuffle(mnist.data.astype('float32'), mnist.target.astype('int32'))

mnist_X = mnist_X / 255.0

train_X, test_X, train_y, test_y = train_test_split(mnist_X, mnist_y, test_size=0.2, random_state=??) # random_stateはこちらで与えます
```

次のセルのhomework関数を完成させて提出してください
- パッケージのインポートなど, 必要な物はすべて書いてください

In [2]:
from __future__ import division
from collections import OrderedDict
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from theano.tensor.nnet import conv2d
from theano.tensor.signal import pool
from theano.tensor.shared_randomstreams import RandomStreams

import numpy as np
import theano
import theano.tensor as T

def load_mnist():
    """Fetch MNIST, shuffle it, scale pixels by 1/255 and split 80/20.

    Returns:
        A 4-tuple ``(train_X, test_X, train_y, test_y)`` as produced by
        ``train_test_split`` with ``test_size=0.2`` and ``random_state=42``.
    """
    dataset = fetch_mldata('MNIST original')

    # Cast before shuffling: float32 pixels, int32 labels.
    pixels = dataset.data.astype('float32')
    labels = dataset.target.astype('int32')
    pixels, labels = shuffle(pixels, labels)

    # Divide the raw pixel values by 255.
    pixels = pixels / 255.0

    train_X, test_X, train_y, test_y = train_test_split(
        pixels, labels, test_size=0.2, random_state=42)
    return (train_X, test_X, train_y, test_y)

def check_homework():
    """Run ``homework`` on the MNIST split and return its macro-averaged F1.

    The held-out labels ``test_y`` are never passed to ``homework``; they are
    only used here to score the predictions it returns.
    """
    splits = load_mnist()
    train_X, test_X, train_y, test_y = splits
    predictions = homework(train_X, test_X, train_y)
    score = f1_score(test_y, predictions, average='macro')
    return score

# Only run the end-to-end check once a `homework` function exists in this
# session (i.e. the cell defining it has been executed).
if 'homework' in globals():
    result = check_homework()
    print(result)
    print("No Error Occured!")

Using gpu device 0: GRID K520 (CNMeM is enabled with initial size: 95.0% of memory, cuDNN 4007)


UnusedInputError: theano.function was asked to create a function computing outputs given certain inputs, but the provided input variable at index 2 is not part of the computational graph needed to compute the outputs: <TensorType(int32, scalar)>.
To make this error into a warning, you can pass the parameter on_unused_input='warn' to theano.function. To disable it completely, use on_unused_input='ignore'.

In [1]:
def homework(train_X, test_X, train_y):
    """Train a small CNN on the given training data and label ``test_X``.

    The function holds out 10% of the training data for validation, trains a
    four-convolution / three-dense-layer network with dropout using an Adam
    style optimizer for two epochs, prints validation cost and macro F1 after
    each epoch, and finally predicts a class index for every test sample.

    Args:
        train_X: Training images; reshaped here to ``(N, 1, 28, 28)``.
        test_X: Test images; reshaped here to ``(M, 1, 28, 28)``.
        train_y: Integer training labels, used to index ``np.eye(10)`` and
            therefore expected to lie in ``0..9``.

    Returns:
        The predicted class index (argmax of the softmax output) for each
        sample in ``test_X``.
    """
    from theano.ifelse import ifelse
    import time

    rng = np.random.RandomState(1234)
    srng = RandomStreams(1234)

    # One-hot encode the labels and give the images an explicit channel axis.
    train_y = np.eye(10)[train_y]
    train_X = train_X.reshape((train_X.shape[0], 1, 28,28))
    test_X  = test_X.reshape((test_X.shape[0], 1, 28, 28))
    train_X, valid_X, train_y, valid_y = train_test_split(train_X, train_y, test_size=0.1, random_state=42)

    class Conv:
        """2-D convolution layer followed by an element-wise activation."""

        #- Constructor
        def __init__(self, filter_shape, function, border_mode="valid", subsample=(1, 1)):
            # filter_shape (k, l, i, j) is 4-dimensional:
            #  - k: number of filters (output channels)
            #  - l: number of input channels
            #  - i: filter rows
            #  - j: filter columns

            self.function = function
            self.border_mode = border_mode
            self.subsample = subsample
            # unpack filter_shape for easy computing
            k,l,i,j = filter_shape
            # Weights are scaled by sqrt(2 / fan_in), with fan_in = l*i*j.
            self.W = theano.shared(np.sqrt(2.0/(l*i*j)) * rng.randn(k,l,i,j).astype("float32"), name='W')
            self.b = theano.shared(np.zeros(k).astype("float32"), name='b')

            self.params = [self.W, self.b]

        #- Forward Propagation
        def f_prop(self, x):
            conv_out = conv2d(x, self.W, border_mode=self.border_mode, subsample=self.subsample)
            # Reshape the bias to (1, k, 1, 1) so it broadcasts per channel.
            self.z   = self.function(conv_out + self.b.reshape((1,-1,1,1)))
            return self.z


    class Pooling:
        """Parameter-free 2-D pooling layer."""

        #- Constructor
        def __init__(self, pool_size=(2, 2), mode='max'):
            self.pool_size = pool_size
            self.mode = mode
            self.params = []

        #- Forward Propagation
        def f_prop(self, x):
            return pool.pool_2d(input=x, ds=self.pool_size, mode=self.mode, ignore_border=True)


    class Flatten:
        """Parameter-free layer that flattens its input to ``outdim`` dims."""

        #- Constructor
        def __init__(self, outdim=2):
            self.outdim = outdim
            self.params = []

        #- Forward Propagation
        def f_prop(self,x):
            return T.flatten(x, self.outdim)

    class Dropout:
        """Dropout with rescaling by the retain probability during training.

        This class deliberately has no ``params`` attribute: the graph
        building loop below uses that absence to decide which layers need the
        ``is_train`` switch passed to ``f_prop``.
        """

        def __init__(self, p=0.):
            self.p = p

        def f_prop(self, x, is_train):
            # With p <= 0 nothing is dropped. Return early: the original code
            # referenced `dropped_x` here without ever assigning it.
            if self.p <= 0:
                return x
            retain_prob = 1 - self.p
            dropped_x = x * srng.binomial(x.shape, p=retain_prob, dtype="float32")
            dropped_x /= retain_prob
            # return original X if in testing mode (is_train==0)
            return ifelse(T.neq(is_train, 0), dropped_x, x)


    class Layer:
        """Fully connected layer followed by an activation."""

        #- Constructor
        def __init__(self, in_dim, out_dim, function):
            self.in_dim = in_dim
            self.out_dim = out_dim
            self.function = function

            # Weights are scaled by sqrt(2 / in_dim).
            self.W = theano.shared(np.sqrt(2.0/in_dim) * rng.randn(in_dim, out_dim).astype("float32"), name='w')
            self.b =  theano.shared(np.zeros(out_dim).astype("float32"), name="b")
            self.params = [ self.W, self.b ]

        #- Forward Propagation
        def f_prop(self, x):
            self.z = self.function(T.dot(x, self.W) + self.b)
            return self.z


    #--- Stochastic Gradient Descent
    # Plain-SGD alternative to `adam`; not used by the training setup below.
    def sgd(params, g_params, eps=np.float32(0.1)):
        updates = OrderedDict()
        for param, g_param in zip(params, g_params):
            updates[param] = param - eps * g_param
        return updates

    # Adam Optimizer
    def adam(cost, params, lr=0.001, b1=0.1, b2=0.001, e=1e-8):
        """Build Adam-style update pairs for ``params`` minimising ``cost``.

        Note the parameterisation: ``b1`` and ``b2`` are the weights given to
        the *new* gradient and squared gradient, so the running averages
        decay with ``1 - b1`` and ``1 - b2`` respectively.
        """
        updates = []
        grads = T.grad(cost, params)
        # Step counter, stored as a float32 shared scalar.
        i = theano.shared(np.asarray(0., dtype="float32"))
        i_t = i + 1.
        # Bias-correction terms for the first and second moment estimates.
        fix1 = 1. - (1. - b1)**i_t
        fix2 = 1. - (1. - b2)**i_t
        lr_t = lr * (T.sqrt(fix2) / fix1)
        for p, g in zip(params, grads):
            # Zero-initialised moment accumulators with the parameter's shape.
            m = theano.shared(p.get_value() * 0.)
            v = theano.shared(p.get_value() * 0.)
            m_t = (b1 * g) + ((1. - b1) * m)
            v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
            g_t = m_t / (T.sqrt(v_t) + e)
            p_t = p - (lr_t * g_t)
            updates.append((m, m_t))
            updates.append((v, v_t))
            updates.append((p, p_t))
        updates.append((i, i_t))
        return updates


    # define networks
    activation = lambda x: T.maximum(0, x)

    is_train = T.iscalar('is_train') # pseudo boolean for switching between training and prediction

    layers = [
        Conv((32, 1, 3, 3), activation, border_mode=(1,1)),  # 28x28x 1 -> 28x28x32
        Dropout(0.3),
        Conv((32, 32, 3, 3), activation, border_mode=(1,1)),
        Pooling((2, 2)),                 # 28x28x32 -> 14x14x32
        Conv((64, 32, 3, 3), activation, border_mode=(1,1)), # 14x14x32 ->  14x14x64
        Dropout(0.3),
        Conv((64, 64, 3, 3), activation, border_mode=(1,1)),
        Pooling((2, 2)),                 #  14x14x64 ->  7x7x64
        Flatten(2),
        Layer(7*7*64, 200, activation),
        Dropout(0.5),
        Layer(200, 200, activation),
        Layer(200, 10, T.nnet.softmax)
    ]


    # define train, valid, test function
    x = T.ftensor4('x')
    t = T.imatrix('t')

    params = []
    layer_out = x
    for layer in layers:
        # Layers exposing `params` take only the input; Dropout (which has no
        # `params` attribute) additionally needs the train/test switch.
        if hasattr(layer, "params"):
            params += layer.params
            layer_out = layer.f_prop(layer_out)
        else:
            layer_out = layer.f_prop(layer_out, is_train=is_train)


    # Output of the final softmax layer.
    y = layers[-1].z

    cost = T.mean(T.nnet.categorical_crossentropy(y, t))

    updates = adam(cost, params, lr=0.001, b1=0.1, b2=0.001, e=1e-8)

    # `is_train` is fixed per compiled function through `givens`, so callers
    # never pass it explicitly: 1 for training, 0 for validation and test.
    train = theano.function(inputs=[x, t], outputs=cost, updates=updates,
                            givens={is_train: np.cast['int32'](1)}, allow_input_downcast=True, name='train')
    valid = theano.function(inputs=[x, t], outputs=[cost, T.argmax(y, axis=1)],
                            givens={is_train: np.cast['int32'](0)}, allow_input_downcast=True, name='valid')
    test  = theano.function(inputs=[x], outputs=T.argmax(y, axis=1),
                            givens={is_train: np.cast['int32'](0)}, name='test')

    # training and validation
    batch_size = 100
    # Integer division: samples beyond the last full batch are skipped.
    n_batches = train_X.shape[0]//batch_size
    for epoch in xrange(2):
        tic = time.clock()
        train_X, train_y = shuffle(train_X, train_y)
        for i in xrange(n_batches):
            start = i*batch_size
            end = start + batch_size
            train(train_X[start:end], train_y[start:end])
        valid_cost, pred_y = valid(valid_X, valid_y)
        # valid_y is one-hot, so convert it back to class indices for F1.
        print('EPOCH:: %i, Validation cost: %.3f, Validation F1: %.3f' % (epoch + 1, valid_cost,
                    f1_score(np.argmax(valid_y, axis=1).astype('int32'), pred_y, average='macro')))
        print('time spent for one epoch %f sec' % (time.clock() - tic))
    pred_y = test(test_X)


    return pred_y

In [None]:
from __future__ import division
from collections import OrderedDict
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from theano.tensor.nnet import conv2d
from theano.tensor.signal import pool
from theano.tensor.shared_randomstreams import RandomStreams

import numpy as np
import theano
import theano.tensor as T

def load_mnist():
    """Return a shuffled, rescaled MNIST split as ``(train_X, test_X, train_y, test_y)``.

    Pixels are cast to float32 and divided by 255, labels are cast to int32,
    and 20% of the samples are held out with ``random_state=42``.
    """
    raw = fetch_mldata('MNIST original')

    images = raw.data.astype('float32')
    targets = raw.target.astype('int32')
    images, targets = shuffle(images, targets)

    images = images / 255.0

    train_X, test_X, train_y, test_y = train_test_split(
        images, targets, test_size=0.2, random_state=42)
    return (train_X, test_X, train_y, test_y)

def check_homework():
    """Score ``homework`` on the MNIST test split with macro-averaged F1.

    ``homework`` receives only the training data and the test images; the
    test labels are used solely to compute the returned score.
    """
    train_X, test_X, train_y, test_y = load_mnist()
    predicted = homework(train_X, test_X, train_y)
    macro_f1 = f1_score(test_y, predicted, average='macro')
    return macro_f1

# Run the end-to-end check only if `homework` has been defined in this session.
if 'homework' in globals():
    # The score is kept in `result` for later inspection; it is not printed here.
    result = check_homework()
    print("No Error Occured!")