# 第5回講義 宿題

## 課題. Theanoを用いて, MNISTを多層パーセプトロン(MLP)で学習せよ

### 注意

- homework関数を完成させて提出してください
    - 訓練データはtrain_X, train_y, テストデータはtest_Xで与えられます
    - train_Xとtrain_yをtrain_X, train_yとvalid_X, valid_yに分けるなどしてモデルを学習させてください
    - test_Xに対して予想ラベルpred_yを作り, homework関数の戻り値としてください
- pred_yのtest_yに対する精度(F値)で評価します
- 全体の実行時間がiLect上で60分を超えないようにしてください
- homework関数の外には何も書かないでください

- CNNは使わないでください

次のような内容のコードが**事前**に実行されます

```python
from __future__ import division
from collections import OrderedDict
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split

import numpy as np
import theano
import theano.tensor as T

mnist = fetch_mldata('MNIST original')
mnist_X, mnist_y = shuffle(mnist.data.astype('float32'), mnist.target.astype('int32'))

mnist_X = mnist_X / 255.0

train_X, test_X, train_y, test_y = train_test_split(mnist_X, mnist_y, test_size=0.2, random_state=??) # random_stateはこちらで与えます
```

次のセルのhomework関数を完成させて提出してください
- パッケージのインポートなど, 必要な物はすべて書いてください

In [None]:
def homework(train_X, test_X, train_y):
    """Train a one-hidden-layer MLP on the training data and label test_X.

    The labels are one-hot encoded over 10 classes, 5% of the training data
    is held out for validation, and the network (ReLU hidden layer of 200
    units followed by a 10-way softmax) is trained with mini-batch SGD on
    the mean categorical cross-entropy. Validation cost and macro F1 are
    printed after every epoch.

    Args:
        train_X: 2-D array of training samples, one row per sample.
        test_X: 2-D array of samples to label, same column count as train_X.
        train_y: 1-D array of integer class indices in the range 0..9.

    Returns:
        1-D array holding the predicted class index for each row of test_X.
    """
    # The assignment asks for everything the function needs, imports
    # included, to live inside the function body.
    from collections import OrderedDict

    import numpy as np
    import theano
    import theano.tensor as T
    from sklearn.cross_validation import train_test_split
    from sklearn.metrics import f1_score
    from sklearn.utils import shuffle

    # One-hot encode the labels, then hold out 5% for validation.
    train_y = np.eye(10)[train_y]
    train_X, valid_X, train_y, valid_y = train_test_split(
        train_X, train_y, test_size=0.05, random_state=42)

    class Layer(object):
        """Fully connected layer: function(x . W + b)."""

        def __init__(self, in_dim, out_dim, function):
            self.in_dim = in_dim
            self.out_dim = out_dim
            self.function = function
            # Gaussian weights scaled by sqrt(2 / in_dim); zero biases.
            self.W = theano.shared(
                np.sqrt(2.0 / in_dim)
                * np.random.randn(in_dim, out_dim).astype("float32"))
            self.b = theano.shared(np.zeros(out_dim).astype("float32"))
            self.params = [self.W, self.b]

        def f_prop(self, x):
            """Build and return the symbolic output of this layer for x."""
            self.z = self.function(T.dot(x, self.W) + self.b)
            return self.z

    def sgd(params, g_params, eps=np.float32(0.1)):
        """Return the plain SGD update rule param <- param - eps * grad."""
        updates = OrderedDict()
        for param, g_param in zip(params, g_params):
            updates[param] = param - eps * g_param
        return updates

    def relu(x):
        """Rectified linear unit."""
        return T.maximum(0, x)

    layers = [
        Layer(train_X.shape[1], 200, relu),
        Layer(200, 10, T.nnet.softmax),
    ]

    x = T.fmatrix('x')
    t = T.imatrix('t')

    # Chain the layers into one symbolic graph and collect their parameters.
    params = []
    layer_out = x
    for layer in layers:
        params += layer.params
        layer_out = layer.f_prop(layer_out)

    y = layer_out
    cost = T.mean(T.nnet.categorical_crossentropy(y, t))

    g_params = T.grad(cost=cost, wrt=params)
    updates = sgd(params, g_params)

    train = theano.function(inputs=[x, t], outputs=cost, updates=updates,
                            allow_input_downcast=True, name='train')
    valid = theano.function(inputs=[x, t], outputs=[cost, T.argmax(y, axis=1)],
                            allow_input_downcast=True, name='valid')
    # allow_input_downcast added so that this function accepts the same
    # input dtypes as train/valid do.
    test = theano.function(inputs=[x], outputs=T.argmax(y, axis=1),
                           allow_input_downcast=True, name='test')

    batch_size = 10
    # Floor division: a trailing partial batch (if any) is skipped each epoch.
    n_batches = train_X.shape[0] // batch_size
    # The validation labels never change, so decode the one-hot rows once.
    valid_labels = np.argmax(valid_y, axis=1).astype('int32')

    for epoch in range(50):
        train_X, train_y = shuffle(train_X, train_y)
        for i in range(n_batches):
            start = i * batch_size
            end = start + batch_size
            train(train_X[start:end], train_y[start:end])

        # Report inside the loop so every epoch's validation result is shown
        # (previously only the final epoch was printed). The single-argument
        # print(...) form behaves the same on Python 2 and Python 3.
        valid_cost, valid_pred = valid(valid_X, valid_y)
        print('EPOCH:: %i, Validation cost: %.3f, Validation F1: %.3f' % (
            epoch + 1, valid_cost,
            f1_score(valid_labels, valid_pred, average='macro')))

    pred_y = test(test_X)
    return pred_y

In [None]:
from __future__ import division
from collections import OrderedDict
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split

import numpy as np
import theano
import theano.tensor as T

def load_mnist():
    """Fetch MNIST, shuffle it, scale pixels by 1/255 and split 80/20.

    Returns:
        Tuple (train_X, test_X, train_y, test_y); the features are float32
        and the labels int32.
    """
    dataset = fetch_mldata('MNIST original')
    pixels = dataset.data.astype('float32')
    labels = dataset.target.astype('int32')

    # Shuffle features and labels together, then rescale the pixel values.
    pixels, labels = shuffle(pixels, labels)
    pixels = pixels / 255.0

    # Fixed random_state keeps the train/test partition reproducible for a
    # given shuffled order.
    return tuple(train_test_split(pixels, labels,
                                  test_size=0.2, random_state=42))

def check_homework():
    """Run homework() on the local MNIST split and return its macro F1.

    Returns:
        Macro-averaged F1 score of the predictions against the test labels.
    """
    features_train, features_test, labels_train, labels_test = load_mnist()
    predictions = homework(features_train, features_test, labels_train)
    score = f1_score(labels_test, predictions, average='macro')
    return score

# Run the local check only when a homework() implementation has been defined
# (for example by an earlier notebook cell).
if 'homework' in globals():
    # Macro F1 of the submitted homework() on the local test split.
    result = check_homework()

    # Printed only after the check actually ran without raising; previously
    # this line was outside the `if` and reported success even when nothing
    # had been executed. The single-argument print(...) form works on both
    # Python 2 and Python 3.
    print("No Error Occured!")