# 第7回講義 宿題

## 課題. Theanoを用いて, MNISTを畳み込みニューラルネットワーク(CNN)で学習せよ

### 注意

- homework関数を完成させて提出してください
    - 訓練データはtrain_X, train_y, テストデータはtest_Xで与えられます
    - train_Xとtrain_yをtrain_X, train_yとvalid_X, valid_yに分けるなどしてモデルを学習させてください
    - test_Xに対して予想ラベルpred_yを作り, homework関数の戻り値としてください\
- pred_yのtest_yに対する精度(F値)で評価します
- 全体の実行時間がiLect上で60分を超えないようにしてください
- homework関数の外には何も書かないでください

次のような内容のコードが**事前**に実行されます

```python
from collections import OrderedDict
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from theano.tensor.nnet import conv2d
from theano.tensor.signal import pool
from theano.tensor.shared_randomstreams import RandomStreams

import numpy as np
import theano
import theano.tensor as T


mnist = fetch_mldata('MNIST original')
mnist_X, mnist_y = shuffle(mnist.data.astype('float32'),
                           mnist.target.astype('int32'),
                           random_state=42)

mnist_X = mnist_X / 255.0

train_X, test_X, train_y, test_y = train_test_split(mnist_X, mnist_y,
                                                    test_size=0.2,
                                                    random_state=??)
```

次のセルのhomework関数を完成させて提出してください
- **上記のコード以外で必要なもの**は全て書いてください

In [1]:
def homework(train_X, train_y, test_X):
    
    train_y = np.eye(10)[train_y]

    train_X = train_X.reshape((train_X.shape[0], 1, 28, 28))
    test_X = test_X.reshape((test_X.shape[0], 1, 28, 28))

    train_X, valid_X, train_y, valid_y = train_test_split(train_X, train_y,
                                                      test_size=0.2,
                                                      random_state=42)
    import time
    start_time = time.time()
    
    # Convolutional layer
    class Conv:
        # Constructor
        def __init__(self, filter_shape, function, border_mode="valid",
                     subsample=(1, 1)):
            # filter shape (k, l, i, j): 4次元
            self.filter_shape = filter_shape
            self.function = function
            self.border_mode = border_mode
            self.subsample = subsample

            self.W = theano.shared(np.random.uniform(low=-0.08, high=0.08,
                                               size = self.filter_shape
                                               ).astype('float32'), name='W')
            self.b = theano.shared(np.zeros(self.filter_shape[0],).astype('float32'),
                                   name='b') 
        
            self.params = [self.W, self.b] 

        # Forward Propagation
        def f_prop(self, x):
            conv_out = conv2d(x, self.W, border_mode= self.border_mode, subsample=self.subsample) 
            self.z = self.function(conv_out + self.b[np.newaxis, :, np.newaxis, np.newaxis]) 
            return self.z
        
    class Pooling:
        # Constructor
        def __init__(self, pool_size=(2, 2), mode='max'):
            self.pool_size = pool_size
            self.mode = 'max'
            self.params = []

        # Forward Propagation
        def f_prop(self, x):
            return pool.pool_2d(input=x, ds=self.pool_size, mode=self.mode, ignore_border=True)  
        
    class Flatten:
        # Constructor
        def __init__(self, outdim=2):
            self.outdim = outdim
            self.params = []

        # Forward Propagation
        def f_prop(self, x):
            return T.flatten(x, self.outdim)
    
    class Layer:
        # Constructor
        def __init__(self, in_dim, out_dim, function, possibility):
            self.in_dim = in_dim
            self.out_dim = out_dim
            self.function = function
            self.possibility = possibility
            self.W = theano.shared(np.random.uniform(
                        low=-np.sqrt(6. / (in_dim + out_dim)),
                        high=np.sqrt(6. / (in_dim + out_dim)),
                        size=(in_dim, out_dim)
                        ).astype("float32"), name="W")
            self.b = theano.shared(np.zeros(out_dim).astype("float32"), name="b")
            self.params = [self.W, self.b]

    # Forward Propagation
        def f_prop(self, x):
            self.z = self.function(T.dot(x, self.W) + self.b)
            return self.z
        
        def get_mask(self):
            a = np.random.rand(self.out_dim) < self.possibility
            return a*np.float32(1.0)
        

    def build_shared_zeros(shape, name):

        return theano.shared(value=np.zeros(shape, dtype=theano.config.floatX), 
                             name=name, borrow=True)

    # Optimizer
    class Adam:
        def __init__(self, params, alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-8, gamma=1-1e-8):
            self.alpha = alpha
            self.b1 = beta1
            self.b2 = beta2
            self.gamma = gamma
            self.t = theano.shared(np.float32(1))
            self.eps = eps

            self.ms = [build_shared_zeros(t.shape.eval(), 'm') for t in params]
            self.vs = [build_shared_zeros(t.shape.eval(), 'v') for t in params]
        
        def updates(self, g_params, cost):
            self.b1_t = self.b1 * self.gamma ** (self.t - 1)
            self.updates = OrderedDict()
            for m, v, param, g_param in zip(self.ms, self.vs, params, g_params):
                _m = self.b1_t * m + (1 - self.b1_t) * g_param
                _v = self.b2 * v + (1 - self.b2) * g_param ** 2

                m_hat = _m / (1 - self.b1 ** self.t)
                v_hat = _v / (1 - self.b2 ** self.t)

                self.updates[param] = param - self.alpha*m_hat / (T.sqrt(v_hat) + self.eps)
                self.updates[m] = _m
                self.updates[v] = _v
            self.updates[self.t] = self.t + 1.0

            return self.updates
    
    activation = T.nnet.relu
    
    layers = [                             # (チャネル数)x(縦の次元数)x(横の次元数)
        Conv((20, 1, 5, 5), activation),   # 1x28x28  -> 20x24x24
        Pooling((2, 2)),                   # 20x24x24 -> 20x12x12
        Conv((50, 20, 5, 5), activation),  # 20x12x12 -> 50x 8x 8
        Pooling((2, 2)),                   # 50x 8x 8 -> 50x 4x 4
        Flatten(2),
        Layer(4*4*50, 500, T.nnet.sigmoid, 0.80),
        Layer(500, 10, T.nnet.softmax, 1.00)
    ]
    
    
    x = T.ftensor4('x')
    t = T.imatrix('t')
    
    params = []
    layer_out = x

    for (i, layer) in enumerate(layers):
        params += layer.params
        if i < 5:
            layer_out = layer.f_prop(layer_out)
        else:
            layer.mask = layer.get_mask()
            layer_out = layer.f_prop(layer_out)*layer.mask
    y = layers[-1].z

    cost = T.mean(T.nnet.categorical_crossentropy(y, t))
    
    g_params = T.grad(cost, params)
    updates = Adam(params=params).updates(g_params, cost=cost) 

        
    train = theano.function(inputs=[x, t], outputs=cost, updates=updates,
                        allow_input_downcast=True, name='train')
    valid = theano.function(inputs=[x, t], outputs=[cost, T.argmax(y, axis=1)],
                        allow_input_downcast=True, name='valid')
    test = theano.function(inputs=[x], outputs=T.argmax(y, axis=1), name='test')
    
    
    
    batch_size = 100
    n_batches = train_X.shape[0]//batch_size
    epoch = 0
    while time.time() - start_time < 45*60 and epoch < 400:
        epoch += 1
        train_X, train_y = shuffle(train_X, train_y)
        for i in range(n_batches):
            start = i*batch_size
            end = start + batch_size
            train(train_X[start:end], train_y[start:end])
        #print(epoch)
        valid_cost, pred_y = valid(valid_X, valid_y)
        if epoch % 50 == 9:
            print('EPOCH:: %i, Validation cost: %.3f, Validation F1: %.3f' %
                  (epoch + 1, valid_cost,
                   f1_score(np.argmax(valid_y, axis=1).astype('int32'), pred_y,
                            average='macro')))
    pred_y  = test(test_X)
    return pred_y

In [None]:
from collections import OrderedDict
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from theano.tensor.nnet import conv2d
from theano.tensor.signal import pool
from theano.tensor.shared_randomstreams import RandomStreams

import numpy as np
import theano
import theano.tensor as T


def load_mnist():
    mnist = fetch_mldata('MNIST original')
    mnist_X, mnist_y = shuffle(mnist.data.astype('float32'),
                               mnist.target.astype('int32'),
                               random_state=42)

    mnist_X = mnist_X / 255.0

    train_X, test_X, train_y, test_y = train_test_split(mnist_X, mnist_y,
                                                        test_size=0.2,
                                                        random_state=42)

    return (train_X, test_X, train_y, test_y)


def check_homework():
    train_X, test_X, train_y, test_y = load_mnist()

    # validate for small dataset
    train_X_mini = train_X[:1000]
    train_y_mini = train_y[:1000]
    test_X_mini = test_X[:1000]
    test_y_mini = test_y[:1000]

    pred_y = homework(train_X_mini, train_y_mini, test_X_mini)
    return f1_score(test_y_mini, pred_y, average='macro')

if 'homework' in globals():
    result = check_homework()

    print("No Error Occured!")

Using gpu device 0: GRID K520 (CNMeM is enabled with initial size: 95.0% of memory, cuDNN 4007)


EPOCH:: 10, Validation cost: 0.254, Validation F1: 0.937
EPOCH:: 60, Validation cost: 0.089, Validation F1: 0.981
EPOCH:: 110, Validation cost: 0.084, Validation F1: 0.981
EPOCH:: 160, Validation cost: 0.083, Validation F1: 0.977
EPOCH:: 210, Validation cost: 0.083, Validation F1: 0.975
EPOCH:: 260, Validation cost: 0.084, Validation F1: 0.975


In [None]:
from collections import OrderedDict
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from theano.tensor.nnet import conv2d
from theano.tensor.signal import pool
from theano.tensor.shared_randomstreams import RandomStreams

import numpy as np
import theano
import theano.tensor as T


mnist = fetch_mldata('MNIST original')
mnist_X, mnist_y = shuffle(mnist.data.astype('float32'),
                           mnist.target.astype('int32'),
                           random_state=42)

mnist_X = mnist_X / 255.0

train_X, test_X, train_y, test_y = train_test_split(mnist_X, mnist_y,
                                                    test_size=0.2,
                                                    random_state=1)
a = homework(train_X, train_y, test_X)
print(sum(a==test_y)/len(a))

In [None]:
from collections import OrderedDict
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from theano.tensor.nnet import conv2d
from theano.tensor.signal import pool
from theano.tensor.shared_randomstreams import RandomStreams

import numpy as np
import theano
import theano.tensor as T


mnist = fetch_mldata('MNIST original')
mnist_X, mnist_y = shuffle(mnist.data.astype('float32'),
                           mnist.target.astype('int32'),
                           random_state=42)

mnist_X = mnist_X / 255.0

train_X, test_X, train_y, test_y = train_test_split(mnist_X, mnist_y,
                                                    test_size=0.2,
                                                    random_state=22)
a = homework(train_X, train_y, test_X)
print(sum(a==test_y)/len(a))

EPOCH:: 10, Validation cost: 0.036, Validation F1: 0.988
EPOCH:: 60, Validation cost: 0.039, Validation F1: 0.993
EPOCH:: 110, Validation cost: 0.047, Validation F1: 0.992
EPOCH:: 160, Validation cost: 0.047, Validation F1: 0.993
EPOCH:: 210, Validation cost: 0.050, Validation F1: 0.993
EPOCH:: 260, Validation cost: 0.055, Validation F1: 0.992
EPOCH:: 310, Validation cost: 0.046, Validation F1: 0.991
EPOCH:: 360, Validation cost: 0.050, Validation F1: 0.992
EPOCH:: 410, Validation cost: 0.059, Validation F1: 0.993
EPOCH:: 460, Validation cost: 0.058, Validation F1: 0.992
EPOCH:: 510, Validation cost: 0.065, Validation F1: 0.991


In [2]:
from collections import OrderedDict
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from theano.tensor.nnet import conv2d
from theano.tensor.signal import pool
from theano.tensor.shared_randomstreams import RandomStreams

import numpy as np
import theano
import theano.tensor as T


mnist = fetch_mldata('MNIST original')
mnist_X, mnist_y = shuffle(mnist.data.astype('float32'),
                           mnist.target.astype('int32'),
                           random_state=123)

mnist_X = mnist_X / 255.0

train_X, test_X, train_y, test_y = train_test_split(mnist_X, mnist_y,
                                                    test_size=0.2,
                                                    random_state=22)
import time
start = time.time()
while 

Using gpu device 0: GRID K520 (CNMeM is enabled with initial size: 95.0% of memory, cuDNN 4007)


EPOCH:: 10, Validation cost: 0.029, Validation F1: 0.991
EPOCH:: 20, Validation cost: 0.032, Validation F1: 0.993
EPOCH:: 30, Validation cost: 0.031, Validation F1: 0.993
EPOCH:: 40, Validation cost: 0.033, Validation F1: 0.993
EPOCH:: 50, Validation cost: 0.038, Validation F1: 0.994
EPOCH:: 60, Validation cost: 0.041, Validation F1: 0.994
EPOCH:: 70, Validation cost: 0.041, Validation F1: 0.994
EPOCH:: 80, Validation cost: 0.041, Validation F1: 0.994
EPOCH:: 90, Validation cost: 0.042, Validation F1: 0.994
EPOCH:: 100, Validation cost: 0.044, Validation F1: 0.993


array([2, 8, 5, ..., 0, 5, 8])

In [1]:
    layers1 = [                             
        Conv((30, 1, 3, 3), activation),   # 1x28x28  -> 30x26x26
        Pooling((2, 2)),                   # 30x26x26 -> 30x13x13
        Conv((50, 30, 2, 2), activation),  # 20x13x13 -> 50x12x12
        Conv((40, 50, 5, 5), activation),  # 50x12x12 -> 40x 8x 8
        Pooling((2, 2)),                   # 40x 8x 8 -> 40x 4x 4
        Flatten(2),
        Layer(4*4*40, 400, activation),
        Layer(400, 10, T.nnet.softmax)
    ]
    
    layers2 = [                            
        Conv((20, 1, 9, 9), activation),   # 1x28x28  -> 20x20x20
        Pooling((2, 2)),                   # 20x20x20 -> 20x10x10
        Conv((50, 20, 3, 3), activation),  # 20x10x10 -> 50x 8x 8
        Pooling((2, 2)),                   # 50x 8x 8 -> 50x 4x 4
        Flatten(2),
        Layer(4*4*50, 500, activation),
        Layer(500, 10, T.nnet.softmax)
    ]
    
    if result[0] == result[1] or result[0] == result[2]:
        y = result[0]
    elif result[1] == result[2]:
        y = result[1]
    else:
        y = result[0]

NameError: name 'Conv' is not defined

In [26]:
from collections import OrderedDict
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from theano.tensor.nnet import conv2d
from theano.tensor.signal import pool
from theano.tensor.shared_randomstreams import RandomStreams

import numpy as np
import theano
import theano.tensor as T


mnist = fetch_mldata('MNIST original')
mnist_X, mnist_y = shuffle(mnist.data.astype('float32'),
                           mnist.target.astype('int32'),
                           random_state=123)

mnist_X = mnist_X / 255.0

train_X, test_X, train_y, test_y = train_test_split(mnist_X, mnist_y,
                                                    test_size=0.2,
                                                    random_state=22)
homework(train_X, train_y, test_X, test_y)

EPOCH:: 10, Validation cost: 0.035, Validation F1: 0.991
EPOCH:: 20, Validation cost: 0.031, Validation F1: 0.993
EPOCH:: 30, Validation cost: 0.037, Validation F1: 0.993
EPOCH:: 40, Validation cost: 0.045, Validation F1: 0.994
EPOCH:: 50, Validation cost: 0.048, Validation F1: 0.993
0.9915


array([2, 8, 5, ..., 0, 5, 8])

In [22]:
from collections import OrderedDict
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from theano.tensor.nnet import conv2d
from theano.tensor.signal import pool
from theano.tensor.shared_randomstreams import RandomStreams

import numpy as np
import theano
import theano.tensor as T


mnist = fetch_mldata('MNIST original')
mnist_X, mnist_y = shuffle(mnist.data.astype('float32'),
                           mnist.target.astype('int32'),
                           random_state=123)

mnist_X = mnist_X / 255.0

train_X, test_X, train_y, test_y = train_test_split(mnist_X, mnist_y,
                                                    test_size=0.2,
                                                    random_state=22)
homework(train_X, train_y, test_X, test_y)

EPOCH:: 10, Validation cost: 0.028, Validation F1: 0.991
EPOCH:: 20, Validation cost: 0.027, Validation F1: 0.993
EPOCH:: 30, Validation cost: 0.031, Validation F1: 0.993
EPOCH:: 40, Validation cost: 0.035, Validation F1: 0.993
EPOCH:: 50, Validation cost: 0.039, Validation F1: 0.993
0.991285714286


array([2, 8, 5, ..., 0, 5, 8])

In [4]:
import numpy as np
a  = np.array([[1,2,3],[1,2],[1]])
arr_1 = np.array([[1]])
print(a)

[[1, 2, 3] [1, 2] [1]]


In [32]:
print(sum(a==test_y)/len(test_y))

NameError: name 'test' is not defined

In [17]:
from collections import OrderedDict
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from theano.tensor.nnet import conv2d
from theano.tensor.signal import pool
from theano.tensor.shared_randomstreams import RandomStreams

import numpy as np
import theano
import theano.tensor as T
a = theano.shared(np.array([[1,2,3,4]]), name = 'a')
b = theano.shared(np.array([[1,2,3,4]]), name = 'b')
x = (a+b).copy()
print(x.get_value(), a.get_value())

AttributeError: 'TensorVariable' object has no attribute 'get_value'