# 第9回講義 宿題

## 課題. Theanoを用いてVatiational auto-encoder (VAE) を実装せよ

### 注意

- homework関数を完成させて提出してください
    - 訓練データのtrain_Xのみが与えられます
    - train_Xをtrain_Xとvalid_Xに分けるなどしてモデルを学習させてください
    - **test関数を戻り値**としてください (下に書いてあります)
- **test_Xに対するLower Boundで評価**します
- 全体の実行時間がiLect上で60分を超えないようにしてください
- homework関数の外には何も書かないでください

次のような内容のコードが**事前**に実行されます

```python
from __future__ import division
from collections import OrderedDict
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from theano.tensor.shared_randomstreams import RandomStreams

import numpy as np
import theano
import theano.tensor as T

rng = np.random.RandomState(1234)
theano_rng= RandomStreams(rng.randint(1234))

mnist = fetch_mldata('MNIST original')
mnist_X = shuffle(mnist.data.astype('float32'), random_state=1234)

mnist_X = mnist_X / 255.0

train_X, test_X = train_test_split(mnist_X, test_size=0.2, random_state=??) # random_stateはひみつです
```

次のセルのhomework関数を完成させて提出してください
- **上記のコード以外で必要なもの**は全て書いてください

In [3]:
from __future__ import division
from collections import OrderedDict
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from theano.tensor.shared_randomstreams import RandomStreams

import numpy as np
import theano
import theano.tensor as T

rng = np.random.RandomState(1234)
theano_rng= RandomStreams(rng.randint(1234))

def load_mnist():
    mnist = fetch_mldata('MNIST original')
    mnist_X = shuffle(mnist.data.astype('float32'), random_state=1234)

    mnist_X = mnist_X / 255.0

    train_X, test_X = train_test_split(mnist_X, test_size=0.2, random_state=42)

    return (train_X, test_X)

def check_homework():
    train_X, test_X = load_mnist()
    test = homework(train_X)
    lower_bound = test(test_X)
    return lower_bound

if 'homework' in globals():
    result = check_homework()
    print result

    print "No Error Occured!"

-97.3299865723
No Error Occured!


In [2]:
def homework(train_X):
    
    #train_X, valid_X = train_test_split(train_X, test_size=0.1, random_state=42)
    
    class Layer:
        #- Constructor
        def __init__(self, in_dim, out_dim, function=lambda x: x):
            self.in_dim = in_dim
            self.out_dim = out_dim
            self.function = function

            # Xavier
            self.W = theano.shared(np.sqrt(2/in_dim) * rng.randn(in_dim, out_dim).astype('float32'), name='W')

            self.b = theano.shared(np.zeros(out_dim).astype('float32'), name='b')

            self.params = [self.W, self.b]

        #- Forward Propagation
        def f_prop(self, x):
            self.u = T.dot(x, self.W) + self.b
            self.z = self.function(self.u)
            return self.z
        
        
    class VAE:
        def __init__(self, q, p, random=1234):
            self.q = q
            self.p = p
            self.srng = RandomStreams(seed=random)

        def q_f_prop(self, x):
            # Gaussian MLP
            params = []
            layer_out = x
            for i, layer in enumerate(self.q[:-2]):
                params += layer.params
                layer_out = layer.f_prop(layer_out)

            params += self.q[-2].params
            mean = self.q[-2].f_prop(layer_out)

            params += self.q[-1].params
            var  = self.q[-1].f_prop(layer_out)

            return mean, var, params

        def p_f_prop(self, x):
            # Bernoulli MLP
            params = []
            layer_out = x
            for i, layer in enumerate(self.p):
                params += layer.params
                layer_out = layer.f_prop(layer_out)
            mean = layer_out

            return mean, params

        def lower_bound(self, x):
            # Encode
            mean, var, q_params = self.q_f_prop(x)
            KL = -0.5 * T.mean(T.sum(1 + T.log(var) - mean**2 - var, axis=1))

            epsilon = self.srng.normal(mean.shape)
            z = mean + T.sqrt(var) * epsilon

            # Decode
            _x, p_params = self.p_f_prop(z)
            log_likelihood = T.mean(T.sum(x*T.log(_x) + (1-x)*T.log(1-_x), axis=1))

            params = q_params + p_params

            lower_bound = [-KL, log_likelihood]

            return lower_bound, params
        
        
    def Adam(params, g_params, lr=0.001, b1=0.1, b2=0.001, e=1e-8):
        updates = []
        i = theano.shared(np.float32(0.))
        i_t = i + 1.
        fix1 = 1. - (1. - b1)**i_t
        fix2 = 1. - (1. - b2)**i_t
        lr_t = lr * (T.sqrt(fix2) / fix1)
        for p, g in zip(params, g_params):
            m = theano.shared(p.get_value() * 0.)
            v = theano.shared(p.get_value() * 0.)
            m_t = (b1 * g) + ((1. - b1) * m)
            v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
            g_t = m_t / (T.sqrt(v_t) + e)
            p_t = p - (lr_t * g_t)
            updates.append((m, m_t))
            updates.append((v, v_t))
            updates.append((p, p_t))
        updates.append((i, i_t))
        return updates
    
    # network defination
    z_dim = 50

    #- Gaussian MLP
    q = [
        Layer(784, 500, T.nnet.elu),
        Layer(500, 200, T.nnet.elu),
        Layer(200, 200, T.nnet.elu),
        Layer(200, z_dim),                 # mean
        Layer(200, z_dim, T.nnet.softplus) # variance
    ]

    #- Bernoulli MLP
    p = [
        Layer(z_dim, 200, T.nnet.elu),
        Layer(200, 200, T.nnet.elu),
        Layer(200, 500, T.nnet.elu),
        Layer(500, 784, T.nnet.sigmoid)
    ]
    
    # train, test function definition
    model = VAE(q, p)

    x = T.fmatrix('x')
    lower_bound, params = model.lower_bound(x)

    g_params = T.grad(-T.sum(lower_bound), params)
    updates = Adam(params, g_params)

    train = theano.function(inputs=[x], outputs=lower_bound, updates=updates, allow_input_downcast=True, name='train')
    #test  = theano.function(inputs=[x], outputs=T.sum(lower_bound), allow_input_downcast=True, name='test')
    batch_size = 100
    n_batches = train_X.shape[0]//batch_size

    for epoch in xrange(100):
        rng.shuffle(train_X)
        lowerbound_all = []
        for i in xrange(n_batches):
            start = i*batch_size
            end = start + batch_size
            lowerbound = train(train_X[start:end])
            lowerbound_all.append(lowerbound)

        lowerbound_all = np.mean(lowerbound_all, axis=0)
        #test_lowerbound = test(valid_X)
        #print 'Epoch:%d, Train Lower Bound:%lf (%lf, %lf), Test Lower Bound:%lf' % (epoch, 
                            #np.sum(lowerbound_all), lowerbound_all[0], lowerbound_all[1], test_lowerbound)


    
    
    #- 以下の行はそのままでsubmitしてください (修正しないでください)
    test  = theano.function(inputs=[x], outputs=T.sum(lower_bound), allow_input_downcast=True, name='test')
    
    return test

In [None]:
from __future__ import division
from collections import OrderedDict
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from theano.tensor.shared_randomstreams import RandomStreams

import numpy as np
import theano
import theano.tensor as T

rng = np.random.RandomState(1234)
theano_rng= RandomStreams(rng.randint(1234))

def load_mnist():
    mnist = fetch_mldata('MNIST original')
    mnist_X = shuffle(mnist.data.astype('float32'), random_state=1234)

    mnist_X = mnist_X / 255.0

    train_X, test_X = train_test_split(mnist_X, test_size=0.2, random_state=42)

    return (train_X, test_X)

def check_homework():
    train_X, test_X = load_mnist()
    test = homework(train_X)
    lower_bound = test(test_X)
    return lower_bound

if 'homework' in globals():
    result = check_homework()

    print "No Error Occured!"