In [1]:
from keras.preprocessing import text 
texts = ["I have a pen.", "I have an apple", "You have pen and apple."]
tokenizer = text.Tokenizer()
tokenizer.fit_on_texts(texts)
list_tokenized = tokenizer.texts_to_sequences(texts)
print(list_tokenized)

Using TensorFlow backend.


[[2, 1, 5, 3], [2, 1, 6, 4], [7, 1, 3, 8, 4]]


# 問題1 SimpleRNNのフォワードプロパゲーション実装

In [99]:
import numpy as np
class ScratchSimpleRNNClassifier:
    
    def __init__(self, batch_size, n_features, n_nodes):
        self.batch_size = batch_size
        self.n_features = n_features
        self.n_nodes = n_nodes
        self.initializer_Wx = Initializer(self.n_features, self.n_nodes)
        self.initializer_Wh = Initializer(self.n_nodes, self.n_nodes)
        self.optimizer = SGD(lr=0.001)
        
#         self.Wx = self.initializer_Wx.W()
#         self.Wb = self.initializer_Wx.B()
#         self.Wh = self.initializer_Wh.W()
        self.Wx = np.array([[1, 3, 5, 7], [3, 5, 7, 8]])/100 # (n_features, n_nodes)
        self.Wh = np.array([[1, 3, 5, 7], [2, 4, 6, 8],
                        [3, 5, 7, 8], [4, 6, 8, 10]])/100 # (n_nodes, n_nodes)
        self.Wb = np.array([1., 1., 1., 1.])
        self.t = 0
        self.h_list = []
        self.at_list = []
        self.X = []
        self.dX=[]
        self.params = {}
        self.grads = {}
        self.params['Wx'] = self.Wx
        self.grads['Wx'] = np.zeros_like(self.Wx)
        self.params['Wb'] = self.Wb
        self.grads['Wb'] = np.zeros_like(self.Wb)
        self.params['Wh'] = self.Wh
        self.grads['Wh'] = np.zeros_like(self.Wh)
        
        
        
    def forward(self, xt, h):
        self.t += 1
        
        Wx = self.params['Wx']
        Wh = self.params['Wh']
        Wb = self.params['Wb']
        
        
        if len(self.h_list) == 0:
            self.h_list.append(h)
        #(N, D)dot(D,H) + (N,H)dot(H,H) + (H,) = (N,H)
        at = np.dot(xt, Wx) + np.dot(h, Wh) + Wb
        new_h = np.tanh(at)
        
        self.X.append(xt)
        self.at_list.append(at)
        self.h_list.append(new_h)
        return new_h
    
    def backward(self, dout, dh):
        
        xt = self.X[self.t-1]
        ht = self.h_list[self.t-1]
        at = self.at_list[self.t-1]
        
        dout = dout + dh
        dA = dout * (1. - np.tanh(at)**2)
        dWb = np.sum(dA, axis=0)
        #(D,H) = (D,N)dot(N,H)
        dWx = np.dot(xt.T, dA)
        #(H,H)= (H,N)dot(N,H)
        dWh = np.dot(ht.T, dA)
        #(N,D) = (N,H)dot(H,D)
        dxt = np.dot(dA, self.Wx.T)
        #(N,H)=(N,H)dot(H,H)
        dht = np.dot(dA, self.Wh.T)
        
        self.grads['Wx'] = dWx
        self.grads['Wh'] = dWh
        self.grads['Wb'] = dWb
        
        self.optimizer.update(self)
        
        self.dX.append(dxt)
        
        return dht
        
        
        

In [100]:
class Initializer:
    def __init__(self, n_nodes1, n_nodes2):
        self.n_nodes1 = n_nodes1
        self.n_nodes2 = n_nodes2
        
    def W(self):
        W = np.random.randn(self.n_nodes1, self.n_nodes2) * 0.01
        return W
    
    def B(self):
        B = np.random.randn(self.n_nodes2, )
        return B
    

In [101]:
class SGD:
    def __init__(self, lr):
        self.lr = lr
        
    def update(self, layer):
        self.params = layer.params
        self.grads = layer.grads
        
        for key in self.params.keys():
            self.params[key] -= self.lr * self.grads[key]
            
            

# 問題2 小さな配列でのフォワードプロパゲーションの実験

In [102]:
x = np.array([[[1, 2], [2, 3], [3, 4]]])/100
print(x.shape)

rnn = ScratchSimpleRNNClassifier(batch_size=x.shape[0],
                                 n_features=x.shape[2], n_nodes=4)

h = np.zeros((x.shape[0], 4)) # (batch_size, n_nodes)
for t in range(x.shape[1]):
    xt = x[:, t, :]
    h = rnn.forward(xt, h)
    

(1, 3, 2)


In [103]:
print(h)


[[0.79494228 0.81839002 0.83939649 0.85584174]]


# 問題3 バックプロパゲーションの実装

In [104]:
rnn.h_list

[array([[0., 0., 0., 0.]]),
 array([[0.76188798, 0.76213958, 0.76239095, 0.76255841]]),
 array([[0.792209  , 0.8141834 , 0.83404912, 0.84977719]]),
 array([[0.79494228, 0.81839002, 0.83939649, 0.85584174]])]

In [105]:
rnn.X

[array([[0.01, 0.02]]), array([[0.02, 0.03]]), array([[0.03, 0.04]])]

In [106]:
rnn.at_list

[array([[1.0007, 1.0013, 1.0019, 1.0023]]),
 array([[1.07733574, 1.13931527, 1.20129481, 1.25535044]]),
 array([[1.08471832, 1.15192269, 1.21912707, 1.27759095]])]

In [107]:
rnn.t

3

In [108]:
dX = []
dh = np.ones((1, 3, 4))
dh_prev = 0
for idx, t in enumerate(np.flip(np.arange(rnn.t))):
    dh_prev = rnn.backward(dh[:, t, :], dh_prev)
    


In [109]:
print(dh_prev)

[[0.04998308 0.06343091 0.07396354 0.09033399]]


In [110]:
print(rnn.dX)

[array([[0.04708592, 0.06963563]]), array([[0.05045678, 0.07438174]]), array([[0.05065903, 0.07466772]])]


In [None]:
class TimeRNN:
    def __init__(self, batch_size, n_features, n_nodes):
        

In [111]:
#time=tごとにRNNを生成する

class RNN:
    def __init__(self, Wx, Wh, b):
        self.params = [Wx, Wh, b]
        self.grads = [np.zeros_like(Wx), np.zeros_like(Wh),
                      np.zeros_like(b)]
        self.cache = None
        
    def forward(self, x, h_prev):
        Wx, Wh, b = self.params
        t = np.dot(x, Wx) + np.dot(h_prev, Wh) + b
        h_next = np.tanh(t)
        self.cache = (x, h_prev, h_next)
        return h_next
    
    #dh_nextとdh[:,t,:]をすでに合算した状態でbackwardに流す
    def backward(self, dh_next):
        x, h_prev, h_next = self.cache
        dt = h_next * (1 - np.tanh(h_next)**2)
        db = np.sum(dt, axis=0)
        dWx = np.dot(x.T, dt)
        dWh = np.dot(h_prev.T, dt)
        dx = np.dot(dt, dWx.T)
        dh_prev = np.dot(dt, Wh.T)
        
        self.grads[0][...] = dWx
        self.grads[1][...] = dWh
        self.grads[2][...] = db
        
        return dx, dh_prev
        

In [112]:
#テスト
x = np.array([[[1, 2], [2, 3], [3, 4]]])/100 # (batch_size, n_sequences, n_features)
w_x = np.array([[1, 3, 5, 7], [3, 5, 7, 8]])/100 # (n_features, n_nodes)
w_h = np.array([[1, 3, 5, 7], [2, 4, 6, 8], [3, 5, 7, 8], [4, 6, 8, 10]])/100 # (n_nodes, n_nodes)
batch_size = x.shape[0] # 1
n_sequences = x.shape[1] # 3
n_features = x.shape[2] # 2
n_nodes = w_x.shape[1] # 4
h = np.zeros((batch_size, n_nodes)) # (batch_size, n_nodes)
b = np.array([1., 1., 1., 1.]) # (n_nodes,)

rnn_model = RNN(w_x, w_h, b)


h = rnn_model.forward(x[:, 0, :], h)
print(h)

[[0.76188798 0.76213958 0.76239095 0.76255841]]


#正解
array([[0.76188798, 0.76213958, 0.76239095, 0.76255841]])

In [116]:
class TimeRNN:
    def __init__(self, Wx, Wh, b, stateful=False):
        self.params = [Wx, Wh, b]
        self.grads = [np.zeros_like(Wx), np.zeros_like(Wh),
                      np.zeros_like(b)]
        self.layers = None
        self.h, self.dh = None
        
    def set_state(self, h):
        self.h = h
        
    def reset_state(self):
        self.h = None
        
        
    def forward(self, xs):
        Wx, Wh, b = self.params
        N, T, D = xs.shape
        D, H = Wh.shape
        
        self.layers = []
        hs = np.empty((N, T, H), dtype='f')
        if not self.stateful or self.h is None:
            self.h = np.zeros((N,H), dtype='f')
            
            
        for t in range(T):
            layer = RNN(*self.params)
            self.h = layer.forward(xs[:, t, :], self.h)
            hs[:,t,:] = self.h
            self.layers.append(layer)
            
        return hs#次の層に渡すh
    
    
    def backward(self, dhs):
        Wx, Wh, b = self.params
        N, T, D = dhs
        
        dxs = np.empty((N, T, D), dtype='f')
        dh = 0
        grads = []
    
    
    
            
        