In [13]:
import os, sys
# Put the directory two levels above the current working directory on the
# import path. Presumably this is the checkout root that contains the `atfml`
# package imported below -- confirm if the notebook is moved.
sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))

%load_ext autoreload
%autoreload 1
%aimport atfml

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
import numpy as np
from atfml.utils import inits

def build_rnn_final_dataset(n_hidden_dim=10, n_input_dim=15, n_batch_size=20,
                            n_steps_per_batch=100, n_batches=30, data_noise=0.0, param_noise=0.05):
    """
    Simulate one long trajectory of a small RNN and cut it into training batches.

    dynamic system:
        x_t - input, drawn from a standard normal
        h_t = activation(h_{t-1} @ w_hh + x_t @ w_hi),  activation(z) = max(0, 1 - z)
        y_t = h_t

    The system is simulated once, starting from h_0, for
    n_batches * n_batch_size * n_steps_per_batch consecutive steps. The
    trajectory is then cut into consecutive, non-overlapping windows of
    n_steps_per_batch steps; n_batch_size consecutive windows form one batch.
    The hidden state is NOT reset between windows, so only the very first
    window really starts from h_0.

    parameters:
        n_hidden_dim      - size of the hidden state h_t
        n_input_dim       - size of the input x_t
        n_batch_size      - number of windows (sequences) per batch
        n_steps_per_batch - number of time steps in each window
        n_batches         - number of batches to return
        data_noise        - scale of the uniform [0, 1) noise added to the inputs
                            AFTER the simulation (targets come from the clean inputs)
        param_noise       - width of the zero-centred uniform perturbation added
                            to the initial recurrent matrix

    returns:
        (h_0, w_hi, w_hh), data

        data : [.., {'X': X_i, 'y': y_i}, ..] - n_batches entries

            X_i : (n_batch_size, n_steps_per_batch, n_input_dim)
            y_i : (n_batch_size, n_hidden_dim) - hidden state at the last step of each window
    """
    n_steps = n_batch_size*n_steps_per_batch*n_batches

    # The order of the random draws below is kept fixed so that a seeded run
    # produces the same parameters and inputs as before.
    h_0 = np.random.randn(n_hidden_dim)
    h = np.zeros((n_steps, n_hidden_dim))
    x = np.random.randn(n_steps, n_input_dim)
    w_hi = np.random.randn(n_input_dim, n_hidden_dim)
    # identity_repeat_init is a project helper; presumably it yields an
    # identity-like matrix of the requested shape -- see atfml.utils.inits.
    w_hh = (inits.identity_repeat_init(n_hidden_dim, n_hidden_dim) +
            (np.random.rand(n_hidden_dim, n_hidden_dim)-0.5)*param_noise )

    def activation(z):
        return np.maximum(0, 1-z)

    h_prev = h_0
    for i in range(n_steps):
        h[i] = activation(h_prev.dot(w_hh) + x[i].dot(w_hi))
        h_prev = h[i]

    # Observation noise is applied only after the hidden states were computed.
    x = x + np.random.random(x.shape)*data_noise
    ## x of shape (n_batches*n_batch_size*n_steps_per_batch, n_input_dim)
    ## h of shape (n_batches*n_batch_size*n_steps_per_batch, n_hidden_dim)

    # Row-major reshape puts window `line_n` of batch `batch_n` at
    # [batch_n, line_n], which is exactly the from_idx/to_idx slicing of a
    # hand-written double loop.
    X_all = x.reshape(n_batches, n_batch_size, n_steps_per_batch, n_input_dim)
    h_all = h.reshape(n_batches, n_batch_size, n_steps_per_batch, n_hidden_dim)
    # Target of a window is its final hidden state; copy so every 'y' is a
    # contiguous array that does not alias the full trajectory.
    y_all = h_all[:, :, -1, :].copy()

    # All n_batches batches are emitted: the trajectory above already covers
    # every one of them, so none has to be dropped.
    data = [{'X': X_all[batch_n], 'y': y_all[batch_n]} for batch_n in range(n_batches)]

    return (h_0, w_hi, w_hh), data

In [None]:
from matplotlib import pyplot as plt
%matplotlib inline

from atfml.core import AutoGradBackend, TheanoBackend
from atfml.utils import inits, behaviours

# Backend used by the model class and all `bk.*` calls below. To try the
# autograd backend instead, swap which of the two assignments is commented out.
# bk = AutoGradBackend()
bk = TheanoBackend()

class BasicRNN(bk.ModelLoss):
    """
    Unrolled recurrent model with a learnable initial state.

    Recurrence, applied `seq_steps` times:
        h_t = max(0, 1 - (x_t @ W_input_to_hidden + h_{t-1} @ W_hidden_to_hidden))

    The loss is the summed squared error between the hidden state after the
    last step and the target `y`.
    """

    def __init__(self, *, n_hidden_dim=10, seq_steps=10, n_input_dim=10):
        """Store the dimensions and hand the model description to the backend base class."""
        self.n_hidden_dim = n_hidden_dim
        self.n_input_dim = n_input_dim
        self.seq_steps = seq_steps

        # Initialiser for every weight that does not name its own 'init_method'.
        default_init = inits.gaussian_init_with(mu=0, std=1)

        # Learnable parameters; only h_0 falls back to the default initialiser.
        weights = {
            'h_0': {'shape': (n_hidden_dim, )},
            'W_input_to_hidden': {'shape': (n_input_dim, n_hidden_dim),
                                  'init_method': inits.identity_repeat_init},
            'W_hidden_to_hidden': {'shape': (n_hidden_dim, n_hidden_dim),
                                   'init_method': inits.identity_repeat_init},
        }

        # Layout of one batch; 'batch_size' is a symbolic dimension name.
        inputs = {
            'X': {'shape': ('batch_size', seq_steps, n_input_dim), 'dtype': 'float64'},
            'y': {'shape': ('batch_size', n_hidden_dim), 'dtype': 'float64'},
        }

        optimizer = {'name': 'adam', 'learning_rate': 0.01, 'clip': 100}

        # Loggers whose `.log` attributes are read back after training.
        loggers = {
            'loss': behaviours.LossLogBehaviour(),
            'w_ih': behaviours.WieghtLogBehaviour('W_input_to_hidden'),
            'w_hh': behaviours.WieghtLogBehaviour('W_hidden_to_hidden'),
        }

        super().__init__(
            default_init_method=default_init,
            weight_template=weights,
            data_template=inputs,
            optimization_method=optimizer,
            behaviours=loggers,
        )

    def predict(self, theta, data, const):
        """
        Run the recurrence over all `seq_steps` inputs.

        Returns the hidden state after every step, stacked along axis 1:
        (batch_size, seq_steps, n_hidden_dim).
        """
        # Tile the learnable initial state so every row of the batch starts from it.
        # NOTE(review): this one step goes through plain numpy while everything
        # else uses `bk` -- confirm it behaves as intended on both backends.
        state = np.repeat(theta.h_0[np.newaxis, :], const.batch_size, axis=0)

        per_step_states = []
        for step in range(self.seq_steps):
            pre_activation = (bk.dot(data.X[:, step, :], theta.W_input_to_hidden)
                              + bk.dot(state, theta.W_hidden_to_hidden))
            state = bk.maximum(0, 1 - pre_activation)
            # no stack in autograd: give each state a length-1 time axis now
            # and concatenate along it afterwards
            per_step_states.append(state[:, bk.newaxis, :])

        output = bk.concatenate(per_step_states, axis=1)
        bk.assert_arr_shape({output.shape: (const.batch_size, self.seq_steps, self.n_hidden_dim)})
        return output

    def loss(self, theta, data, const):
        """Summed squared error between the final-step hidden state and `data.y`."""
        final_state = self.predict(theta, data, const)[:, -1, :]
        residual = final_state - data.y
        return bk.sum(residual**2)

    def step_callback(self, loss_val, theta, data, const, info):
        """Print the iteration number and loss on every 100th iteration."""
        if info['n_iter'] % 100 != 0:
            return
        print('%5d %10.4g' % (info['n_iter'], loss_val))
            
def _plot_log_curve(values, title, ylabel):
    """Show one logged series on a log-scaled y axis, in its own labelled figure."""
    fig, ax = plt.subplots()
    ax.semilogy(range(len(values)), values)
    ax.set(title=title, xlabel='log entry', ylabel=ylabel)
    plt.show()


def test_rnn():
    """
    Fit BasicRNN on synthetic data and plot how training progressed.

    Three figures are shown: the logged loss, and the squared distance of the
    logged input-to-hidden and hidden-to-hidden weights from the matrices that
    generated the data.
    """
    # Shared by the generator and the model so the two cannot drift apart.
    n_hidden_dim, n_input_dim, seq_steps = 10, 15, 100

    # The generating h_0 is not used in the comparison below.
    (_, w_ih, w_hh), data = build_rnn_final_dataset(n_hidden_dim=n_hidden_dim, n_input_dim=n_input_dim,
                                                    n_batch_size=5, n_steps_per_batch=seq_steps,
                                                    n_batches=30, data_noise=0.3, param_noise=0.2)

    model = BasicRNN(n_hidden_dim=n_hidden_dim, n_input_dim=n_input_dim, seq_steps=seq_steps)
    model.fit(data, n_max_steps=2000)

    _plot_log_curve(model.behaviours.loss.log, 'Training loss', 'loss')

    w_ih_dists = [np.sum((w_ih_learned_i-w_ih)**2) for w_ih_learned_i in model.behaviours.w_ih.log]
    _plot_log_curve(w_ih_dists, 'W_input_to_hidden: distance to generating weights',
                    'sum of squared differences')

    w_hh_dists = [np.sum((w_hh_learned_i-w_hh)**2) for w_hh_learned_i in model.behaviours.w_hh.log]
    _plot_log_curve(w_hh_dists, 'W_hidden_to_hidden: distance to generating weights',
                    'sum of squared differences')

test_rnn()

Weight shapes are: {'h_0': (10,), 'W_hidden_to_hidden': (10, 10), 'W_input_to_hidden': (15, 10)}, n_total_params: 260
FAST_COMPILE
Building learning step function and gradient ..