In [2]:
import numpy as np 
import matplotlib.pyplot as plt 
%matplotlib inline

def relu(x):
    return np.maximum(0., x)


Bad key "ckend" on line 1 in
/Users/sichenglei/.matplotlib/matplotlibrc.
You probably need to get an updated matplotlibrc file from
http://github.com/matplotlib/matplotlib/blob/master/matplotlibrc.template
or from the matplotlib source distribution


In [3]:
#weather forecasting data
import os

fname = "jena_climate_2009_2016.csv"

f = open(fname)
data = f.read()
f.close()

lines = data.split('\n')
header = lines[0].split(',')
lines = lines[1:]

print (header)
print (len(lines))

['"Date Time"', '"p (mbar)"', '"T (degC)"', '"Tpot (K)"', '"Tdew (degC)"', '"rh (%)"', '"VPmax (mbar)"', '"VPact (mbar)"', '"VPdef (mbar)"', '"sh (g/kg)"', '"H2OC (mmol/mol)"', '"rho (g/m**3)"', '"wv (m/s)"', '"max. wv (m/s)"', '"wd (deg)"']
420551


In [4]:
#parsing the data, store in an array
import numpy as np

#data/time dun need to store
float_data = np.zeros((len(lines), len(header)-1))
for i, line in enumerate(lines):
    values = [float(x) for x in line.split(',')[1:]]
    float_data[i, :] = values

In [5]:
float_data.shape

(420551, 14)

In [6]:
'''
problem formulation:
given data going as far back as lookback timesteps (a timestep is 10 minutes)
and sampled every steps timesteps,
predict the temperature in delay timesteps
'''
#normalize the data
#use the first 200000 data only
mean = float_data[:200000].mean(axis=0)
float_data -= mean
std = float_data[:200000].std(axis=0)
float_data /= std


In [7]:
#samples close together are very simialr (redundant)
#so we only take one data from each hour
def generator(data, lookback, delay, min_index, max_index,
             shuffle=False, batch_size=128, step=6):
    if max_index is None:
        max_index = len(data) - 1 - delay 
    i = min_index + lookback
    while 1:
        if shuffle:
            rows = np.random.randint(
                min_index + lookback, max_index, size=batch_size)
        #if not shuffle, then draw data in chronological order
        else:
            if i + batch_size >= max_index:
                #not enough for a new batch, restart
                i = min_index + lookback
            rows = np.arange(i, min(i+batch_size, max_index))
            i += len(rows)
        
        samples = np.zeros((len(rows), #==batch_size
                            lookback//step,
                            data.shape[-1]))
        targets = np.zeros((len(rows),))
        for j, row in enumerate(rows):
            indices = range(row-lookback, row, step)
            samples[j] = data[indices]
            targets[j] = data[row+delay][1]
        samples = np.reshape(samples, (data.shape[-1], lookback//step, 
                                 len(rows)))
        yield samples, targets
    
#every time you call the generator, will return the next group of data

In [8]:
#preparing the training, validation, and test generators
#seqlen = lookback//step
lookback = 36
step = 6
delay = 144  #from the past 10 days to predict the next day
batch_size = 128

#only training data need to randomly select batch
train_gen = generator(float_data, 
                      lookback=lookback,
                      delay=delay,
                      min_index=0,
                      max_index=200000,
                      shuffle=True,
                      step=step,
                      batch_size=batch_size)

val_gen = generator(float_data,
                    lookback=lookback,
                    delay=delay,
                    min_index=200001,
                    max_index=300000,
                    step=step,
                    batch_size=batch_size)

test_gen = generator(float_data,
                     lookback=lookback,
                     delay=delay,
                     min_index=300001,
                     max_index=None,
                     step=step,
                     batch_size=batch_size)

train_steps = (200001 - lookback) // batch_size
val_steps = (300000 - 200001 - lookback) // batch_size
test_steps = (len(float_data) - 300001 - lookback) // batch_size


In [126]:
def clip(orig, norm):
    ans = np.zeros_like(orig)
    for i in range(orig.shape[1]):
        if np.nansum(np.square(orig[:, i])) >= norm:
            ans[:, i] = orig[:, i]/np.nansum(np.absolute(orig[:, i]))*norm
        else:
            ans[:, i] = orig[:, i]
    return ans

In [127]:
clip(5*np.ones((2,3)), 1.0)

array([[0.5, 0.5, 0.5],
       [0.5, 0.5, 0.5]])

In [139]:
# NOTE: fit and feed forward are two seperate things
# fit will use the trained parameters
# pseudo inverse: matrix dimension will transpose
# data need to be in columns
class RNN(object):
    def __init__(self, INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM, N_BATCH, SEQ_LEN, NORM):
        # N_BATCH: number of examples in one batch
        # initialize weight matrix for input, hidden, output matrix
        # these weights are shared
        self.norm = NORM
        self.hid = HIDDEN_DIM
        self.nbatch = N_BATCH
#         np.random.seed(0)
        self.wi = np.random.normal(-0.1, 0.1, [HIDDEN_DIM, INPUT_DIM])
#         np.random.seed(1)
        self.wh = np.random.normal(-0.1, 0.1, [HIDDEN_DIM, HIDDEN_DIM])
#         np.random.seed(2)
        self.wo = np.random.normal(-0.1, 0.1, [OUTPUT_DIM, HIDDEN_DIM])

        self.h = list()
        self.u = list()
        self.v = list()

        # intialize variables in RNN
        # NOTE: initial h is always initialized as zero matrix and is not trained!
        for t in range(SEQ_LEN):
            # could try initialize h to all zeros 
#             np.random.seed(t+10)
            self.h.append(np.random.normal(-0.1, 0.1, [HIDDEN_DIM, N_BATCH]))
#             np.random.seed(t+200)
            self.u.append(np.random.normal(-0.1, 0.1, [HIDDEN_DIM, N_BATCH]))
#             np.random.seed(t+400)
            self.v.append(np.random.normal(-0.1, 0.1, [HIDDEN_DIM, N_BATCH]))
        
#         np.random.seed(5)
        self.yT =  np.random.normal(-0.1, 0.1, [OUTPUT_DIM, N_BATCH])

        self.wi_list = []
        self.wh_list = []
        self.wo_list = []

        # same shape as the the final otuput
        self.lambda_lagrange = np.ones((OUTPUT_DIM, N_BATCH))

    def wi_update(self, ut, xt):
        xt = clip(xt, self.norm)
        pinv = np.linalg.pinv(xt)
        wi = np.dot(ut, pinv)
        return clip(wi, self.norm)

    def wh_update(self, vt, hprev):
        hprev = clip(hprev, self.norm)
        pinv = np.linalg.pinv(hprev)
        wh = np.dot(vt, pinv)
        return clip(wh, self.norm) 

    def wo_update(self, yT, hT):
        hT = clip(hT, self.norm)
        pinv = np.linalg.pinv(hT)
        wo = np.dot(yT, pinv)
        return clip(wo, self.norm) 

    def ut_update(self, ut, vt, wi, xt, ht, alpha, gamma):
        u_v = ut + vt 
        new_ut = np.zeros_like(ut)
        sol1 = np.dot(wi, xt)  # u_v < 0
        sol2 = (alpha*ht + gamma*np.dot(wi, xt) - alpha*vt)/(gamma+alpha)

        new_ut[u_v>=0.] = sol2[u_v>=0.]
        new_ut[u_v<0.] = sol1[u_v<0.]

        return clip(new_ut, self.norm)

    def vt_update(self, ut, vt, wh, hprev, ht, omega, alpha):
        u_v = ut + vt 
        new_vt = np.zeros_like(vt)
        sol1 = np.dot(wh, hprev) # u_v < 0 
        sol2 = (omega*np.dot(wh, hprev) - alpha*ut + alpha*ht)/(omega+alpha)    
        new_vt[u_v>=0.] = sol2[u_v>=0.]
        new_vt[u_v<0] = sol1[u_v<0]

        return clip(new_vt, self.norm)

    def ht_update(self, omega, vnext, wh, alpha, ut, vt):
        parta = omega*np.dot(wh, vnext) + alpha*relu(ut + vt)
        partb = omega*np.dot(wh.T, wh) + alpha*np.eye(wh.shape[1])
        partb = clip(partb, self.norm)
        np.linalg.pinv(partb)
        return clip(np.dot(np.linalg.pinv(partb), parta), self.norm)

    # update last output
    def hT_update(self, yT, wo):
        wo = clip(wo, self.norm)
        hT = np.dot(np.linalg.pinv(wo), yT)
        return clip(hT, self.norm) 

    # necessary becoz yT is used in wo update
    # target is in one-hot format
    # target: (OUTPUT_DIM, N_BATCH)
    def yT_update(self, target, beta, wo, hT, lambda_lagrange):
        yT = (target + beta*np.dot(wo, hT) - lambda_lagrange/2)/(1+beta)
        return clip(yT, self.norm) 

    def lambda_update(self, beta, yT, wo, hT):
        lambda_up = beta*(yT - np.dot(wo, hT))
        return clip(self.lambda_lagrange + lambda_up, self.norm) 

    # input shape: (N_BATCH, seq_len, INPUT_DIM)
    # many-to-one
    def feed_forward(self, inputs):
        seq_len = inputs.shape[1]

        hidden = np.zeros((self.hid, self.nbatch))

        for t in range(seq_len):
            X = inputs[:, t, :]  #(INPUT_DIM, N_BATCH)
            hidden = relu(np.dot(self.wi, X) + np.dot(self.wh, hidden))

        output = np.dot(self.wo, hidden)
        return output 
        # shape: (OUTPUT_DIM, N_BATCH)
    
    def fit(self, inputs, labels, alpha, beta, gamma, omega, val_gen):
        # inputs: (INPUT_DIM, SEQ_LEN, N_BATCH)
        seq_len = inputs.shape[1]
        init_hidden = np.zeros((self.hid, self.nbatch))
        
        for t in range(seq_len):
            xt = inputs[:, t, :]
            self.wi = self.wi_update(self.u[t], xt)
            self.u[t] = self.ut_update(self.u[t], self.v[t], self.wi, xt, self.h[t], alpha, gamma)
            if t>0:
                self.wh = self.wh_update(self.v[t], self.h[t-1])
                self.v[t] = self.vt_update(self.u[t], self.v[t], self.wh, self.h[t-1], self.h[t], omega, alpha)
            else:
                # step 0
                self.wh = self.wh_update(self.v[t], init_hidden)
                self.v[t] = self.vt_update(self.u[t], self.v[t], self.wh, init_hidden, self.h[t], omega, alpha)
            if t < seq_len-1:
                self.h[t] = self.ht_update(omega, self.v[t+1], self.wh, alpha, self.u[t], self.v[t])
            else:
                self.h[t] = self.hT_update(self.yT, self.wo)
        
        self.wo = self.wo_update(self.yT, self.h[-1])
        self.yT = self.yT_update(labels, beta, self.wo, self.h[-1], self.lambda_lagrange)
        self.lambda_lagrange = self.lambda_update(beta, self.yT, self.wo, self.h[-1])

#         ## add accuracy to evaluate function if needed
#         for (data, labels) in val_gen:
#             loss = self.evaluate(data, labels)
#             break
#         return loss 

    def warming(self, inputs, labels, alpha, beta, gamma, omega, epochs):
        # inputs: (INPUT_DIM, SEQ_LEN, N_BATCH)
        seq_len = inputs.shape[1]
        init_hidden = np.zeros((self.hid, self.nbatch))
        for ep in range(epochs):
#             print ("------ Warming: {:d} ------".format(ep))
            for t in range(seq_len):
                xt = inputs[:, t, :]
                self.wi = self.wi_update(self.u[t], xt)
#                 print ("wi: ", self.wi[0][0])
                self.u[t] = self.ut_update(self.u[t], self.v[t], self.wi, xt, self.h[t], alpha, gamma)
#                 print ("ut: ", self.u[t][0][0])
                if t>0:
                    self.wh = self.wh_update(self.v[t], self.h[t-1])
                    self.v[t] = self.vt_update(self.u[t], self.v[t], self.wh, self.h[t-1], self.h[t], omega, alpha)
                else:
                    # step 0
                    self.wh = self.wh_update(self.v[t], init_hidden)
                    self.v[t] = self.vt_update(self.u[t], self.v[t], self.wh, init_hidden, self.h[t], omega, alpha)
#                 print ("wh: ", self.wh[0][0])
#                 print ("vt: ", self.v[t][0][0])
                if t < seq_len-1:
                    self.h[t] = self.ht_update(omega, self.v[t+1], self.wh, alpha, self.u[t], self.v[t])
                else:
                    self.h[t] = self.hT_update(self.yT, self.wo)
#                 print ("ht: ", self.h[t][0][0])
                
#                 self.wi_list.append(self.wi[2][2])
#                 self.wh_list.append(self.wh[2][2])
#                 self.wo_list.append(self.wo[0][1])
                
#             plt.plot(range(len(self.wi_list)), self.wi_list)
#             plt.plot(range(len(self.wi_list)), self.wh_list)
#             plt.plot(range(len(self.wi_list)), self.wo_list)
#             plt.legend(['wi', 'wh', 'wo'], loc='upper left')

#             plt.show()
            
            self.wo = self.wo_update(self.yT, self.h[-1])
            self.yT = self.yT_update(labels, beta, self.wo, self.h[-1], self.lambda_lagrange)

        


    def evaluate(self, inputs, labels):
        # inputs: (input_dim, seq_len, N_BATCH)
        # labels: (output_dim, N_BATCH)
        preds = self.feed_forward(inputs)
        # (OUTPUT_DIM, N_BATCH)
        labels = np.asarray(labels)

        loss = np.mean(np.square(np.subtract(preds, labels)))
        return loss 


In [29]:
n_inputs = 14
n_outputs = 1
n_hiddens = 32
n_batch = 128
seqlen = lookback // step
train_epochs = 5
warm_epochs = 5

alpha = 50
beta = 0.1 
gamma = 0.1 
omega = 0.1

In [15]:
try_list = [0.1, 1.0, 10.0, 50.0, 100.0]
for alpha in try_list:
    for beta in try_list:
        for gamma in try_list:
            for omega in try_list:
                try:
                    model = RNN(n_inputs, n_hiddens, n_outputs, n_batch, seqlen)
                    steps = 0
                    for (data, labels) in train_gen:
                        if steps >= train_steps:
                            break
                        model.warming(data, labels, alpha, beta, gamma, omega, warm_epochs)
                        steps += 1
                    print ("------ success ------",alpha, beta, gamma, omega)
                except:
                    print ("failed: ", alpha, beta, gamma, omega)

failed:  0.1 0.1 0.1 0.1
failed:  0.1 0.1 0.1 1.0


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  large = s > cutoff


failed:  0.1 0.1 0.1 10.0
failed:  0.1 0.1 0.1 50.0
failed:  0.1 0.1 0.1 100.0
failed:  0.1 0.1 1.0 0.1
failed:  0.1 0.1 1.0 1.0
failed:  0.1 0.1 1.0 10.0
failed:  0.1 0.1 1.0 50.0
failed:  0.1 0.1 1.0 100.0
failed:  0.1 0.1 10.0 0.1
failed:  0.1 0.1 10.0 1.0
failed:  0.1 0.1 10.0 10.0
failed:  0.1 0.1 10.0 50.0
failed:  0.1 0.1 10.0 100.0
failed:  0.1 0.1 50.0 0.1
failed:  0.1 0.1 50.0 1.0
failed:  0.1 0.1 50.0 10.0
failed:  0.1 0.1 50.0 50.0
failed:  0.1 0.1 50.0 100.0
failed:  0.1 0.1 100.0 0.1
failed:  0.1 0.1 100.0 1.0
failed:  0.1 0.1 100.0 10.0
failed:  0.1 0.1 100.0 50.0
failed:  0.1 0.1 100.0 100.0
failed:  0.1 1.0 0.1 0.1
failed:  0.1 1.0 0.1 1.0
failed:  0.1 1.0 0.1 10.0
failed:  0.1 1.0 0.1 50.0
failed:  0.1 1.0 0.1 100.0
failed:  0.1 1.0 1.0 0.1
failed:  0.1 1.0 1.0 1.0
failed:  0.1 1.0 1.0 10.0
failed:  0.1 1.0 1.0 50.0
failed:  0.1 1.0 1.0 100.0
failed:  0.1 1.0 10.0 0.1
failed:  0.1 1.0 10.0 1.0
failed:  0.1 1.0 10.0 10.0
failed:  0.1 1.0 10.0 50.0
failed:  0.1 1.0 10.0

failed:  10.0 10.0 0.1 1.0
failed:  10.0 10.0 0.1 10.0
failed:  10.0 10.0 0.1 50.0
failed:  10.0 10.0 0.1 100.0
failed:  10.0 10.0 1.0 0.1
failed:  10.0 10.0 1.0 1.0
failed:  10.0 10.0 1.0 10.0
failed:  10.0 10.0 1.0 50.0
failed:  10.0 10.0 1.0 100.0
failed:  10.0 10.0 10.0 0.1
failed:  10.0 10.0 10.0 1.0
failed:  10.0 10.0 10.0 10.0
failed:  10.0 10.0 10.0 50.0
failed:  10.0 10.0 10.0 100.0
failed:  10.0 10.0 50.0 0.1
failed:  10.0 10.0 50.0 1.0
failed:  10.0 10.0 50.0 10.0
failed:  10.0 10.0 50.0 50.0
failed:  10.0 10.0 50.0 100.0
failed:  10.0 10.0 100.0 0.1
failed:  10.0 10.0 100.0 1.0
failed:  10.0 10.0 100.0 10.0
failed:  10.0 10.0 100.0 50.0
failed:  10.0 10.0 100.0 100.0
failed:  10.0 50.0 0.1 0.1
failed:  10.0 50.0 0.1 1.0
failed:  10.0 50.0 0.1 10.0
failed:  10.0 50.0 0.1 50.0
failed:  10.0 50.0 0.1 100.0
failed:  10.0 50.0 1.0 0.1
failed:  10.0 50.0 1.0 1.0
failed:  10.0 50.0 1.0 10.0
failed:  10.0 50.0 1.0 50.0
failed:  10.0 50.0 1.0 100.0
failed:  10.0 50.0 10.0 0.1
failed

failed:  100.0 50.0 10.0 10.0
failed:  100.0 50.0 10.0 50.0
failed:  100.0 50.0 10.0 100.0
failed:  100.0 50.0 50.0 0.1
failed:  100.0 50.0 50.0 1.0
failed:  100.0 50.0 50.0 10.0
failed:  100.0 50.0 50.0 50.0
failed:  100.0 50.0 50.0 100.0
failed:  100.0 50.0 100.0 0.1
failed:  100.0 50.0 100.0 1.0
failed:  100.0 50.0 100.0 10.0
failed:  100.0 50.0 100.0 50.0
failed:  100.0 50.0 100.0 100.0
failed:  100.0 100.0 0.1 0.1
failed:  100.0 100.0 0.1 1.0
failed:  100.0 100.0 0.1 10.0
failed:  100.0 100.0 0.1 50.0
failed:  100.0 100.0 0.1 100.0
failed:  100.0 100.0 1.0 0.1
failed:  100.0 100.0 1.0 1.0
failed:  100.0 100.0 1.0 10.0
failed:  100.0 100.0 1.0 50.0
failed:  100.0 100.0 1.0 100.0
failed:  100.0 100.0 10.0 0.1
failed:  100.0 100.0 10.0 1.0
failed:  100.0 100.0 10.0 10.0
failed:  100.0 100.0 10.0 50.0
failed:  100.0 100.0 10.0 100.0
failed:  100.0 100.0 50.0 0.1
failed:  100.0 100.0 50.0 1.0
failed:  100.0 100.0 50.0 10.0
failed:  100.0 100.0 50.0 50.0
failed:  100.0 100.0 50.0 100.0


In [58]:
test_loss = []
for (data, labels) in test_gen:
    test_loss.append(model.evaluate(data, labels))
print (sum(test_loss)/float(len(test_loss)))

KeyboardInterrupt: 

In [None]:
alpha = 1.0
beta = 0.1 
gamma = 0.1 
omega = 0.1
norm = 1.0

warm_epochs = 5
train_epochs = 5

model = RNN(n_inputs, n_hiddens, n_outputs, n_batch, seqlen, norm)
steps = 0
for (data, labels) in train_gen:
    if steps >= train_steps:
        break
    model.warming(data, labels, alpha, beta, gamma, omega, warm_epochs)
    steps += 1

for e in range(train_epochs):
    steps = 0
    for (data, labels) in train_gen:
        if steps >= train_steps:
            break
        loss = model.fit(data, labels, alpha, beta, gamma, omega, val_gen)
        print ("epoch: %d, val loss: %.3f"%(e, loss))
        steps += 1



In [137]:
for seqlen in [6, 12, 24, 48, 96, 192]:
    #preparing the training, validation, and test generators
    #seqlen = lookback//step
    step = 6
    lookback = seqlen * step
    delay = 72  #from the past 10 days to predict the next day
    batch_size = 128

    #only training data need to randomly select batch
    train_gen = generator(float_data, 
                          lookback=lookback,
                          delay=delay,
                          min_index=0,
                          max_index=200000,
                          shuffle=True,
                          step=step,
                          batch_size=batch_size)

    val_gen = generator(float_data,
                        lookback=lookback,
                        delay=delay,
                        min_index=200001,
                        max_index=300000,
                        step=step,
                        batch_size=batch_size)

    test_gen = generator(float_data,
                         lookback=lookback,
                         delay=delay,
                         min_index=300001,
                         max_index=None,
                         step=step,
                         batch_size=batch_size)

    train_steps = (200001 - lookback) // batch_size
    val_steps = (300000 - 200001 - lookback) // batch_size
    test_steps = (len(float_data) - 300001 - lookback) // batch_size
    
    alpha = 10.0
    beta = 0.1 
    gamma = 1.0 
    omega = 0.1
    norm = 5.0

    warm_epochs = 3
    train_epochs = 10

    model = RNN(n_inputs, n_hiddens, n_outputs, n_batch, seqlen, norm)
    steps = 0
    try:
        for (data, labels) in train_gen:
            if steps >= train_steps:
                break
            model.warming(data, labels, alpha, beta, gamma, omega, warm_epochs)
            steps += 1

        for e in range(train_epochs):
            steps = 0
            for (data, labels) in train_gen:
                if steps >= train_steps:
                    break
                loss = model.fit(data, labels, alpha, beta, gamma, omega, val_gen)
#                 print ("epoch: %d, val loss: %.3f"%(e, loss))
                steps += 1
    except:
        pass
    
    steps = 0
    test_loss = []
    for (data, labels) in test_gen:
        if steps >= test_steps:
            break
        loss = model.evaluate(data, labels)
        if loss < 1.0:
            test_loss.append(loss)
        steps += 1
    print ("--------- Test Result ---------")
    if len(test_loss) == 0:
        test_loss += [0]
    print ("seqlen: {}, loss: {:.4f}".format(seqlen, sum(test_loss)/float(len(test_loss))))



--------- Test Result ---------
seqlen: 12, loss: 0.9542
--------- Test Result ---------
seqlen: 24, loss: 0.0000
--------- Test Result ---------
seqlen: 48, loss: 0.7722
--------- Test Result ---------
seqlen: 96, loss: 0.0000


KeyboardInterrupt: 

In [140]:
for seqlen in [24, 96, 192]:
    #preparing the training, validation, and test generators
    #seqlen = lookback//step
    step = 6
    lookback = seqlen * step
    delay = 72  #from the past 10 days to predict the next day
    batch_size = 128

    #only training data need to randomly select batch
    train_gen = generator(float_data, 
                          lookback=lookback,
                          delay=delay,
                          min_index=0,
                          max_index=200000,
                          shuffle=True,
                          step=step,
                          batch_size=batch_size)

    val_gen = generator(float_data,
                        lookback=lookback,
                        delay=delay,
                        min_index=200001,
                        max_index=300000,
                        step=step,
                        batch_size=batch_size)

    test_gen = generator(float_data,
                         lookback=lookback,
                         delay=delay,
                         min_index=300001,
                         max_index=None,
                         step=step,
                         batch_size=batch_size)

    train_steps = (200001 - lookback) // batch_size
    val_steps = (300000 - 200001 - lookback) // batch_size
    test_steps = (len(float_data) - 300001 - lookback) // batch_size
    
    alpha = 10.0
    beta = 0.1 
    gamma = 1.0 
    omega = 0.1
    norm = 5.0

    warm_epochs = 2
    train_epochs = 5

    model = RNN(n_inputs, n_hiddens, n_outputs, n_batch, seqlen, norm)
    steps = 0
    try:
        for (data, labels) in train_gen:
            if steps >= train_steps:
                break
            model.warming(data, labels, alpha, beta, gamma, omega, warm_epochs)
            steps += 1

        for e in range(train_epochs):
            print ("Train epoch: ", e)
            steps = 0
            for (data, labels) in train_gen:
                if steps >= train_steps:
                    break
                model.fit(data, labels, alpha, beta, gamma, omega, val_gen)
#                 print ("epoch: %d, val loss: %.3f"%(e, loss))
                steps += 1
    except:
        pass
    
    steps = 0
    test_loss = []
    for (data, labels) in test_gen:
        if steps >= test_steps:
            break
        loss = model.evaluate(data, labels)
        if loss < 1.0:
            test_loss.append(loss)
        steps += 1
    print ("--------- Test Result ---------")
    if len(test_loss) == 0:
        test_loss += [0]
    print ("seqlen: {}, loss: {:.4f}".format(seqlen, sum(test_loss)/float(len(test_loss))))



Train epoch:  0
Train epoch:  1
Train epoch:  2
Train epoch:  3
Train epoch:  4
--------- Test Result ---------
seqlen: 24, loss: 0.6450
Train epoch:  0
Train epoch:  1
Train epoch:  2
--------- Test Result ---------
seqlen: 96, loss: 0.0000
--------- Test Result ---------
seqlen: 192, loss: 0.0000


In [None]:
# seqlen: 6, loss: 0.7530