## **CHARACTER MODELLING USING LSTM/RNN**

+ The Model takes the text file as input and trains the RNN that learns to predict the next character in a sequence

+ This RNN can be used to generate text character by  character that will look like the original training data.

### **Text Generation Using LSTM/RNN**

#### Import Required Libraries

In [1]:
import tensorflow as tf
import time
import codecs
import os
import collections
from six.moves import cPickle

import numpy as np

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


#### Data Loader:

+ The following cell is the class that helps to read datafrom input file

In [2]:
class TextLoader():
    def __init__(self, data_dir, batch_size, seq_length, encoding='utf-8'):
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.encoding = encoding

        input_file = os.path.join(data_dir, "input.txt")
        vocab_file = os.path.join(data_dir, "vocab.pkl")
        tensor_file = os.path.join(data_dir, "data.npy")

        if not (os.path.exists(vocab_file) and os.path.exists(tensor_file)):
            print("reading text file")
            self.preprocess(input_file, vocab_file, tensor_file)
        else:
            print("loading preprocessed files")
            self.load_preprocessed(vocab_file, tensor_file)
        self.create_batches()
        self.reset_batch_pointer()

    def preprocess(self, input_file, vocab_file, tensor_file):
        with codecs.open(input_file, "r", encoding=self.encoding) as f:
            data = f.read()
        counter = collections.Counter(data)
        count_pairs = sorted(counter.items(), key=lambda x: -x[1])
        self.chars, _ = zip(*count_pairs)
        self.vocab_size = len(self.chars)
        self.vocab = dict(zip(self.chars, range(len(self.chars))))
        with open(vocab_file, 'wb') as f:
            cPickle.dump(self.chars, f)
        self.tensor = np.array(list(map(self.vocab.get, data)))
        np.save(tensor_file, self.tensor)

    def load_preprocessed(self, vocab_file, tensor_file):
        with open(vocab_file, 'rb') as f:
            self.chars = cPickle.load(f)
        self.vocab_size = len(self.chars)
        self.vocab = dict(zip(self.chars, range(len(self.chars))))
        self.tensor = np.load(tensor_file)
        self.num_batches = int(self.tensor.size / (self.batch_size * self.seq_length))

    def create_batches(self):
        self.num_batches = int(self.tensor.size / (self.batch_size * self.seq_length))

        # When the data (tensor) is too small, let's give them a better error message
        if self.num_batches==0:
            assert False, "Not enough data. Make seq_length and batch_size small."

        self.tensor = self.tensor[:self.num_batches * self.batch_size * self.seq_length]
        xdata = self.tensor
        ydata = np.copy(self.tensor)
        ydata[:-1] = xdata[1:]
        ydata[-1] = xdata[0]
        self.x_batches = np.split(xdata.reshape(self.batch_size, -1), self.num_batches, 1)
        self.y_batches = np.split(ydata.reshape(self.batch_size, -1), self.num_batches, 1)


    def next_batch(self):
        x, y = self.x_batches[self.pointer], self.y_batches[self.pointer]
        self.pointer += 1
        return x, y

    def reset_batch_pointer(self):
        self.pointer = 0

#### Parameters

In [3]:
seq_length = 50
batch_size = 60
num_epochs = 50
learning_rate = 0.002
decay_rate = 0.97
rnn_size = 128
num_layers = 2

Download the Input file, and print a part

In [4]:
!wget -nv -O input.txt https://ibm.box.com/shared/static/a3f9e9mbpup09toq35ut7ke3l3lf03hg.txt 
with open('input.txt', 'r') as f:
    read_data = f.read()
    print (read_data[0:100])
f.closed

2020-08-18 04:22:33 URL:https://public.boxcloud.com/d/1/b1!dHcL0sd815gxFj5dDf6XjcLrbPNJjmnGG4Pg5j9FzN8FhTyMhTS29TM0yBI44-VP8oLYeiJsbrHyMaNnbcX7ejNLdNWlJUa-IQvsOIosuKdJzLamJKv6bgjc0WJKQG4yiH9M97PTxnEq01x19DNju3hWFJzyL29K26CB6qm-qeFqCy34mVaQMPhiERx03FLRS4g7lfkzKBBO8xiG8JYou3g1r2cjerTl3s_Ylvj2UZny-Rq3Gf3P-iX6PKrb_Om_dsYt2mHrvZZA6FQvJRyHQvM0TFcA3IPalOr2qUmkAvoDrfZyuvRCWA7LfeHWmV74OaIqW0gISzT6AMVVKakJJMEHP9Bh20K0jSCn4J5YxyUCca3MxhnbYWI6ad6xFax8gnJz_tpDWsWpmaDpDyHnfMezkxRxZrrwWmh64oEuKPGsFmaurz8XMiXQyzaYtTtzVG6XqpfsB_w33cHnaf3n7bljqihHsvOa5kgZJ4j5YLCwHlln9QYQSol4dnkIfxd2CV2oXAIzGfdzyKC41KE6nAu7TU0ERRr6pR0GtX02UM00wuxUuiLNl6K3ThRbn20so39UXGcZcphCdGEzQBoquMpFupEUlrG8USUoLJ5hAyyCEM5ZyB_6_4eKz1oT4brGVxMjAYNUq9ltWfs6cgz6kxBnPhl81l_4oY14oGoq7CyRFm63KvMeb5u9Z_jy1r9VsxxN8P9pbRjeTD30LDf7-iXLYsOw3kIHiSeN3gj_OS0mMEbN7pyLhTlABvC0p9Y41_7KSO8Skur09aeFWCJH63TPfI4urmR3_Y19bomHzPoMp_nDZ6_rdENdTGa7jp-6Tscc4ZFWxBZ7xHOzb0_uuXrTCbx8wC4FKzgYIC1d7FJ5ACTJnZbQSInyK-D105dShRtReKfSqjp8tnQNXBlTv12c1ADACEJVUeZbXyopYyNGn

True

+ Read data as batches, using TextLoader Class.

+ It will convert the character to numbers and represent each sequence as a vector in batches.

In [5]:
data_loader = TextLoader('', batch_size, seq_length)
vocab_size = data_loader.vocab_size
print ("vocabulary size:" ,data_loader.vocab_size)
print ("Characters:" ,data_loader.chars)
print ("vocab number of 'F':",data_loader.vocab['F'])
print ("Character sequences (first batch):", data_loader.x_batches[0])

loading preprocessed files
vocabulary size: 65
Characters: (' ', 'e', 't', 'o', 'a', 'h', 's', 'r', 'n', 'i', '\n', 'l', 'd', 'u', 'm', 'y', ',', 'w', 'f', 'c', 'g', 'I', 'b', 'p', ':', '.', 'A', 'v', 'k', 'T', "'", 'E', 'O', 'N', 'R', 'S', 'L', 'C', ';', 'W', 'U', 'H', 'M', 'B', '?', 'G', '!', 'D', '-', 'F', 'Y', 'P', 'K', 'V', 'j', 'q', 'x', 'z', 'J', 'Q', 'Z', 'X', '3', '&', '$')
vocab number of 'F': 49
Character sequences (first batch): [[49  9  7 ...  1  4  7]
 [19  4 14 ... 14  9 20]
 [ 8 20 10 ...  8 10 18]
 ...
 [21  2  0 ...  0 21  0]
 [ 9  7  7 ...  0  2  3]
 [ 3  7  0 ...  5  9 23]]


#### Input and Output

In [6]:
x,y = data_loader.next_batch()
x

array([[49,  9,  7, ...,  1,  4,  7],
       [19,  4, 14, ..., 14,  9, 20],
       [ 8, 20, 10, ...,  8, 10, 18],
       ...,
       [21,  2,  0, ...,  0, 21,  0],
       [ 9,  7,  7, ...,  0,  2,  3],
       [ 3,  7,  0, ...,  5,  9, 23]])

In [7]:
y

array([[ 9,  7,  6, ...,  4,  7,  0],
       [ 4, 14, 22, ...,  9, 20,  5],
       [20, 10, 29, ..., 10, 18,  4],
       ...,
       [ 2,  0,  6, ..., 21,  0,  6],
       [ 7,  7,  4, ...,  2,  3,  0],
       [ 7,  0, 33, ...,  9, 23,  0]])

In [8]:
print(x.shape)
print(y.shape)

(60, 50)
(60, 50)


#### LSTM Architecture

Each LSTM cell has 5 parts:

+ Input
+ prv_state
+ prv_output
+ new_state
+ new_output

Num of Layers =2

#### Define Satcked RNN Cell

In [9]:
cell = tf.contrib.rnn.BasicRNNCell(rnn_size)

In [10]:
stacked_cell = tf.contrib.rnn.MultiRNNCell([cell] * num_layers)

In [11]:
stacked_cell.output_size

128

State variable keeps output and new_state of the LSTM.

In [12]:
stacked_cell.state_size

(128, 128)

#### Define Input Data

In [13]:
input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
input_data

<tf.Tensor 'Placeholder:0' shape=(60, 50) dtype=int32>

Target Data

In [14]:
targets = tf.placeholder(tf.int32, [batch_size, seq_length])
targets

<tf.Tensor 'Placeholder_1:0' shape=(60, 50) dtype=int32>

+ Memory state of the network is initialized with a vector of zeros and gets updated after reading each character.

+ "BasicRNNCell.zero_state(batch_size, dtype)" will return zero filled zero tensors.

In [15]:
initial_state = stacked_cell.zero_state(batch_size, tf.float32)

In [16]:
initial_state

(<tf.Tensor 'MultiRNNCellZeroState/BasicRNNCellZeroState/zeros:0' shape=(60, 128) dtype=float32>,
 <tf.Tensor 'MultiRNNCellZeroState/BasicRNNCellZeroState_1/zeros:0' shape=(60, 128) dtype=float32>)

Check the Value of input Data.

In [17]:
session = tf.Session()
feed_dict = {input_data : x, targets : y}

session.run(input_data, feed_dict)

array([[49,  9,  7, ...,  1,  4,  7],
       [19,  4, 14, ..., 14,  9, 20],
       [ 8, 20, 10, ...,  8, 10, 18],
       ...,
       [21,  2,  0, ...,  0, 21,  0],
       [ 9,  7,  7, ...,  0,  2,  3],
       [ 3,  7,  0, ...,  5,  9, 23]], dtype=int32)

#### Embeddings

In [18]:
with tf.variable_scope('rnnlm', reuse=False):
    softmax_w = tf.get_variable("softmax_w", [rnn_size, vocab_size])
    softmax_b = tf.get_variable("softmax_b", [vocab_size])
    
        
    # embedding variable is initialized randomely
    embedding = tf.get_variable("embedding", [vocab_size, rnn_size])


    
    em = tf.nn.embedding_lookup(embedding, input_data)
    
    
    inputs = tf.split(em, seq_length, 1)
    
    
    
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

In [19]:
session.run(tf.global_variables_initializer())

session.run(embedding)

array([[ 0.1039549 , -0.08005431, -0.16920386, ..., -0.05346642,
         0.13948037, -0.06619333],
       [ 0.04570121,  0.16956244,  0.06341098, ...,  0.10946678,
         0.01900171, -0.03551383],
       [ 0.09159268, -0.05950297,  0.02533752, ..., -0.13935378,
         0.09763466, -0.02779722],
       ...,
       [ 0.13641317, -0.13807559,  0.05002356, ..., -0.08333156,
        -0.05852745,  0.1285715 ],
       [ 0.09111233,  0.09306572,  0.02881256, ...,  0.08099751,
        -0.02222961,  0.07656036],
       [ 0.05359456,  0.16265638,  0.05553299, ...,  0.09978445,
        -0.05299493,  0.02511995]], dtype=float32)

In [20]:
em = tf.nn.embedding_lookup(embedding, input_data)
emp = session.run(em, feed_dict = {input_data:x})
emp[0]

array([[-0.03555945, -0.05591494, -0.08674122, ..., -0.15841456,
        -0.01403545, -0.07003055],
       [ 0.06486641,  0.13916008,  0.1591173 , ...,  0.14497267,
         0.02352007, -0.03990957],
       [ 0.1573479 ,  0.10765173,  0.07542165, ...,  0.01938517,
         0.13281645,  0.02505915],
       ...,
       [ 0.04570121,  0.16956244,  0.06341098, ...,  0.10946678,
         0.01900171, -0.03551383],
       [ 0.09691967, -0.12097788, -0.10015451, ..., -0.05966448,
         0.07435201, -0.00315626],
       [ 0.1573479 ,  0.10765173,  0.07542165, ...,  0.01938517,
         0.13281645,  0.02505915]], dtype=float32)

In [21]:
print(em.shape)

(60, 50, 128)


In [22]:
print(emp.shape)

(60, 50, 128)


In [23]:
inputs = tf.split(em, seq_length, 1)
inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
inputs[0:5]

[<tf.Tensor 'Squeeze:0' shape=(60, 128) dtype=float32>,
 <tf.Tensor 'Squeeze_1:0' shape=(60, 128) dtype=float32>,
 <tf.Tensor 'Squeeze_2:0' shape=(60, 128) dtype=float32>,
 <tf.Tensor 'Squeeze_3:0' shape=(60, 128) dtype=float32>,
 <tf.Tensor 'Squeeze_4:0' shape=(60, 128) dtype=float32>]

#### Feeding a batch of 50 Sequence to RNN

In [24]:
session.run(inputs[0], feed_dict = {input_data:x})

array([[-0.03555945, -0.05591494, -0.08674122, ..., -0.15841456,
        -0.01403545, -0.07003055],
       [ 0.09789135,  0.17524461, -0.078011  , ..., -0.16016895,
        -0.07557131,  0.00349143],
       [-0.055346  , -0.08602604,  0.07792567, ...,  0.17299135,
         0.07704233, -0.1564047 ],
       ...,
       [-0.12628981, -0.04801689, -0.05817143, ..., -0.01046297,
        -0.10838394, -0.16038889],
       [ 0.06486641,  0.13916008,  0.1591173 , ...,  0.14497267,
         0.02352007, -0.03990957],
       [ 0.01414138,  0.13014899,  0.08713482, ..., -0.06855214,
         0.03155929,  0.02914608]], dtype=float32)

Check the new state and outputs

In [25]:

outputs, new_state = tf.contrib.legacy_seq2seq.rnn_decoder(inputs, initial_state, stacked_cell, loop_function=None, scope='rnnlm')
new_state

(<tf.Tensor 'rnnlm_1/rnnlm/multi_rnn_cell/cell_0/basic_rnn_cell/Tanh_98:0' shape=(60, 128) dtype=float32>,
 <tf.Tensor 'rnnlm_1/rnnlm/multi_rnn_cell/cell_0/basic_rnn_cell/Tanh_99:0' shape=(60, 128) dtype=float32>)

In [26]:
outputs[0:5]

[<tf.Tensor 'rnnlm_1/rnnlm/multi_rnn_cell/cell_0/basic_rnn_cell/Tanh_1:0' shape=(60, 128) dtype=float32>,
 <tf.Tensor 'rnnlm_1/rnnlm/multi_rnn_cell/cell_0/basic_rnn_cell/Tanh_3:0' shape=(60, 128) dtype=float32>,
 <tf.Tensor 'rnnlm_1/rnnlm/multi_rnn_cell/cell_0/basic_rnn_cell/Tanh_5:0' shape=(60, 128) dtype=float32>,
 <tf.Tensor 'rnnlm_1/rnnlm/multi_rnn_cell/cell_0/basic_rnn_cell/Tanh_7:0' shape=(60, 128) dtype=float32>,
 <tf.Tensor 'rnnlm_1/rnnlm/multi_rnn_cell/cell_0/basic_rnn_cell/Tanh_9:0' shape=(60, 128) dtype=float32>]

Check the output network after feeding it with first batch

In [27]:
first_output = outputs[0]

session.run(tf.global_variables_initializer())

session.run(first_output, feed_dict = {input_data:x})

array([[-0.04861006,  0.03924445, -0.04854872, ...,  0.01310559,
        -0.04325602,  0.02226839],
       [ 0.14082868, -0.09441803,  0.0839439 , ...,  0.04821848,
        -0.06481316, -0.06492446],
       [ 0.01229214, -0.02644767,  0.01852591, ...,  0.02330318,
         0.04176168, -0.00712036],
       ...,
       [ 0.0199025 , -0.09378577, -0.05991625, ..., -0.04106001,
         0.02913299,  0.06101136],
       [ 0.04954085, -0.03680451,  0.03716031, ..., -0.08969241,
         0.04023373,  0.04445355],
       [-0.10488816, -0.07871149, -0.01299806, ..., -0.07100566,
        -0.04919834, -0.06924637]], dtype=float32)

#### Calculate the Probability using Softmax

In [28]:
output = tf.reshape(tf.concat( outputs,1), [-1, rnn_size])
output

<tf.Tensor 'Reshape:0' shape=(3000, 128) dtype=float32>

In [29]:
logits = tf.matmul(output, softmax_w) + softmax_b
logits

<tf.Tensor 'add:0' shape=(3000, 65) dtype=float32>

In [30]:
probs = tf.nn.softmax(logits)
probs

<tf.Tensor 'Softmax:0' shape=(3000, 65) dtype=float32>

probability of next character in all batches

In [31]:
session.run(tf.global_variables_initializer())
session.run(probs, feed_dict = {input_data:x})

array([[0.01475394, 0.01829847, 0.02017664, ..., 0.01643569, 0.01166173,
        0.01352268],
       [0.01321734, 0.01589874, 0.01485914, ..., 0.01585357, 0.0113903 ,
        0.0167792 ],
       [0.01426184, 0.01513404, 0.01405699, ..., 0.0125112 , 0.01292378,
        0.01315115],
       ...,
       [0.0209191 , 0.02091805, 0.02358138, ..., 0.01521501, 0.01223968,
        0.01203389],
       [0.0175853 , 0.01975196, 0.01607215, ..., 0.01122846, 0.01189314,
        0.01257226],
       [0.01367688, 0.015186  , 0.01166135, ..., 0.0102095 , 0.01622624,
        0.01499231]], dtype=float32)

Calculate the cost of training with Loss Function

In [32]:
grad_clip = 5

tvars = tf.trainable_variables()
tvars

[<tf.Variable 'rnnlm/softmax_w:0' shape=(128, 65) dtype=float32_ref>,
 <tf.Variable 'rnnlm/softmax_b:0' shape=(65,) dtype=float32_ref>,
 <tf.Variable 'rnnlm/embedding:0' shape=(65, 128) dtype=float32_ref>,
 <tf.Variable 'rnnlm/multi_rnn_cell/cell_0/basic_rnn_cell/kernel:0' shape=(256, 128) dtype=float32_ref>,
 <tf.Variable 'rnnlm/multi_rnn_cell/cell_0/basic_rnn_cell/bias:0' shape=(128,) dtype=float32_ref>]

#### Together

In [33]:
class LSTMModel():
    def __init__(self,sample=False):
        rnn_size = 128 
        batch_size = 60
        seq_length = 50
        num_layers = 2
        vocab_size = 65
        grad_clip = 5.
        if sample:
            print(">> sample mode:")
            batch_size = 1
            seq_length = 1
        
        basic_cell = tf.contrib.rnn.BasicRNNCell(rnn_size)
        
        
        
        self.stacked_cell = tf.contrib.rnn.MultiRNNCell([basic_cell] * num_layers)

        self.input_data = tf.placeholder(tf.int32, [batch_size, seq_length], name="input_data")
        self.targets = tf.placeholder(tf.int32, [batch_size, seq_length], name="targets")
        
        
        
        self.initial_state = stacked_cell.zero_state(batch_size, tf.float32)

        with tf.variable_scope('rnnlm_class1'):
            softmax_w = tf.get_variable("softmax_w", [rnn_size, vocab_size]) #128x65
            softmax_b = tf.get_variable("softmax_b", [vocab_size]) # 1x65
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [vocab_size, rnn_size])  #65x128
                inputs = tf.split(tf.nn.embedding_lookup(embedding, self.input_data), seq_length, 1)
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
                
                
                


        # The value of state is updated after processing each batch of chars.
        outputs, last_state = tf.contrib.legacy_seq2seq.rnn_decoder(inputs, self.initial_state, self.stacked_cell, loop_function=None, scope='rnnlm_class1')
        output = tf.reshape(tf.concat(outputs,1), [-1, rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example([self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([batch_size * seq_length])],
                vocab_size)
        self.cost = tf.reduce_sum(loss) / batch_size / seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
    
    
    def sample(self, sess, chars, vocab, num=200, prime='The ', sampling_type=1):
        state = sess.run(self.stacked_cell.zero_state(1, tf.float32))
        #print state
        for char in prime[:-1]:
            x = np.zeros((1, 1))
            x[0, 0] = vocab[char]
            feed = {self.input_data: x, self.initial_state:state}
            [state] = sess.run([self.final_state], feed)

        def weighted_pick(weights):
            t = np.cumsum(weights)
            s = np.sum(weights)
            return(int(np.searchsorted(t, np.random.rand(1)*s)))

        ret = prime
        char = prime[-1]
        for n in range(num):
            x = np.zeros((1, 1))
            x[0, 0] = vocab[char]
            feed = {self.input_data: x, self.initial_state:state}
            [probs, state] = sess.run([self.probs, self.final_state], feed)
            p = probs[0]

            if sampling_type == 0:
                sample = np.argmax(p)
            elif sampling_type == 2:
                if char == ' ':
                    sample = weighted_pick(p)
                else:
                    sample = np.argmax(p)
            else: # sampling_type == 1 default:
                sample = weighted_pick(p)

            pred = chars[sample]
            ret += pred
            char = pred
        return ret

#### Creating LSTM Object

In [34]:
with tf.variable_scope("rnn"):
    model = LSTMModel()

#### Train using LSTM Model Class

Train the Model through feeding Batches

In [35]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(num_epochs): # num_epochs is 5 for test, but should be higher
        sess.run(tf.assign(model.lr, learning_rate * (decay_rate ** e)))
        data_loader.reset_batch_pointer()
        state = sess.run(model.initial_state) # (2x[60x128])
        for b in range(data_loader.num_batches): #for each batch
            start = time.time()
            x, y = data_loader.next_batch()
            feed = {model.input_data: x, model.targets: y, model.initial_state:state}
            train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
            end = time.time()
        print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                .format(e * data_loader.num_batches + b, num_epochs * data_loader.num_batches, e, train_loss, end - start))
        with tf.variable_scope("rnn", reuse=True):
            sample_model = LSTMModel(sample=True)
            print (sample_model.sample(sess, data_loader.chars , data_loader.vocab, num=50, prime='The ', sampling_type=1))
            print ('----------------------------------')

370/18550 (epoch 0), train_loss = 1.915, time/batch = 0.482
>> sample mode:
The aut not he sol. Ged I my them Satel?
She chise'd t
----------------------------------
741/18550 (epoch 1), train_loss = 1.748, time/batch = 0.462
>> sample mode:
The wriep asse: boot begiend.

ANFERIO:
Wes but frien:
----------------------------------
1112/18550 (epoch 2), train_loss = 1.669, time/batch = 0.480
>> sample mode:
The Sid life?

ASTINGWARKATLANO:
Whone her.

KING EDWA
----------------------------------
1483/18550 (epoch 3), train_loss = 1.626, time/batch = 0.559
>> sample mode:
The stily die.

CLARENCE:
Lo is a hogrown,
In ever,
An
----------------------------------
1854/18550 (epoch 4), train_loss = 1.598, time/batch = 0.503
>> sample mode:
The raign lets aftere
I hole foreet comes that behoked
----------------------------------
2225/18550 (epoch 5), train_loss = 1.575, time/batch = 0.541
>> sample mode:
The was to come; bid Cade:
And I'll neweds an Forth
Do
----------------------------------
