# Testing different neural network architectures for next-token-prediction

## Michał Gromadzki

In [1]:
import tensorflow as tf
import numpy as np
tf.random.set_seed(1337)

In [2]:
#using tiny shakespeare as input data
with open('input.txt', 'r', encoding='utf-8') as f:
    text = f.read()

## Preprocessing and necessary functions

In [3]:
chars = sorted(list(set(text)))
vocab_size = len(chars)

In [4]:
stoi = { ch:i for i,ch in enumerate(chars) }
itos = { i:ch for i,ch in enumerate(chars) }

In [5]:
encode = lambda s: [stoi[c] for c in s] 
decode = lambda l: ''.join([itos[i] for i in l]) 

In [6]:
n = int(0.9*len(text))
train_data = encode(text[:n])
val_data = encode(text[n:])

In [7]:
train_ds = tf.data.Dataset.from_tensor_slices(train_data)
val_ds = tf.data.Dataset.from_tensor_slices(val_data)

In [8]:
block_size = 128
sequences_train = train_ds.batch(block_size+1, drop_remainder=True)

for seq in sequences_train.take(1):
  print(decode(seq.numpy()))

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to d


In [9]:
sequences_val = val_ds.batch(block_size+1, drop_remainder=True)

In [10]:
def split_sequence(sequence):
    input_text = sequence[:-1]
    target_text = sequence[1:]
    return input_text, target_text

In [11]:
split_sequence("abcdefgh")

('abcdefg', 'bcdefgh')

In [12]:
train_ds_final = sequences_train.map(split_sequence)
val_ds_final = sequences_val.map(split_sequence)

In [13]:
def make_ds(text, block_size, batch_size):
    n = int(0.9*len(text))
    train_data = encode(text[:n])
    val_data = encode(text[n:])
    train_ds = tf.data.Dataset.from_tensor_slices(train_data)
    val_ds = tf.data.Dataset.from_tensor_slices(val_data)
    sequences_train = train_ds.batch(block_size+1, drop_remainder=True)
    sequences_val = val_ds.batch(block_size+1, drop_remainder=True)
    train_ds_final = sequences_train.map(split_sequence)
    val_ds_final = sequences_val.map(split_sequence)
    return train_ds_final.batch(batch_size), val_ds_final.batch(batch_size)

In [14]:
def generate(model, block_size, context, new_tokens):
    context = tf.expand_dims(context[0,-block_size:],0)
    for i in range(new_tokens):
        logits = model.predict(tf.expand_dims(context[0,-block_size:],0),verbose=0)
        logits = logits[0,-1,:]
        pred = tf.random.categorical(tf.math.log(tf.expand_dims(logits,0)),1)
        pred = tf.cast(pred,dtype=tf.int32)
        context = tf.concat([context,pred],axis=1)
    return context

# Models

## Dense - small

In [22]:
new_tokens = 100 #new tokens to generate

In [40]:
block_size = 32 #context size
batch_size = 32
n_embed = 4 #embedding dimensions of each token

In [41]:
train_ds, val_ds = make_ds(text, block_size, batch_size)

In [42]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, n_embed, input_length=block_size),
    tf.keras.layers.Dense(vocab_size),
    tf.keras.layers.Softmax(),
])

In [43]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, 32, 4)             260       
                                                                 
 dense_3 (Dense)             (None, 32, 65)            325       
                                                                 
 softmax_2 (Softmax)         (None, 32, 65)            0         
                                                                 
Total params: 585
Trainable params: 585
Non-trainable params: 0
_________________________________________________________________


In [44]:
model.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.losses.SparseCategoricalCrossentropy())

In [45]:
model.fit(train_ds, epochs=20, batch_size=batch_size, validation_data=val_ds)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x21902f4b610>

In [46]:
for context, _ in val_ds.take(1):    
    print(decode(context[0].numpy()))
    result = generate(model, block_size , context, new_tokens)
    print("-" * 20)
    print(decode(result.numpy()[0]))

?

GREMIO:
Good morrow, neighbou
--------------------
?

GREMIO:
Good morrow, neighboue'dy ar thbe sh s wl moive war ingutheereot STy wiy? alt
IXCENEENEBuur punkiwaeppe pou, alee arth, s


## Dense - large

In [47]:
block_size = 64
batch_size = 32
n_embed = 32

In [48]:
train_ds, val_ds = make_ds(text, block_size, batch_size)

In [49]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, n_embed, input_length=block_size),
    tf.keras.layers.Dense(block_size * n_embed),
    tf.keras.layers.Dense(128),
    tf.keras.layers.Dense(128),
    tf.keras.layers.Dense(vocab_size),
    tf.keras.layers.Softmax(),
])

In [50]:
model.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.losses.SparseCategoricalCrossentropy())

In [51]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, 64, 32)            2080      
                                                                 
 dense_4 (Dense)             (None, 64, 2048)          67584     
                                                                 
 dense_5 (Dense)             (None, 64, 128)           262272    
                                                                 
 dense_6 (Dense)             (None, 64, 128)           16512     
                                                                 
 dense_7 (Dense)             (None, 64, 65)            8385      
                                                                 
 softmax_3 (Softmax)         (None, 64, 65)            0         
                                                                 
Total params: 356,833
Trainable params: 356,833
Non-tr

In [52]:
model.fit(train_ds, epochs=20, batch_size=batch_size, validation_data=val_ds)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x21a6fe1df90>

In [53]:
for context, _ in val_ds.take(1):    
    print(decode(context[0].numpy()))
    result = generate(model, block_size , context, new_tokens)
    print("-" * 20)
    print(decode(result.numpy()[0]))

?

GREMIO:
Good morrow, neighbour Baptista.

BAPTISTA:
Good morr
--------------------
?

GREMIO:
Good morrow, neighbour Baptista.

BAPTISTA:
Good morrane wearknon son doocyo w pis jourel t y louthmy an.
TIO sio, es hetou ioil ll VIO:
TIIneter
Pllo ck


## RNN - small

In [54]:
block_size = 32
batch_size = 32
n_embed = 4

In [55]:
train_ds, val_ds = make_ds(text, block_size, batch_size)

In [56]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, n_embed, input_length=block_size),
    tf.keras.layers.SimpleRNN(block_size * n_embed, return_sequences=True),
    tf.keras.layers.Dense(vocab_size),
    tf.keras.layers.Softmax(),
])

In [57]:
model.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.losses.SparseCategoricalCrossentropy())

In [58]:
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_5 (Embedding)     (None, 32, 4)             260       
                                                                 
 simple_rnn (SimpleRNN)      (None, 32, 128)           17024     
                                                                 
 dense_8 (Dense)             (None, 32, 65)            8385      
                                                                 
 softmax_4 (Softmax)         (None, 32, 65)            0         
                                                                 
Total params: 25,669
Trainable params: 25,669
Non-trainable params: 0
_________________________________________________________________


In [59]:
model.fit(train_ds, epochs=20, batch_size=batch_size, validation_data=val_ds)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x21a86d0db10>

In [60]:
for context, _ in val_ds.take(1):    
    print(decode(context[0].numpy()))
    result = generate(model, block_size , context, new_tokens)
    print("-" * 20)
    print(decode(result.numpy()[0]))

?

GREMIO:
Good morrow, neighbou
--------------------
?

GREMIO:
Good morrow, neighboursel:
And you whon a quacr goo.

RUCENTIO:
I gravent you, Basag:
Af han mero.

HORTENSIO:
Af o't way


# RNN - large

In [67]:
block_size = 64
batch_size = 32
n_embed = 32

In [68]:
train_ds, val_ds = make_ds(text, block_size, batch_size)

In [69]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, n_embed, input_length=block_size),
    tf.keras.layers.SimpleRNN(block_size * n_embed, return_sequences=True),
    tf.keras.layers.Dense(vocab_size),
    tf.keras.layers.Softmax(),
])

In [70]:
model.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_8 (Embedding)     (None, 64, 32)            2080      
                                                                 
 simple_rnn_4 (SimpleRNN)    (None, 64, 2048)          4261888   
                                                                 
 dense_11 (Dense)            (None, 64, 65)            133185    
                                                                 
 softmax_7 (Softmax)         (None, 64, 65)            0         
                                                                 
Total params: 4,397,153
Trainable params: 4,397,153
Non-trainable params: 0
_________________________________________________________________


In [71]:
model.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.losses.SparseCategoricalCrossentropy())

In [72]:
model.fit(train_ds, epochs=20, batch_size=batch_size, validation_data=val_ds)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2192e37e410>

In [73]:
for context, _ in val_ds.take(1):    
    print(decode(context[0].numpy()))
    result = generate(model, block_size , context, new_tokens)
    print("-" * 20)
    print(decode(result.numpy()[0]))

?

GREMIO:
Good morrow, neighbour Baptista.

BAPTISTA:
Good morr
--------------------
?

GREMIO:
Good morrow, neighbour Baptista.

BAPTISTA:
Good morr and heresher eny I lovohen!

WANUEN RORTER:
O the clesin houn I has!
That in thee.

ESAUNETINSES:
I


## GRU - small

In [80]:
block_size = 32
batch_size = 32
n_embed = 4

In [81]:
train_ds, val_ds = make_ds(text, block_size, batch_size)

In [82]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, n_embed, input_length=block_size),
    tf.keras.layers.GRU(block_size * n_embed, return_sequences=True),
    tf.keras.layers.Dense(vocab_size),
    tf.keras.layers.Softmax(),
])

In [83]:
model.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.losses.SparseCategoricalCrossentropy())

In [84]:
model.summary()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_10 (Embedding)    (None, 32, 4)             260       
                                                                 
 gru_3 (GRU)                 (None, 32, 128)           51456     
                                                                 
 dense_13 (Dense)            (None, 32, 65)            8385      
                                                                 
 softmax_9 (Softmax)         (None, 32, 65)            0         
                                                                 
Total params: 60,101
Trainable params: 60,101
Non-trainable params: 0
_________________________________________________________________


In [85]:
model.fit(train_ds, epochs=20, batch_size=batch_size, validation_data=val_ds)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x21a6a9388e0>

In [86]:
for context, _ in val_ds.take(1):    
    print(decode(context[0].numpy()))
    result = generate(model, block_size , context, new_tokens)
    print("-" * 20)
    print(decode(result.numpy()[0]))

?

GREMIO:
Good morrow, neighbou
--------------------
?

GREMIO:
Good morrow, neighbour!

BUUNT:
All of at, that you'll even stland-siscentless all be pustantent and an apbanise,
With hi


## GRU - large

In [87]:
block_size = 64
batch_size = 32
n_embed = 32

In [88]:
train_ds, val_ds = make_ds(text, block_size, batch_size)

In [89]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, n_embed, input_length=block_size),
    tf.keras.layers.GRU(block_size * n_embed, return_sequences=True),
    tf.keras.layers.Dense(vocab_size),
    tf.keras.layers.Softmax(),
])

In [90]:
model.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.losses.SparseCategoricalCrossentropy())

In [91]:
model.summary()

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_11 (Embedding)    (None, 64, 32)            2080      
                                                                 
 gru_4 (GRU)                 (None, 64, 2048)          12791808  
                                                                 
 dense_14 (Dense)            (None, 64, 65)            133185    
                                                                 
 softmax_10 (Softmax)        (None, 64, 65)            0         
                                                                 
Total params: 12,927,073
Trainable params: 12,927,073
Non-trainable params: 0
_________________________________________________________________


In [92]:
model.fit(train_ds, epochs=20, batch_size=batch_size, validation_data=val_ds)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x21a6fde83d0>

In [93]:
for context, _ in val_ds.take(1):    
    print(decode(context[0].numpy()))
    result = generate(model, block_size , context, new_tokens)
    print("-" * 20)
    print(decode(result.numpy()[0]))

?

GREMIO:
Good morrow, neighbour Baptista.

BAPTISTA:
Good morr
--------------------
?

GREMIO:
Good morrow, neighbour Baptista.

BAPTISTA:
Good morrow to: it is as mine own
So sairted so insid: a propage attain'd;
I herred in the true gentleman to 


## LSTM - small

In [94]:
block_size = 32
batch_size = 32
n_embed = 4

In [95]:
train_ds, val_ds = make_ds(text, block_size, batch_size)

In [96]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, n_embed, input_length=block_size),
    tf.keras.layers.LSTM(block_size * n_embed, return_sequences=True),
    tf.keras.layers.Dense(vocab_size),
    tf.keras.layers.Softmax(),
])

In [97]:
model.summary()

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_12 (Embedding)    (None, 32, 4)             260       
                                                                 
 lstm (LSTM)                 (None, 32, 128)           68096     
                                                                 
 dense_15 (Dense)            (None, 32, 65)            8385      
                                                                 
 softmax_11 (Softmax)        (None, 32, 65)            0         
                                                                 
Total params: 76,741
Trainable params: 76,741
Non-trainable params: 0
_________________________________________________________________


In [98]:
model.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.losses.SparseCategoricalCrossentropy())

In [99]:
model.fit(train_ds, epochs=20, batch_size=batch_size, validation_data=val_ds)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x21a8852ee00>

In [100]:
for context, _ in val_ds.take(1):    
    print(decode(context[0].numpy()))
    result = generate(model, block_size , context, new_tokens)
    print("-" * 20)
    print(decode(result.numpy()[0]))

?

GREMIO:
Good morrow, neighbou
--------------------
?

GREMIO:
Good morrow, neighboused of it in a wine;
That the handsables, sir. Master by the not out name, thereforal, if I would of


## LSTM - large

In [101]:
block_size = 64
batch_size = 32
n_embed = 32

In [102]:
train_ds, val_ds = make_ds(text, block_size, batch_size)

In [103]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, n_embed, input_length=block_size),
    tf.keras.layers.LSTM(block_size * n_embed, return_sequences=True),
    tf.keras.layers.Dense(vocab_size),
    tf.keras.layers.Softmax(),
])

In [104]:
model.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_13 (Embedding)    (None, 64, 32)            2080      
                                                                 
 lstm_1 (LSTM)               (None, 64, 2048)          17047552  
                                                                 
 dense_16 (Dense)            (None, 64, 65)            133185    
                                                                 
 softmax_12 (Softmax)        (None, 64, 65)            0         
                                                                 
Total params: 17,182,817
Trainable params: 17,182,817
Non-trainable params: 0
_________________________________________________________________


In [105]:
model.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.losses.SparseCategoricalCrossentropy())

In [106]:
model.fit(train_ds, epochs=20, batch_size=batch_size, validation_data=val_ds)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x21ad7708250>

In [107]:
for context, _ in val_ds.take(1):    
    print(decode(context[0].numpy()))
    result = generate(model, block_size , context, new_tokens)
    print("-" * 20)
    print(decode(result.numpy()[0]))

?

GREMIO:
Good morrow, neighbour Baptista.

BAPTISTA:
Good morr
--------------------
?

GREMIO:
Good morrow, neighbour Baptista.

BAPTISTA:
Good morrow, sir, I see, you would not speak with honour fair.
Say you so protect, my gifts talk thou wilv bu


## Bigger Model

In [24]:
block_size = 256
batch_size = 32
n_embed = 16

In [25]:
train_ds, val_ds = make_ds(text, block_size, batch_size)

In [30]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, n_embed, input_length=block_size),
    tf.keras.layers.LSTM(2048, return_sequences=True),
    tf.keras.layers.LayerNormalization(),
    tf.keras.layers.Dense(vocab_size),
    tf.keras.layers.Softmax(),
    tf.keras.layers.Dropout(0.1)
])

In [31]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 256, 16)           1040      
                                                                 
 lstm_2 (LSTM)               (None, 256, 2048)         16916480  
                                                                 
 layer_normalization_1 (Laye  (None, 256, 2048)        4096      
 rNormalization)                                                 
                                                                 
 dense_2 (Dense)             (None, 256, 65)           133185    
                                                                 
 softmax_2 (Softmax)         (None, 256, 65)           0         
                                                                 
 dropout_2 (Dropout)         (None, 256, 65)           0         
                                                      

In [32]:
model.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.losses.SparseCategoricalCrossentropy())

In [33]:
model.fit(train_ds, epochs=20, batch_size=batch_size, validation_data=val_ds)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1b8f93307f0>

In [34]:
for context, _ in val_ds.take(1):    
    print(decode(context[0].numpy()))
    result = generate(model, block_size , context, 5 * new_tokens)
    print("-" * 20)
    print(decode(result.numpy()[0]))

?

GREMIO:
Good morrow, neighbour Baptista.

BAPTISTA:
Good morrow, neighbour Gremio.
God save you, gentlemen!

PETRUCHIO:
And you, good sir! Pray, have you not a daughter
Call'd Katharina, fair and virtuous?

BAPTISTA:
I have a daughter, sir, called Katha
--------------------
?

GREMIO:
Good morrow, neighbour Baptista.

BAPTISTA:
Good morrow, neighbour Gremio.
God save you, gentlemen!

PETRUCHIO:
And you, good sir! Pray, have you not a daughter
Call'd Katharina, fair and virtuous?

BAPTISTA:
I have a daughter, sir, called Kathan that tell thee,
And your cozer of your geest.

ANGELO:
And I'll will mon well never find contentence:
And not your revenge with comeen their least
when shall begawe the tears woman seak madestable
A lengled hath it speed  toonge, became thee us?

GLOUCESTER:
I think you are can from thee when you been your hoar,
Till eeot eyes faith, on my woman's sirder;
I knee thee in peice: bit is fallestle, And even seech
Sisterners you? what teil? Frield, say then all I s

In [35]:
print(decode(result.numpy()[0]))

?

GREMIO:
Good morrow, neighbour Baptista.

BAPTISTA:
Good morrow, neighbour Gremio.
God save you, gentlemen!

PETRUCHIO:
And you, good sir! Pray, have you not a daughter
Call'd Katharina, fair and virtuous?

BAPTISTA:
I have a daughter, sir, called Kathan that tell thee,
And your cozer of your geest.

ANGELO:
And I'll will mon well never find contentence:
And not your revenge with comeen their least
when shall begawe the tears woman seak madestable
A lengled hath it speed  toonge, became thee us?

GLOUCESTER:
I think you are can from thee when you been your hoar,
Till eeot eyes faith, on my woman's sirder;
I knee thee in peice: bit is fallestle, And even seech
Sisterners you? what teil? Frield, say then all I said after a
woman, as I fiild away


In [36]:
result = generate(model, block_size , context, 10000)
open('more.txt', 'w').write(decode(result.numpy()[0]))

10256

## Final version in model.py