In [None]:
# importing tensorflow and numpy
import tensorflow as tf
import numpy as np
import os

Get and prepare data

In [None]:
## Run once in Colab to retrieve the King James Bible input file
path_to_file = "the-king-james-bible.txt"
# Read through a context manager so the file handle is closed deterministically,
# and pin the encoding so decoding does not depend on the platform default.
# NOTE: the file starts with a byte-order mark, which ends up in the vocabulary
# as '\ufeff'. Opening with encoding='utf-8-sig' would drop it, but that shrinks
# the vocabulary by one, so any previously saved weights would no longer fit.
with open(path_to_file, 'r', encoding='utf-8') as text_file:
  text = text_file.read()

In [None]:
# Character-level vocabulary: every distinct character in the corpus, sorted.
vocab = sorted(set(text))
# Lookup tables in both directions (character -> id, id -> character).
char_to_ind = dict(zip(vocab, range(len(vocab))))
ind_to_char = np.array(vocab)
# Encode the whole corpus as an array of integer ids.
encoded_text = np.array([char_to_ind[character] for character in text])
print(char_to_ind)
print(len(vocab))

{'\n': 0, ' ': 1, '!': 2, "'": 3, '(': 4, ')': 5, '*': 6, ',': 7, '-': 8, '.': 9, '0': 10, '1': 11, '2': 12, '3': 13, '4': 14, '5': 15, '6': 16, '7': 17, '8': 18, '9': 19, ':': 20, ';': 21, '?': 22, 'A': 23, 'B': 24, 'C': 25, 'D': 26, 'E': 27, 'F': 28, 'G': 29, 'H': 30, 'I': 31, 'J': 32, 'K': 33, 'L': 34, 'M': 35, 'N': 36, 'O': 37, 'P': 38, 'Q': 39, 'R': 40, 'S': 41, 'T': 42, 'U': 43, 'V': 44, 'W': 45, 'Y': 46, 'Z': 47, 'a': 48, 'b': 49, 'c': 50, 'd': 51, 'e': 52, 'f': 53, 'g': 54, 'h': 55, 'i': 56, 'j': 57, 'k': 58, 'l': 59, 'm': 60, 'n': 61, 'o': 62, 'p': 63, 'q': 64, 'r': 65, 's': 66, 't': 67, 'u': 68, 'v': 69, 'w': 70, 'x': 71, 'y': 72, 'z': 73, '\ufeff': 74}
75


In [None]:
seq_len = 120
# Stream the encoded characters one id at a time ...
char_dataset = tf.data.Dataset.from_tensor_slices(encoded_text)
# ... and group them into chunks of seq_len + 1 ids. The extra id lets each
# chunk be split into an input and a target offset by one position. The final
# incomplete chunk is dropped.
sequences = char_dataset.batch(batch_size=seq_len + 1, drop_remainder=True)

In [None]:
def create_seq_targets(seq):
  """Split a chunk into an (input, target) pair offset by one position.

  The input is every element except the last; the target is every element
  except the first, so target[i] is the element that follows input[i].
  """
  return seq[:-1], seq[1:]

In [None]:
# Turn every chunk into an (input, target) pair.
dataset = sequences.map(create_seq_targets)

# Preview the first pair, both as raw ids and decoded back to characters.
for input_txt, target_txt in dataset.take(1):
  input_ids = input_txt.numpy()
  target_ids = target_txt.numpy()
  print(input_ids)
  print(" ".join(ind_to_char[input_ids]))
  print('\n')
  print(target_ids)
  print(" ".join(ind_to_char[target_ids]))

[74 42 55 52  1 28 56 65 66 67  1 24 62 62 58  1 62 53  1 35 62 66 52 66
 20  1  1 25 48 59 59 52 51  1 29 52 61 52 66 56 66  0  0  0 11 20 11  1
 31 61  1 67 55 52  1 49 52 54 56 61 61 56 61 54  1 29 62 51  1 50 65 52
 48 67 52 51  1 67 55 52  1 55 52 48 69 52 61  1 48 61 51  1 67 55 52  1
 52 48 65 67 55  9  0  0 11 20 12  1 23 61 51  1 67 55 52  1 52 48 65 67]
﻿ T h e   F i r s t   B o o k   o f   M o s e s :     C a l l e d   G e n e s i s 
 
 
 1 : 1   I n   t h e   b e g i n n i n g   G o d   c r e a t e d   t h e   h e a v e n   a n d   t h e   e a r t h . 
 
 1 : 2   A n d   t h e   e a r t


[42 55 52  1 28 56 65 66 67  1 24 62 62 58  1 62 53  1 35 62 66 52 66 20
  1  1 25 48 59 59 52 51  1 29 52 61 52 66 56 66  0  0  0 11 20 11  1 31
 61  1 67 55 52  1 49 52 54 56 61 61 56 61 54  1 29 62 51  1 50 65 52 48
 67 52 51  1 67 55 52  1 55 52 48 69 52 61  1 48 61 51  1 67 55 52  1 52
 48 65 67 55  9  0  0 11 20 12  1 23 61 51  1 67 55 52  1 52 48 65 67 55]
T h e   F i r s t   B o o 

In [None]:
batch_size = 128
buffer_size = 10000
# Build the training pipeline from `sequences` instead of re-assigning `dataset`
# in terms of itself. The self-referential form batches an already-batched
# dataset if this cell is run a second time; this form gives the same result on
# every run.
dataset = (
  sequences
  .map(create_seq_targets)
  .shuffle(buffer_size)
  .batch(batch_size, drop_remainder=True)
)

In [None]:
# Display the batched dataset's element shapes and dtypes.
dataset

<BatchDataset shapes: ((128, 120), (128, 120)), types: (tf.int64, tf.int64)>

In [None]:
# Model hyperparameters.
# Size of the character vocabulary (embedding input size and output layer width).
vocab_size = len(vocab)
# Number of units in the GRU layer.
rnn_neurons = 1026
# Dimensionality of the character embedding vectors.
embed_dim = 64

from tensorflow.keras.losses import sparse_categorical_crossentropy
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding,GRU,Dense

def sparse_cat_loss(y_true,y_pred):
  """Sparse categorical cross-entropy computed on raw logits.

  Thin wrapper around `sparse_categorical_crossentropy` that fixes
  `from_logits=True`, so `y_pred` is treated as unnormalised scores rather
  than probabilities.
  """
  loss = sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
  return loss

def create_model(vocab_size,embed_dim,rnn_neurons,batch_size):
  """Build and compile the character-level model: Embedding -> GRU -> Dense.

  Args:
    vocab_size: number of distinct characters; used as the embedding input
      size and as the width of the output layer.
    embed_dim: size of each embedding vector.
    rnn_neurons: number of units in the GRU layer.
    batch_size: fixed batch dimension of the input. The GRU is stateful, so
      the batch size is baked into the model.

  Returns:
    A Sequential model compiled with the Adam optimizer and `sparse_cat_loss`.
    The output layer has no activation.
  """
  layers = [
    Embedding(vocab_size, embed_dim, batch_input_shape=[batch_size, None]),
    GRU(
      rnn_neurons,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform',
    ),
    Dense(vocab_size),
  ]
  model = Sequential(layers)
  model.compile('adam', loss=sparse_cat_loss)
  return model

# Reuse the `batch_size` the training dataset was batched with instead of
# repeating the literal. The model is stateful with a fixed batch dimension, so
# the two values must stay in sync.
model = create_model(vocab_size=vocab_size,embed_dim=embed_dim,rnn_neurons=rnn_neurons,batch_size=batch_size)

In [None]:
# Print the layer-by-layer output shapes and parameter counts of the training model.
model.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_9 (Embedding)      (128, None, 64)           4800      
_________________________________________________________________
gru_9 (GRU)                  (128, None, 1026)         3361176   
_________________________________________________________________
dense_9 (Dense)              (128, None, 75)           77025     
Total params: 3,443,001
Trainable params: 3,443,001
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Run a single batch through the model as a quick check of the output shape.
for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)

In [None]:
# One score per vocabulary entry for every position of every sequence in the batch.
example_batch_predictions.shape

TensorShape([128, 120, 75])

In [None]:
# Train for a fixed number of passes over the shuffled, batched dataset.
epochs = 10
model.fit(dataset,epochs=epochs)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7faa38043358>

In [None]:
# Save the trained model to an HDF5 file in the working directory.
model.save('king_james.h5')

In [None]:
from tensorflow.keras.models import load_model

In [None]:
# Rebuild the same architecture with a batch size of 1 for generation. The batch
# dimension of the stateful model is fixed, so a new model is created and the
# saved weights are loaded into it.
model = create_model(
  vocab_size=vocab_size,
  embed_dim=embed_dim,
  rnn_neurons=rnn_neurons,
  batch_size=1,
)
model.load_weights('king_james.h5')
model.build(tf.TensorShape([1, None]))

In [None]:
# Print the layer-by-layer output shapes and parameter counts of the batch-size-1 model.
model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_10 (Embedding)     (1, None, 64)             4800      
_________________________________________________________________
gru_10 (GRU)                 (1, None, 1026)           3361176   
_________________________________________________________________
dense_10 (Dense)             (1, None, 75)             77025     
Total params: 3,443,001
Trainable params: 3,443,001
Non-trainable params: 0
_________________________________________________________________


In [None]:
def generate_text(model,start_seed,gen_size=500,temp=1.0):
  """Generate text one character at a time from a stateful character model.

  Relies on the module-level `char_to_ind` and `ind_to_char` lookup tables.

  Args:
    model: callable mapping a (1, sequence) batch of character ids to
      per-position scores over the vocabulary; must provide `reset_states()`.
    start_seed: non-empty string used to prime the model. Every character
      must be a key of `char_to_ind`.
    gen_size: number of characters to generate after the seed.
    temp: sampling temperature, must be > 0. The scores are divided by it
      before sampling, so values below 1 sharpen the distribution and values
      above 1 flatten it.

  Returns:
    `start_seed` followed by the `gen_size` generated characters.

  Raises:
    ValueError: if `start_seed` is empty or `temp` is not positive.
    KeyError: if `start_seed` contains a character missing from `char_to_ind`.
  """
  # Validate before touching the model: an empty seed yields a zero-length
  # sequence with no last position to sample from, and a non-positive
  # temperature gives meaningless scores after the division.
  if not start_seed:
    raise ValueError("start_seed must contain at least one character")
  if temp <= 0:
    raise ValueError("temp must be greater than zero")

  # Encode the seed and add the batch dimension expected by the model.
  input_eval = tf.expand_dims([char_to_ind[s] for s in start_seed], 0)
  text_generated = []
  # Start from a clean recurrent state so earlier calls do not leak in.
  model.reset_states()

  for _ in range(gen_size):
    predictions = model(input_eval)
    # Only the last position predicts the next, not-yet-seen character.
    last_step = predictions[:, -1, :] / temp
    predicted_id = tf.random.categorical(last_step, num_samples=1)[0, 0].numpy()
    # Feed just the sampled character back in; the GRU state carries the context.
    input_eval = tf.expand_dims([predicted_id], 0)
    text_generated.append(ind_to_char[predicted_id])
  return start_seed + "".join(text_generated)


In [None]:
# Sample 1000 characters from the model, primed with the seed "Genesis".
print(generate_text(model,"Genesis",gen_size=1000))

Genesise at the countra, and pain fraiket, till I break in two of God.

1:24 And for the devils pour out anithers, that great Ahinah the son of Jerahor
the Balhathites.

26:29 Neither came neither stil.

1:14 Seeing Noah went heal, and set him up.

1:7 And when he gave him like a ray rumbur of your lips. And he went
down unto me appused in mine enemies' fear: 1:27  Where is afraid at his
decree, and fastened the face of the
anointing accospitaling of the eliquetiles, and did they not pray you,
even with our fathers obtainly me about: they of the land, yet not shew
them by vineyard of the children, and pronounce hundred pleasing up on the other.

41:11 And in nothing cometh ene affer likengs out of the land, seek thy land
that weight of your words; Hear the word is the Lord:20 And a certain woman, beholding the thigh the hith his
princes, and their a thousand, and a ram, and upright
in their loins, and are thine eyes, that I might receive a gloriou also was power to
hear God, to day of 