In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

In [2]:
# Path of the corpus file, relative to the notebook's working directory
path = 'moby_dick.txt'

# Read the whole corpus into memory; the context manager guarantees the
# file handle is closed even if reading fails.
with open(path, 'r') as corpus_file:
    text = corpus_file.read()

In [3]:
# Collect each distinct character of the corpus once, in sorted order
vocab = list(set(text))
vocab.sort()
print(vocab)

['\n', ' ', '!', '"', '$', '&', "'", '(', ')', '*', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [4]:
# Text processing: lookup table from character to its position in `vocab`
char_to_ind = dict(zip(vocab, range(len(vocab))))

In [5]:
# Inverse lookup: indexing this array with a character id yields the character
ind_to_char = np.array(list(vocab))

In [6]:
# Translate the whole corpus into its sequence of integer character ids
char_ids = [char_to_ind[ch] for ch in text]
encoded_text = np.array(char_ids)
encoded_text

array([28, 33, 26, ..., 12,  0,  0])

In [7]:
# Number of distinct characters the model can read and emit
vocab_size = len(vocab)

# Width of each character embedding vector
embed_dim = 64

# Unit counts passed to create_model for the first and second GRU layers
rnn_neurons, rnn_hidden = 512, 256

In [8]:
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.losses import sparse_categorical_crossentropy

In [9]:
# Loss function used when compiling the model
def sparse_cat_loss(y_true,y_pred):
    """Sparse categorical cross-entropy evaluated on raw logits.

    Wraps Keras' ``sparse_categorical_crossentropy`` with
    ``from_logits=True`` so that ``y_pred`` is interpreted as unnormalised
    scores rather than probabilities.
    """
    loss = sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
    return loss

In [10]:
def create_model(vocab_size, embed_dim, rnn_neurons,rnn_hidden,batch_size):
    """Build and compile the character-level model.

    The network is an embedding layer, two stacked stateful GRU layers that
    return full sequences, and a dense layer producing one score per
    vocabulary entry at every timestep.

    Args:
        vocab_size: Number of distinct characters (embedding input size and
            dense output size).
        embed_dim: Size of each embedding vector.
        rnn_neurons: Number of units in the first GRU layer.
        rnn_hidden: Number of units in the second GRU layer.
        batch_size: Fixed batch size baked into the input shape, which the
            stateful GRU layers require.

    Returns:
        The model, compiled with the Adam optimizer and ``sparse_cat_loss``.
    """
    # Options shared by both recurrent layers
    gru_options = dict(
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform',
    )

    stack = [
        Embedding(vocab_size, embed_dim, batch_input_shape=[batch_size, None]),
        GRU(rnn_neurons, **gru_options),
        GRU(rnn_hidden, **gru_options),
        # Final dense layer: one output per vocabulary entry, no activation
        Dense(vocab_size),
    ]

    network = Sequential(stack)
    network.compile(optimizer='adam', loss=sparse_cat_loss)
    return network

In [11]:
from tensorflow.keras.models import load_model

In [12]:
# Recreate the architecture with a batch size of 1 so a single seed string
# can be fed to the model during generation.
model = create_model(
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    rnn_neurons=rnn_neurons,
    rnn_hidden=rnn_hidden,
    batch_size=1,
)

# Restore the saved weights from disk
model.load_weights('moby_dick.h5')

# Build for input of shape (1, any sequence length)
model.build(tf.TensorShape([1, None]))


In [13]:
# Print each layer's output shape and parameter count for the model
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (1, None, 64)             5184      
_________________________________________________________________
gru (GRU)                    (1, None, 512)            887808    
_________________________________________________________________
gru_1 (GRU)                  (1, None, 256)            591360    
_________________________________________________________________
dense (Dense)                (1, None, 81)             20817     
Total params: 1,505,169
Trainable params: 1,505,169
Non-trainable params: 0
_________________________________________________________________


In [14]:
def generate_text(model, start_seed,gen_size=100,temp=1.0):
    """Generate text one character at a time from a trained model.

    The seed is fed through the model once to prime its recurrent state;
    afterwards each sampled character becomes the next single-step input.

    Args:
        model: Trained stateful model built for batch size 1. Calling it on
            a ``(1, time)`` tensor of character ids must return per-timestep
            scores (logits) over the vocabulary.
        start_seed: Non-empty seed text. Every character must be a key of
            the module-level ``char_to_ind`` mapping.
        gen_size: Number of characters to generate.
        temp: Sampling temperature, must be > 0. The logits are divided by
            this value before sampling, so values below 1 make the output
            more predictable and values above 1 make it more surprising.

    Returns:
        ``start_seed`` followed by the generated characters.

    Raises:
        ValueError: If ``start_seed`` is empty or ``temp`` is not positive.
        KeyError: If ``start_seed`` contains a character that is not in the
            vocabulary.
    """
    if not start_seed:
        raise ValueError("start_seed must contain at least one character")
    if temp <= 0:
        # Dividing logits by zero or a negative number gives a meaningless
        # (or inverted) sampling distribution.
        raise ValueError(f"temp must be positive, got {temp!r}")

    unknown_chars = sorted(set(start_seed) - set(char_to_ind))
    if unknown_chars:
        raise KeyError(
            f"start_seed contains characters not in the vocabulary: {unknown_chars}"
        )

    # Vectorize the seed text and add the batch dimension (batch size == 1)
    input_eval = tf.expand_dims([char_to_ind[s] for s in start_seed], 0)

    # Generated characters are collected here and joined at the end
    text_generated = []

    # Start from a clean recurrent state so earlier calls do not leak in
    model.reset_states()

    for _ in range(gen_size):
        # Scores for every timestep of the current input, batch dim removed
        predictions = tf.squeeze(model(input_eval), 0)

        # Temperature scaling: lower temperature == less surprising / more
        # expected text, higher temperature == more surprising text.
        predictions = predictions / temp

        # Sample from the categorical distribution and keep the draw for the
        # last timestep, i.e. the character following the current input.
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # The sampled character is the next input; the stateful layers carry
        # the context of everything seen so far.
        input_eval = tf.expand_dims([predicted_id], 0)

        # Transform the id back into its character
        text_generated.append(ind_to_char[predicted_id])

    return start_seed + ''.join(text_generated)
  

In [16]:
# Sample 1000 characters seeded with "land", using the default temperature
print(generate_text(model, start_seed="land", gen_size=1000))

lands.)--Thundering on the way working in
the part of the whaling fishermen advantively reward by other mechanches; and to
square haven if for the floor, and
would through the scorghed respliciently encountered, chest of sight, he
establish a chief-clotment at the nerrow we barrely
before.  Any more than it is, I do asy heads
and told, but stuffed off in his whole glance, and full of mouth, "take token of
the forehead's cheep and ute, ten instant stepped and seldom into the proper gloubles, may barrels towards the spears, was likewise food, in order to stop four
life, a looked bir seas gried operate it.

Now, at a general terrors
shouted out the devil, not my mast-head, no blood, Queequeg, "bearted to much the holiest; an
unshimable foubt without drowsing full of
sparinary; but so dispiritias, Ahab knowingly tutking his harpoon, Ahab just as voyage, when about was crossed by gentleman,
Ahab," cried Ahab after discoverer to the unaccountable fonce, alax: when her cabin semine malities
t

As you can see, the output contains many made-up words, but I think the results are good. If you want more realistic results, you need to train on a larger text corpus.