In [1]:
import tensorflow as tf
import numpy as np
import os

In [2]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

In [3]:
with open(path_to_file,encoding='utf-8') as f:
    text=f.read()

In [4]:
vocab = sorted(set(text))

In [5]:
vocab

['\n',
 ' ',
 '!',
 '$',
 '&',
 "'",
 ',',
 '-',
 '.',
 '3',
 ':',
 ';',
 '?',
 'A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'U',
 'V',
 'W',
 'X',
 'Y',
 'Z',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

# Data Preparation

In [6]:
# Index -> character lookup: the position in the array is the character's id.
idx2char=np.array(vocab)
# Character -> index lookup, the inverse mapping of idx2char.
char2idx = dict(zip(idx2char, range(len(idx2char))))

In [7]:
encoded = np.array([char2idx[c] for c in text], dtype=np.int32)

In [8]:
text[:10],encoded[:10]

('First Citi', array([18, 47, 56, 57, 58,  1, 15, 47, 58, 47], dtype=int32))

## 1st method

In [9]:
def get_batches(arr, n_seqs, n_steps):
    """Cut an encoded stream into aligned (input, target) mini-batches.

    The stream is trimmed to a whole number of batches and laid out as
    ``n_seqs`` contiguous rows. Consecutive windows of ``n_steps`` columns
    become consecutive batches, so row ``r`` of batch ``k+1`` continues
    row ``r`` of batch ``k``.

    Args:
        arr: 1-D array-like of encoded characters.
        n_seqs: number of rows (sequences) per batch.
        n_steps: number of time steps per row in each batch.

    Returns:
        ``(x_train, y_train)``: two lists with one ``(n_seqs, n_steps)`` array
        per batch. ``y_train[k]`` holds, for every position of ``x_train[k]``,
        the character that follows it in the original stream.
    """
    arr = np.asarray(arr)
    # Characters consumed by one batch, and how many full batches fit
    batch_size = n_seqs * n_steps
    n_batches = len(arr)//batch_size
    print(n_batches)
    # Keep only enough characters to make full batches
    usable = n_batches*batch_size

    # Shift the whole stream left by one so that every target, including the
    # last column of each window, is the real next character. np.roll wraps the
    # first character round to the end; replace it with the true successor
    # whenever the stream has characters beyond the part we keep.
    targets = np.roll(arr[:usable], -1)
    if 0 < usable < len(arr):
        targets[-1] = arr[usable]

    # Reshape into n_seqs rows
    inputs = arr[:usable].reshape((n_seqs,-1))
    targets = targets.reshape((n_seqs,-1))
    print(inputs.shape)

    x_train=[]
    y_train=[]
    for n in range(0, inputs.shape[1], n_steps):
        # The features and their one-step-ahead targets for this window
        x_train.append(inputs[:,n:n+n_steps])
        y_train.append(targets[:,n:n+n_steps])
    return x_train,y_train

In [10]:
x_train,y_train = get_batches(encoded, n_seqs=64, n_steps=100)

174
(64, 17400)


In [11]:
x=np.array(x_train)
y=np.array(y_train)
x.shape,y.shape

((174, 64, 100), (174, 64, 100))

In [12]:
# Keep the batches in their original order: row r of batch k+1 continues row r
# of batch k, which is what stateful LSTM layers rely on when they carry their
# state from one batch to the next. Shuffling would break that continuity.
dataset=tf.data.Dataset.from_tensor_slices((x,y))

In [13]:
for i,j in dataset.take(1):
    print(i.shape)

(64, 100)


## 2nd method

In [29]:
char_dataset=tf.data.Dataset.from_tensor_slices(encoded)

In [None]:
# `batch` groups the character stream into fixed-length chunks. Each chunk holds
# seq_len+1 characters so it can be split into an input and a target that are
# offset by one position; drop_remainder=True discards a short final chunk.
seq_len = 100
sequences=char_dataset.batch(batch_size=seq_len+1,drop_remainder=True)

In [184]:
def shift(chunk):
    """Split a chunk into an (input, target) pair offset by one position.

    The input is everything but the last element; the target is everything
    but the first, so target[i] is the element that follows input[i].
    """
    return chunk[:-1], chunk[1:]
dataset=sequences.map(shift)

In [185]:
for i ,t in dataset.take(1):
    print(i,t)

tf.Tensor(
[18 47 56 57 58  1 15 47 58 47 64 43 52 10  0 14 43 44 53 56 43  1 61 43
  1 54 56 53 41 43 43 42  1 39 52 63  1 44 59 56 58 46 43 56  6  1 46 43
 39 56  1 51 43  1 57 54 43 39 49  8  0  0 13 50 50 10  0 31 54 43 39 49
  6  1 57 54 43 39 49  8  0  0 18 47 56 57 58  1 15 47 58 47 64 43 52 10
  0 37 53 59], shape=(100,), dtype=int32) tf.Tensor(
[47 56 57 58  1 15 47 58 47 64 43 52 10  0 14 43 44 53 56 43  1 61 43  1
 54 56 53 41 43 43 42  1 39 52 63  1 44 59 56 58 46 43 56  6  1 46 43 39
 56  1 51 43  1 57 54 43 39 49  8  0  0 13 50 50 10  0 31 54 43 39 49  6
  1 57 54 43 39 49  8  0  0 18 47 56 57 58  1 15 47 58 47 64 43 52 10  0
 37 53 59  1], shape=(100,), dtype=int32)


In [186]:
batch_size=64

In [187]:
# Number of full batches when the text is cut into 64 rows of 100 characters
# per batch (the layout produced by get_batches in the "1st method").
# NOTE(review): the "2nd method" pipeline chunks by seq_len+1 and then batches
# with drop_remainder=True, so it presumably yields
# len(text)//(seq_len+1)//batch_size batches instead — confirm before relying
# on this value.
n_batches=len(text)//(64*100)
n_batches

174

In [188]:
dataset=dataset.batch(batch_size,drop_remainder=True)

# Building the Model

In [88]:
from tensorflow.keras import Model,Input,Sequential
from tensorflow.keras import layers

In [105]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the character model: embedding, two stacked LSTMs, dense output.

    Args:
        vocab_size: number of distinct characters; sets both the embedding
            input size and the width of the output layer.
        embedding_dim: size of each character embedding vector.
        rnn_units: number of units in each LSTM layer.
        batch_size: fixed batch dimension of the model input.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    # Fixed batch dimension, open (None) time dimension.
    stack = [layers.Input(batch_shape=[batch_size, None])]
    stack.append(layers.Embedding(vocab_size, embedding_dim))
    # Two identical stateful recurrent layers, each emitting one output per step.
    for _ in range(2):
        stack.append(
            layers.LSTM(rnn_units,return_sequences=True,stateful=True,dropout=0.2)
        )
    # No activation here: the layer outputs one raw score per vocabulary entry.
    stack.append(layers.Dense(vocab_size))
    return tf.keras.Sequential(stack)

In [106]:
vocab_size=len(vocab)
embed_dim=100
units=128

In [107]:
model = build_model(
  vocab_size = vocab_size,
  embedding_dim=embed_dim,
  rnn_units=units,
  batch_size=64)

- `stateful=True` treats consecutive batches as one long continuous sequence
  
    - it keeps the hidden state between batches, so the model remembers the sequence across batches
    - it requires a fixed batch size, and the states must be reset manually (e.g. with `reset_states()`) after each epoch
- `stateful=False`: after each batch the LSTM resets its hidden state and forgets everything before that batch

In [108]:
model.summary()

In [109]:
# from livelossplot.tf_keras import PlotLossesCallback
ck_point=tf.keras.callbacks.ModelCheckpoint(
    filepath='text_gen.weights.h5',
    save_weights_only=True,
)

In [110]:
def loss(labels,logits):
    """Sparse categorical cross-entropy between integer labels and raw logits."""
    crossentropy = tf.keras.losses.sparse_categorical_crossentropy
    # from_logits=True: the second argument holds unnormalised scores.
    return crossentropy(labels, logits, from_logits=True)

In [111]:
model.compile(optimizer='adam',loss=loss)

In [113]:
# model.load_weights('text_gen1.weights.h5')

In [24]:
epochs=20
for epoch in range(epochs):
    print(f"Epoch {epoch}/{epochs}\n")
    # One pass over the data per fit() call, so the recurrent state can be
    # cleared between passes.
    model.fit(dataset,epochs=1,callbacks=[ck_point],shuffle=False)
    # Clear the state of every layer that exposes reset_states().
    resettable=[layer for layer in model.layers if hasattr(layer,'reset_states')]
    for layer in resettable:
        layer.reset_states()

Epoch 0/20

[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 32ms/step - loss: 2.9606
Epoch 1/20

[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 34ms/step - loss: 2.4128
Epoch 2/20

[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 34ms/step - loss: 2.2789
Epoch 3/20

[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 35ms/step - loss: 2.1963
Epoch 4/20

[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 34ms/step - loss: 2.1356
Epoch 5/20

[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 34ms/step - loss: 2.0879
Epoch 6/20

[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 34ms/step - loss: 2.0490
Epoch 7/20

[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 37ms/step - loss: 2.0148
Epoch 8/20

[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 36ms/step - loss: 1.9862
Epoch 9/20

[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s

# Model Inference

We give the model a starting string, and every output is fed back in as input to predict the next character.

In [25]:
string=u"ROMEO: "

In [26]:
input_string=[char2idx[i] for i in string]

In [27]:
input_string = tf.expand_dims(input_string,0)

In [28]:
input_string

<tf.Tensor: shape=(1, 7), dtype=int32, numpy=array([[30, 27, 25, 17, 27, 10,  1]], dtype=int32)>

In [29]:
for layer in model.layers:
    if hasattr(layer,'reset_states'):
        layer.reset_states()

In [30]:
pred=model.predict(input_string)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 233ms/step


In [31]:
pred.shape

(64, 7, 65)

Rebuild the model with the same architecture, but change the batch size to 1.

In [33]:
# Same architecture with the batch dimension fixed to 1 for generation.
model2=build_model(vocab_size,embed_dim,units,batch_size=1)
model2.build(tf.TensorShape([1, None]))
# Restore the weights written by the ModelCheckpoint callback during training
# (its filepath is 'text_gen.weights.h5').
model2.load_weights('text_gen.weights.h5')

In [34]:
prediction=model2.predict(input_string)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 180ms/step


In [35]:
prediction.shape

(1, 7, 65)

In [36]:
pred = tf.squeeze(prediction, 0)
pred.shape

TensorShape([7, 65])

In [58]:
p=tf.random.categorical(pred,num_samples=1).numpy()

In [59]:
p

array([[15],
       [19],
       [ 7],
       [30],
       [26],
       [ 0],
       [42]])

In [60]:
''.join(idx2char[p[-1][0]])

'd'

In [63]:
p=tf.random.categorical(pred[-1:],num_samples=1)[-1,0].numpy()
p

np.int64(46)

In [64]:
''.join(idx2char[p])

'h'

## Make final generation

In [70]:
def generate_text(model,start_str,num_gen=1000,temperature=.3):
    """Sample ``num_gen`` characters from ``model``, seeded with ``start_str``.

    Args:
        model: character-level model whose call returns logits with a leading
            batch dimension of 1 (e.g. ``build_model(..., batch_size=1)``).
        start_str: non-empty seed text; every character must be a key of
            ``char2idx``.
        num_gen: number of characters to generate.
        temperature: positive divisor applied to the logits before sampling.
            Low values give more predictable, coherent text; high values give
            more surprising text.

    Returns:
        ``start_str`` followed by the generated characters.

    Raises:
        ValueError: if ``start_str`` is empty or ``temperature`` is not positive.
        KeyError: if ``start_str`` contains a character missing from ``char2idx``.
    """
    if not start_str:
        raise ValueError("start_str must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    input_string=[char2idx[i] for i in start_str]
    input_string = tf.expand_dims(input_string,0)
    print(input_string.shape)

    # Start from a clean recurrent state, and note whether any layer keeps its
    # state between calls.
    carries_state = False
    for layer in model.layers:
        if hasattr(layer,'reset_states'):
            layer.reset_states()
        if getattr(layer,'stateful',False):
            carries_state = True

    generated_text=[]
    for _ in range(num_gen):
        prediction=model(input_string)
        # Drop the batch dimension and keep only the last time step's logits.
        pred = tf.squeeze(prediction,0)[-1:]
        pred = pred/temperature

        pred_id=tf.random.categorical(pred,num_samples=1)[-1,0].numpy()
        generated_text.append(idx2char[pred_id])

        next_char = tf.constant([[pred_id]], dtype=input_string.dtype)
        if carries_state:
            # A stateful model already remembers everything fed so far, so only
            # the new character is passed on. Re-feeding the whole history
            # would process the earlier characters again on top of that state.
            input_string = next_char
        else:
            # A stateless model needs the full history on every step:
            # concatenate the new character to the previous inputs.
            input_string = tf.concat([input_string, next_char], axis=-1)

    return start_str + ''.join(generated_text)

In [74]:
generate_text(model2,start_str=u"Hello ",num_gen=100)

(1, 6)


'Hello make the prome\nThe countred the some and the stand the she meed the aring the with the death and wit'