In [1]:
import tensorflow as tf
import numpy as np
import os

In [2]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

In [3]:
with open(path_to_file,encoding='utf-8') as f:
    text=f.read()

In [4]:
vocab = sorted(set(text))

In [5]:
vocab

['\n',
 ' ',
 '!',
 '$',
 '&',
 "'",
 ',',
 '-',
 '.',
 '3',
 ':',
 ';',
 '?',
 'A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'U',
 'V',
 'W',
 'X',
 'Y',
 'Z',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

# Data Preparation

In [72]:
idx2char=dict(enumerate(vocab))
char2idx = {w:i for i,w in idx2char.items()}

In [73]:
idx2char[0]

'\n'

In [74]:
len(text)

1115394

In [75]:
encoded = np.array([char2idx[c] for c in text], dtype=np.int32)

In [76]:
len(encoded)

1115394

In [77]:
text[:10],encoded[:10]

('First Citi', array([18, 47, 56, 57, 58,  1, 15, 47, 58, 47], dtype=int32))

In [175]:
char_dataset=tf.data.Dataset.from_tensor_slices(encoded)

In [176]:
index2char = np.array(vocab)

In [177]:
for i in char_dataset.take(5):
    print(index2char[i.numpy()])

F
i
r
s
t


In [178]:
# batch method easily convert data into sequences of desired size
sequences=char_dataset.batch(batch_size=seq_len+1,drop_remainder=True)

In [179]:
seq_len = 100

In [180]:
for i in sequences.take(1):
    print(repr(''.join(index2char[i.numpy()])))

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [184]:
def shift(chunk):
    x =chunk[:-1]
    y =chunk[1:]
    return x,y
dataset=sequences.map(shift)

In [185]:
for i ,t in dataset.take(1):
    print(i,t)

tf.Tensor(
[18 47 56 57 58  1 15 47 58 47 64 43 52 10  0 14 43 44 53 56 43  1 61 43
  1 54 56 53 41 43 43 42  1 39 52 63  1 44 59 56 58 46 43 56  6  1 46 43
 39 56  1 51 43  1 57 54 43 39 49  8  0  0 13 50 50 10  0 31 54 43 39 49
  6  1 57 54 43 39 49  8  0  0 18 47 56 57 58  1 15 47 58 47 64 43 52 10
  0 37 53 59], shape=(100,), dtype=int32) tf.Tensor(
[47 56 57 58  1 15 47 58 47 64 43 52 10  0 14 43 44 53 56 43  1 61 43  1
 54 56 53 41 43 43 42  1 39 52 63  1 44 59 56 58 46 43 56  6  1 46 43 39
 56  1 51 43  1 57 54 43 39 49  8  0  0 13 50 50 10  0 31 54 43 39 49  6
  1 57 54 43 39 49  8  0  0 18 47 56 57 58  1 15 47 58 47 64 43 52 10  0
 37 53 59  1], shape=(100,), dtype=int32)


In [186]:
batch_size=64

In [187]:
n_batches=len(text)//(64*100)
n_batches

174

In [188]:
dataset=dataset.batch(batch_size,drop_remainder=True)

In [189]:
from tensorflow.keras import Model,Input,Sequential
from tensorflow.keras import layers

In [190]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential([
        layers.Input(batch_shape=[batch_size, None]),
        layers.Embedding(vocab_size, embedding_dim),
        layers.LSTM(rnn_units,return_sequences=True,stateful=True,dropout=0.2),
        layers.LSTM(rnn_units,return_sequences=True,stateful=True,dropout=0.2),
        layers.Dense(vocab_size)
    ])
    return model

In [191]:
vocab_size=len(vocab)
embed_dim=256
units=512

In [192]:
model = build_model(
  vocab_size = vocab_size,
  embedding_dim=embed_dim,
  rnn_units=units,
  batch_size=64)

- `stateful=True` treat batches like a long continuous sequence
  
    - as it keeps the hidden stats between batches to remember sequence across batches
    - it requires `n_batches` to be fixed and manually do `model.reset_states()` after each epoch
- `stateful=False` after each batch LSTM resets its hidden state ,forget everything before the batch

In [195]:
model.summary()

In [196]:
# from livelossplot.tf_keras import PlotLossesCallback
ck_point=tf.keras.callbacks.ModelCheckpoint(
    filepath='text_gen.weights.h5',
    save_weights_only=True,
)

In [197]:
def loss(labels,logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels,logits,from_logits=True)

In [198]:
model.compile(optimizer='adam',loss=loss)

In [225]:
model.load_weights('text_gen.weights.h5')

In [226]:
epochs=100
for epoch in range(epochs):
    print(f"Epoch {epoch}/{epochs}\n")
    model.fit(dataset,epochs=1,callbacks=[ck_point],shuffle=False)
    for layer in model.layers:
            if hasattr(layer,'reset_states'):
                layer.reset_states()

[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 27ms/step - loss: 1.5913
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 30ms/step - loss: 1.5905
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 28ms/step - loss: 1.5897
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 29ms/step - loss: 1.5890
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 30ms/step - loss: 1.5882
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 30ms/step - loss: 1.5874
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 29ms/step - loss: 1.5866
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 29ms/step - loss: 1.5859
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 30ms/step - loss: 1.5851
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 31ms/step - loss: 1.5844
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 31ms/step - loss: 1.5836

In [None]:
Try GRU

In [99]:
# model.build()

# make prediction

we give the model the starting string ,and every output is fed again as input to predictthe next 

In [200]:
string=u"ROMEO: "

In [201]:
input_string=[char2idx[i] for i in string]

In [202]:
input_string = tf.expand_dims(input_string,0)

In [203]:
input_string

<tf.Tensor: shape=(1, 7), dtype=int32, numpy=array([[30, 27, 25, 17, 27, 10,  1]], dtype=int32)>

In [204]:
for layer in model.layers:
    if hasattr(layer,'reset_states'):
        layer.reset_states()

In [205]:
pred=model.predict(input_string)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step


In [206]:
pred.shape

(64, 7, 65)

rebuild the model by keeping architecture but change batch size to 1

In [None]:
model2=build_model(vocab_size,embed_dim,units,batch_size=1)
model2.load_weights('text_gen.weights.h5')
model2.build(tf.TensorShape([1, None]))

In [233]:
prediction=model2.predict(input_string)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step


In [234]:
prediction.shape

(1, 7, 65)

In [235]:
pred = tf.squeeze(prediction, 0)
pred.shape

TensorShape([7, 65])

In [236]:
p=tf.random.categorical(pred,num_samples=1).numpy()

In [237]:
p

array([[47],
       [10],
       [17],
       [10],
       [26],
       [ 0],
       [21]])

In [214]:
''.join(index2char[p[-1][0]])

'A'

In [244]:
tf.random.categorical(pred[-1:],num_samples=1)[-1,0].numpy()

np.int64(21)

In [267]:
def generate_text(model,start_str,num_gen=1000):
    input_string=[char2idx[i] for i in start_str]
    input_string = tf.expand_dims(input_string,0)
    print(input_string.shape)
    for layer in model.layers:
        if hasattr(layer,'reset_states'):
            layer.reset_states()
    generated_text=[]
    # low temp --> more predictable text more coherent
    # high --> more surprising text
    temperature=.3 #  

    for i in range (num_gen):
        prediction=model(input_string)
        pred= tf.squeeze(prediction,0)
        pred=pred/temperature

        pred_id=tf.random.categorical(pred[-1:],num_samples=1)[-1,0].numpy()
        input_string = tf.concat([input_string, [[pred_id]]], axis=-1)
        generated_text.append(index2char[pred_id])
        
    return start_str + ''.join(generated_text)

In [268]:
generate_text(model2,start_str=u"ROMEO: ",num_gen=100)

(1, 7)


'ROMEO: I have so stand the comes she shall death the words the come to the have to the rath with to me the '

In [None]:
seq_len=150

In [None]:
"ROMEO: Why 'it\nLefur, furbhter a rather or thought ceeress fair atquarty priths\nThat you shone, now\nsirets convery me my weach meet, we this goner:\nYou and grace, which and and lord, God; My a pyshert: yet m"

In [None]:
# change temperature 
# number of layer 
# use bi-lstm
# more ebpochs =30

In [None]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential([
        layers.Input(batch_shape=[batch_size, None]),
        layers.Embedding(vocab_size, embedding_dim),
        layers.LSTM(rnn_units, return_sequences=True, stateful=True),
        layers.Dense(vocab_size, activation='softmax')  # Add softmax
    ])
    return model

# Then change loss function:
def loss(labels, predictions):
    return tf.keras.losses.sparse_categorical_crossentropy(
        labels, predictions, from_logits=False  # Change to False
    )

# And in generation, skip the softmax step:
def generate_text(model, start_str, num_gen=1000, temperature=1.0):
    # ...
    predictions = model(input_eval)
    predictions = tf.squeeze(predictions, 0)[-1, :]
    
    # Already probabilities, but can still apply temperature:
    # (though this is less clean mathematically)
    predictions = tf.math.pow(predictions, 1/temperature)
    predictions = predictions / tf.reduce_sum(predictions)
    
    predicted_id = tf.random.categorical(
        tf.expand_dims(tf.math.log(predictions), 0),  # Need log for categorical
        num_samples=1
    )[-1, 0].numpy()