In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Embedding, SimpleRNN, Dense


In [2]:
# Fetch the Shakespeare corpus; get_file returns the path of the local copy.
path = tf.keras.utils.get_file(
    'shakespeare.txt',
    'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt'
)

# Read the raw bytes and decode them explicitly as UTF-8. The context manager
# closes the file handle as soon as the read finishes instead of leaving it
# open until garbage collection.
with open(path, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')


Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt
1115394/1115394 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step


In [3]:
# Every distinct character of the corpus, in sorted order.
vocab = list(set(text))
vocab.sort()

# Forward lookup (character -> integer id) and reverse lookup (id -> character);
# a character's id is its position in `vocab`.
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = np.array(vocab)


In [4]:
# Encode the whole corpus as integer ids, one per character, via char2idx.
text_as_int = np.array(list(map(char2idx.__getitem__, text)))


In [5]:
seq_length = 100

# At least seq_length + 1 ids are needed to form one (input, target) pair.
if len(text_as_int) <= seq_length:
    raise ValueError(
        f"corpus has {len(text_as_int)} characters; need more than "
        f"seq_length={seq_length} to build training windows"
    )

# All windows of seq_length consecutive ids: windows[j] == text_as_int[j:j + seq_length].
# sliding_window_view returns a read-only view onto text_as_int, so no Python-level
# loop and no per-window copies are made here.
windows = np.lib.stride_tricks.sliding_window_view(text_as_int, seq_length)

# Input i is the window starting at i; its target is the same window shifted
# one character to the right (the window starting at i + 1). The final window
# has no successor, so it is only ever used as a target.
X = windows[:-1]
y = windows[1:]


In [6]:
# Character-level language model: embed each character id, run a simple RNN
# over the sequence, and emit a softmax distribution over the vocabulary at
# every time step (return_sequences=True keeps one output per input position).
model = Sequential([
    # The time dimension is declared as None (variable length) rather than
    # seq_length: training batches are seq_length long, but text generation
    # feeds the model shorter seed strings.
    Input(shape=(None,)),
    Embedding(input_dim=len(vocab), output_dim=64),
    SimpleRNN(128, return_sequences=True),
    Dense(len(vocab), activation='softmax')
])


In [7]:
# Print the layer-by-layer summary of the model defined above.
model.summary()


In [8]:
# Targets are integer character ids (not one-hot vectors), hence the sparse
# variant of categorical cross-entropy; optimisation uses Adam with defaults.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')


In [9]:
# Train on the (input window, shifted target window) pairs for five passes
# over the data, 64 windows per gradient step.
model.fit(x=X, y=y, batch_size=64, epochs=5)



Epoch 1/5
17427/17427 ━━━━━━━━━━━━━━━━━━━━ 1148s 66ms/step - loss: 1.7271
Epoch 2/5
17427/17427 ━━━━━━━━━━━━━━━━━━━━ 1167s 66ms/step - loss: 1.4740
Epoch 3/5
17427/17427 ━━━━━━━━━━━━━━━━━━━━ 1153s 66ms/step - loss: 1.4568
Epoch 4/5
17427/17427 ━━━━━━━━━━━━━━━━━━━━ 1155s 66ms/step - loss: 1.4488
Epoch 5/5
17427/17427 ━━━━━━━━━━━━━━━━━━━━ 1159s 66ms/step - loss: 1.4446


<keras.src.callbacks.history.History at 0x790f4b767d70>

In [10]:
def generate_text(start_string, num_generate=300, temperature=1.0):
    """Sample text from the trained character model, seeded with `start_string`.

    At each step the model is run on the most recent characters (at most
    `seq_length` of them, the window length used for training), the
    distribution predicted for the next character is sampled, and the sampled
    character is appended to the context.

    Args:
        start_string: Non-empty seed text. Every character must be a key of
            `char2idx`.
        num_generate: Number of characters to sample after the seed.
        temperature: Positive divisor applied to the log-probabilities before
            sampling. 1.0 samples from the model's distribution unchanged;
            smaller values sharpen it, larger values flatten it.

    Returns:
        `start_string` followed by the `num_generate` sampled characters.

    Raises:
        ValueError: If `start_string` is empty or `temperature` is not positive.
        KeyError: If `start_string` contains a character missing from `char2idx`.
    """
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # The running context is kept as a plain Python list of ids and re-wrapped
    # as a tensor each step, so tensors of different integer dtypes never have
    # to be concatenated.
    context = [char2idx[c] for c in start_string]
    text_generated = []

    for _ in range(num_generate):
        # Let the context grow up to the training window length instead of
        # pinning it to len(start_string).
        window = context[-seq_length:]
        input_eval = tf.constant([window], dtype=tf.int32)

        # The model ends in a softmax, so this is a probability distribution
        # over the vocabulary for every position; keep the last position only.
        predictions = model(input_eval)
        next_char_probs = predictions[:, -1, :]

        # tf.random.categorical expects unnormalized log-probabilities, not
        # probabilities. Feeding the softmax output directly would sample from
        # softmax(probabilities), which is close to uniform. The lower bound
        # keeps log() finite if a probability underflows to zero.
        logits = tf.math.log(tf.maximum(next_char_probs, 1e-9)) / temperature

        predicted_id = int(
            tf.random.categorical(logits, num_samples=1)[-1, 0].numpy()
        )

        context.append(predicted_id)
        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)


In [None]:
# Generate a sample seeded with "ROMEO: ". print() is used so that any newline
# characters in the result render as line breaks rather than as escapes.
print(generate_text("ROMEO: "))
