In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding
from tensorflow.keras.utils import to_categorical
import requests

In [2]:
# Download the plain-text play from Project Gutenberg.
url = 'https://www.gutenberg.org/files/1524/1524-0.txt'  # Hamlet
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error so an error page is never used as training text.
response.raise_for_status()
# Lower-casing folds upper- and lower-case letters into the same characters.
text = response.text.lower()
text = text[:200000]   # limit for faster training

In [3]:
# Character-level vocabulary: every distinct character in the corpus, in sorted order.
chars = sorted(set(text))
# Two lookup tables that are inverses of each other (character <-> integer id).
char_to_idx = dict(zip(chars, range(len(chars))))
idx_to_char = dict(enumerate(chars))
vocab_size = len(chars)
print("Total unique characters:", vocab_size)

Total unique characters: 49


In [4]:
# Each training example is a window of `seq_length` character ids; its label
# is the id of the character that immediately follows the window.
seq_length = 40

# Encode the corpus once, then slice windows out of the id list.
encoded_text = [char_to_idx[ch] for ch in text]
num_windows = len(text) - seq_length

X = np.array([encoded_text[start:start + seq_length] for start in range(num_windows)])
# The label for the window starting at `start` is the id at `start + seq_length`,
# so the labels are simply the encoded text shifted by one window length.
y = to_categorical(encoded_text[seq_length:], num_classes=vocab_size)
print("Total sequences:", len(X))

Total sequences: 178022


In [6]:
# Character-level language model: embedding -> simple recurrent layer ->
# softmax over the vocabulary (one probability per possible next character).
model = Sequential([
    # The input shape is declared with an explicit Input rather than through
    # Embedding's `input_length` argument, which Keras 3 deprecates. Declaring
    # it up front also builds the model before summary() is called.
    tf.keras.Input(shape=(seq_length,)),
    Embedding(input_dim=vocab_size, output_dim=64),
    SimpleRNN(128),
    Dense(vocab_size, activation='softmax')
])

# categorical_crossentropy matches the one-hot labels produced by to_categorical.
model.compile(loss='categorical_crossentropy', optimizer='adam')
model.summary()

In [7]:
# Train for three passes over the windows in mini-batches of 128.
# Kept as the cell's final expression so the returned History object is displayed.
model.fit(
    x=X,
    y=y,
    epochs=3,
    batch_size=128,
)

Epoch 1/3
[1m1391/1391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 39ms/step - loss: 2.3048
Epoch 2/3
[1m1391/1391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 44ms/step - loss: 1.9329
Epoch 3/3
[1m1391/1391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 43ms/step - loss: 1.8058


<keras.src.callbacks.history.History at 0x23523e3dd90>

In [8]:
def sample_with_temperature(preds, temperature=1.0):
    """Draw one class index from a probability vector after temperature scaling.

    Args:
        preds: 1-D array-like of probabilities (e.g. one row of softmax output).
        temperature: Scaling applied to the log-probabilities. Values below 1
            sharpen the distribution, values above 1 flatten it. ``0`` picks
            the most likely index deterministically (greedy decoding).

    Returns:
        The selected index into ``preds`` (a NumPy integer).
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Zero temperature is the greedy limit; dividing by zero would only
        # yield NaN probabilities and make np.random.choice raise.
        return np.argmax(preds)
    # The small epsilon avoids log(0) for classes with zero probability.
    log_preds = np.log(preds + 1e-12) / temperature
    # Shift so the largest value is 0 before exponentiating. The shift cancels
    # in the normalisation below, but it prevents every term underflowing to 0
    # (and the division producing NaN) when the temperature is very small.
    exp_preds = np.exp(log_preds - np.max(log_preds))
    probs = exp_preds / np.sum(exp_preds)
    return np.random.choice(len(probs), p=probs)

In [9]:
def generate_text(model, seed_text, length=200, temperature=1.0):
    """Generate text one character at a time, starting from a seed string.

    Relies on the notebook-level ``seq_length``, ``char_to_idx`` and
    ``idx_to_char`` built in earlier cells, and on ``sample_with_temperature``.

    Args:
        model: Trained model whose ``predict`` maps a ``(1, seq_length)`` array
            of character ids to a ``(1, vocab_size)`` array of probabilities.
        seed_text: Starting text. Shorter seeds are left-padded with spaces;
            longer seeds are truncated to their last ``seq_length`` characters.
        length: Number of characters to generate.
        temperature: Sampling temperature passed to ``sample_with_temperature``.

    Returns:
        The (padded or truncated) seed followed by ``length`` generated characters.
    """
    if len(seed_text) < seq_length:
        seed_text = ' ' * (seq_length - len(seed_text)) + seed_text
    seed = seed_text[-seq_length:]

    # Encode the seed once. The vocabulary was built from lower-cased text, so
    # characters are lower-cased for lookup; otherwise every upper-case letter
    # would silently fall back to index 0. Characters that are still unknown
    # keep the original fallback of index 0.
    window = [char_to_idx.get(c.lower(), 0) for c in seed]

    # Collect pieces in a list and join once instead of growing a string.
    pieces = [seed]
    for _ in range(length):
        x_pred = np.array([window])
        preds = model.predict(x_pred, verbose=0)[0]
        next_index = int(sample_with_temperature(preds, temperature))
        pieces.append(idx_to_char[next_index])
        # Slide the window: drop the oldest id and append the sampled one, so
        # generated characters never need to be re-encoded.
        window = window[1:] + [next_index]
    return ''.join(pieces)

In [10]:
# Use the opening of the corpus as the seed. Its length follows seq_length
# rather than a hard-coded 40, so it stays one full model window if the
# window size is changed.
seed = text[:seq_length]
print("Seed:", repr(seed))
print("\nGenerated:\n", generate_text(model, seed, length=500, temperature=0.8))

Seed: '*** start of the project gutenberg ebook'

Generated:
 *** start of the project gutenberg ebooke.

way, but the gords indasting stald sone.

hamlet.
gurestars.
i do the cat is but trou to this wall be shat would majuse;
of end gupbly a vall a f on ow the ’tave to the well
that this stoit in chation so bot the onco.

porngiffald.

hamlet.
i wat their dage. so the of with you, my soter of acaicherous of o deed the his hamlet, it his berarsi bustle
that well the the the lood that a wall barn. dreave us this munker prown him.

wing.
o deed
his brearifu
his some will lood,
lay then my that the
