In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.utils import to_categorical
import requests

In [3]:
# Plain-text source file served by gutenberg.org.
url = 'https://www.gutenberg.org/files/1524/1524-0.txt'
# Bound the request so a stalled connection cannot hang the kernel forever.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of silently training on an error page.
response.raise_for_status()
# Lower-case everything so the character vocabulary stays small.
text = response.text.lower()

In [4]:
text = text[:200_000]  # keep at most the first 200,000 characters of the corpus

In [5]:
# Vocabulary: every distinct character in the corpus, in sorted order.
chars = sorted(set(text))
# Two-way lookup tables between characters and their integer ids
# (the id of a character is its position in `chars`).
char_to_idx = dict(zip(chars, range(len(chars))))
idx_to_char = dict(enumerate(chars))
vocab_size = len(chars)
print("Total unique characters:", vocab_size)

Total unique characters: 49


In [6]:
seq_length = 40
# One training example per window start: `seq_length` consecutive characters
# (encoded as ids) form the input, and the character right after them is the target.
window_starts = range(len(text) - seq_length)
X = [
    [char_to_idx[ch] for ch in text[start:start + seq_length]]
    for start in window_starts
]
y = [char_to_idx[text[start + seq_length]] for start in window_starts]

In [7]:
# Stack the encoded windows into a NumPy array (one row per window).
X = np.asarray(X)
# One-hot encode the integer targets. The ndim guard makes this cell safe to
# re-run: without it, a second execution would one-hot encode the already
# one-hot `y` again and produce a 3-D array that no longer matches the model.
y = np.asarray(y)
if y.ndim == 1:
    y = to_categorical(y, num_classes=vocab_size)

In [8]:
# Report how many training windows were built (the length of X).
print("Total sequences:", len(X))

Total sequences: 178022


In [13]:
# Character-level next-character model:
#   ids -> 64-d embeddings -> 128-unit LSTM -> softmax over the vocabulary.
# The input shape is declared with an explicit Input instead of the deprecated
# `Embedding(input_length=...)` argument, so the model is still built (and
# `model.summary()` can report shapes) before training starts.
model = Sequential([
    tf.keras.Input(shape=(seq_length,)),
    Embedding(input_dim=vocab_size, output_dim=64),
    LSTM(128),
    Dense(vocab_size, activation='softmax')
])

In [14]:
# Train with Adam, minimising categorical cross-entropy between the softmax
# output and the target vectors, then print the layer summary.
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.summary()

In [16]:
# Five passes over the data in mini-batches of 128 windows.
model.fit(x=X, y=y, epochs=5, batch_size=128)

Epoch 1/5
[1m1391/1391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 147ms/step - loss: 2.3215
Epoch 2/5
[1m1391/1391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m227s[0m 163ms/step - loss: 1.9370
Epoch 3/5
[1m1391/1391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m268s[0m 193ms/step - loss: 1.7931
Epoch 4/5
[1m1391/1391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 141ms/step - loss: 1.7057
Epoch 5/5
[1m1391/1391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m247s[0m 177ms/step - loss: 1.6398


<keras.src.callbacks.history.History at 0x13201b76ae0>

In [17]:
def sample_with_temperature(preds, temperature=1.0):
    """Draw one class index from a probability vector, reshaped by temperature.

    Args:
        preds: 1-D array-like of class probabilities (e.g. a softmax output).
        temperature: Values below 1 sharpen the distribution, values above 1
            flatten it. A value <= 0 disables sampling and returns the argmax.

    Returns:
        int: The chosen index into ``preds``.
    """
    preds = np.asarray(preds).astype('float64')
    if temperature <= 0:
        # Greedy decoding: no randomness involved.
        return int(np.argmax(preds))
    # The small epsilon keeps log() finite for zero probabilities.
    scaled = np.log(preds + 1e-12) / temperature
    # Shift by the maximum before exponentiating. The normalised result is
    # mathematically unchanged, but at low temperatures every term would
    # otherwise underflow to 0, giving 0/0 = NaN and a ValueError when sampling.
    scaled = scaled - np.max(scaled)
    weights = np.exp(scaled)
    probs = weights / np.sum(weights)
    return int(np.random.choice(len(probs), p=probs))

In [18]:
def generate_text(model, seed_text, length=200, temperature=1.0):
    """Extend ``seed_text`` one sampled character at a time.

    Relies on the notebook-level names ``seq_length``, ``char_to_idx``,
    ``idx_to_char`` and ``sample_with_temperature``.

    Args:
        model: Object whose ``predict(x, verbose=0)`` returns, for a batch of
            one id sequence, a batch of one next-character probability vector.
        seed_text: Prompt. Left-padded with spaces when shorter than
            ``seq_length``; only its last ``seq_length`` characters are used.
        length: Number of characters to generate.
        temperature: Passed through to ``sample_with_temperature``.

    Returns:
        str: The (padded/truncated) seed followed by the generated characters.
    """
    # rjust pads on the left with spaces and leaves longer strings untouched.
    window = seed_text.rjust(seq_length)[-seq_length:]
    pieces = [window]
    for _ in range(length):
        # Characters missing from the vocabulary fall back to id 0.
        encoded = np.array([[char_to_idx.get(ch, 0) for ch in window]])
        distribution = model.predict(encoded, verbose=0)[0]
        picked = idx_to_char[sample_with_temperature(distribution, temperature)]
        pieces.append(picked)
        # Slide the window: drop the oldest character, append the new one.
        window = window[1:] + picked
    return ''.join(pieces)

In [19]:
# Seed with the first `seq_length` characters of the corpus so the prompt
# exactly fills the model's input window, even if `seq_length` is changed.
seed = text[:seq_length]
print("Seed:", repr(seed))
print("\nGenerated:\n", generate_text(model, seed, length=500, temperature=0.8))

Seed: '*** start of the project gutenberg ebook'

Generated:
 *** start of the project gutenberg ebook the know.

foretalie.
that enseal not priend; in ad yor some him;
that trace methere, sich the call winks to
the sing the cleature.

barnardo.
from my maction, and hearte say?
as in show in the conterigition in the caration;
friencly come a matter night the septor
for the pantingle and will this trued their of partly.
    that is, where love sillion hadds are breat,—
   my lord.

hamlet.
as wold a bring, from them. father entelath even give and more the bod ofter-we trulg.

[_exeunt._]

scent g
