## Character-Level Text Generator

In [1]:
import requests

# Load the tiny Shakespeare dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error so an error page is never used as training text.
response.raise_for_status()
text = response.text

print(f"Dataset length (characters): {len(text)}")
print("Sample:\n", text[:1000])

Dataset length (characters): 1115394
Sample:
 First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us kill him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be done: away, away!

Second Citizen:
One word, good citizens.

First Citizen:
We are accounted poor citizens, the patricians good.
What authority surfeits on would relieve us: if they
would yield us but the superfluity, while it were
wholesome, we might guess they relieved us humanely;
but they think we are too dear: the leanness that
afflicts us, the object of our misery, is as an
inventory to particularise their abundance; our
sufferance is a gain to them Let us revenge this with
our pikes, ere we become rakes: for the gods know I
speak this in 

In [2]:
# Character vocabulary: every distinct symbol in the corpus, in sorted order
chars = sorted(set(text))
vocab_size = len(chars)

# Lookup tables in both directions: integer id -> symbol and symbol -> integer id
idx2char = dict(enumerate(chars))
char2idx = {symbol: index for index, symbol in idx2char.items()}

# Translate the whole corpus into a list of integer ids
encoded_text = list(map(char2idx.__getitem__, text))

print(f"Unique characters: {vocab_size}")
print(f"Sample encoding: {encoded_text[:10]}")

Unique characters: 65
Sample encoding: [18, 47, 56, 57, 58, 1, 15, 47, 58, 47]


In [3]:
seq_length = 100
step = 1  # Stride between window starts; with 1, neighbouring windows overlap

# Each example is a window of seq_length ids; its label is the id that follows the window.
window_starts = range(0, len(encoded_text) - seq_length, step)
input_sequences = [encoded_text[start:start + seq_length] for start in window_starts]
target_chars = [encoded_text[start + seq_length] for start in window_starts]

In [4]:
import numpy as np

# Character ids are small non-negative integers (bounded by the vocabulary size),
# so int32 stores them exactly. Stating the dtype avoids NumPy's platform-dependent
# default (usually int64) and halves the memory of the window matrix.
X = np.array(input_sequences, dtype=np.int32)
y = np.array(target_chars, dtype=np.int32)

print(f"Input shape: {X.shape}")
print(f"Target shape: {y.shape}")

Input shape: (1115294, 100)
Target shape: (1115294,)


In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Input, LSTM

vocab_size = len(char2idx)  # Total number of unique characters

# Next-character classifier: embed each character id, run the window through an
# LSTM, then output one probability per character in the vocabulary.
model = Sequential([
    # Declare the input explicitly instead of passing the deprecated
    # `input_length` argument to Embedding. Leaving the time dimension as None
    # lets the model accept seeds shorter than seq_length at generation time.
    Input(shape=(None,), dtype="int32"),
    Embedding(input_dim=vocab_size, output_dim=64),
    LSTM(128, return_sequences=False),  # only the final time step's output is passed on
    Dense(vocab_size, activation='softmax')
])



In [6]:
# Targets are integer character ids rather than one-hot vectors, hence the sparse loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer overview of the network.
model.summary()

In [7]:
from tensorflow.keras.callbacks import EarlyStopping

# No validation split is used in this notebook, so the callback watches the training loss.
early_stop = EarlyStopping(
    restore_best_weights=True,
    patience=3,  # Stop once 3 epochs pass without improvement
    monitor='loss',
)

In [8]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Halve the learning rate after 2 epochs without improvement in the training
# loss; verbose=1 reports each reduction.
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=2, verbose=1)

In [9]:
# Train on every window; both callbacks monitor the training loss.
history = model.fit(
    x=X,
    y=y,
    epochs=10,
    batch_size=128,
    callbacks=[early_stop, reduce_lr],
)

Epoch 1/10
[1m8714/8714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m957s[0m 108ms/step - accuracy: 0.3599 - loss: 2.2612 - learning_rate: 0.0010
Epoch 2/10
[1m8714/8714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m958s[0m 110ms/step - accuracy: 0.4983 - loss: 1.6920 - learning_rate: 0.0010
Epoch 3/10
[1m8714/8714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m974s[0m 112ms/step - accuracy: 0.5260 - loss: 1.5824 - learning_rate: 0.0010
Epoch 4/10
[1m8714/8714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m988s[0m 113ms/step - accuracy: 0.5416 - loss: 1.5225 - learning_rate: 0.0010
Epoch 5/10
[1m8714/8714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m987s[0m 113ms/step - accuracy: 0.5501 - loss: 1.4905 - learning_rate: 0.0010
Epoch 6/10
[1m8714/8714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1002s[0m 115ms/step - accuracy: 0.5576 - loss: 1.4620 - learning_rate: 0.0010
Epoch 7/10
[1m8714/8714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1016s[0m 117ms/step - accu

In [10]:
import numpy as np

def sample_next_char(preds, temperature=1.0):
    """Sample an index from a probability distribution after temperature scaling.

    The log-probabilities are divided by ``temperature`` and re-normalised
    before one index is drawn. Values below 1 sharpen the distribution and
    values above 1 flatten it.

    Args:
        preds: 1-D array-like of probabilities, one entry per index.
        temperature: Divisor applied to the log-probabilities. ``0`` selects
            the most probable index deterministically (greedy decoding).

    Returns:
        The chosen index into ``preds`` (a NumPy integer).
    """
    preds = np.asarray(preds).astype('float64')

    # The limit of temperature scaling as temperature -> 0 is a plain argmax;
    # handling it here avoids a division by zero.
    if temperature == 0:
        return np.argmax(preds)

    # The small epsilon keeps log() finite for zero-probability entries.
    scaled = np.log(preds + 1e-8) / temperature
    # Shift so the largest value is 0 before exponentiating. The normalised
    # result is unchanged, but exp() can no longer underflow every entry to
    # zero, which would turn the normalisation into 0/0 at low temperatures.
    scaled -= np.max(scaled)
    exp_preds = np.exp(scaled)
    probs = exp_preds / np.sum(exp_preds)

    # Draw a single sample: a one-hot row whose argmax is the chosen index.
    draw = np.random.multinomial(1, probs, 1)
    return np.argmax(draw)

In [11]:
def generate_text(model, seed_text, gen_length=300, temperature=1.0):
    """Extend ``seed_text`` by sampling characters from ``model`` one at a time.

    Relies on the notebook-level ``char2idx`` / ``idx2char`` lookup tables,
    ``seq_length`` and ``sample_next_char``.

    Args:
        model: Object whose ``predict(batch, verbose=0)`` returns, for each
            input row, one probability per vocabulary index.
        seed_text: Non-empty starting string; every character must be a key
            of ``char2idx``.
        gen_length: Number of characters to append to the seed.
        temperature: Passed through to ``sample_next_char``.

    Returns:
        ``seed_text`` followed by ``gen_length`` generated characters.

    Raises:
        ValueError: If ``seed_text`` is empty.
        KeyError: If ``seed_text`` contains a character missing from ``char2idx``.
    """
    if not seed_text:
        # An empty seed would leave the model with a zero-length input sequence.
        raise ValueError("seed_text must contain at least one character")

    # Report all unsupported characters at once instead of a bare KeyError on the first.
    unknown = sorted(set(seed_text).difference(char2idx))
    if unknown:
        raise KeyError(f"seed_text contains characters outside the vocabulary: {unknown!r}")

    model_input = [char2idx[c] for c in seed_text]
    # Collect the pieces and join once, rather than rebuilding the string on every step.
    pieces = [seed_text]

    for _ in range(gen_length):
        # Only the most recent seq_length ids are fed to the model.
        input_seq = np.array(model_input[-seq_length:]).reshape(1, -1)
        preds = model.predict(input_seq, verbose=0)[0]
        next_index = sample_next_char(preds, temperature)

        pieces.append(idx2char[next_index])
        model_input.append(next_index)

    return "".join(pieces)

In [12]:
# The prompt may be any string built from characters that occur in the dataset.
seed = "ROMEO: "
print(generate_text(model=model, seed_text=seed, temperature=0.8, gen_length=500))

ROMEO: What you have
griefs: in't a kings of heavy could to death with more,
Thought with ones, 'tis bear his warming before it
Edward's sorrow of touch away the earth.
My supplad on our grough, throw thou should by haw
To prince in her swards and threep the things,
That laughter the lords: by the devil upon his foot
Ere suppey about the vantage.

MISARDO:
And gentle!

GREGORY:
This is give himself, I be heads and yed
And rest not holds and come Warwick of the gaze:
For they will to meet, my court?

SI


### Save and Reload the Model

In [24]:
import os

# Create the target folder first so saving does not depend on it already existing.
os.makedirs("saved_models", exist_ok=True)
# The .keras extension selects the native Keras format rather than legacy HDF5 (.h5).
model.save("saved_models/lstm_char_model.keras")

In [26]:
from tensorflow.keras.models import load_model
# Read the saved .keras file back in, rebinding `model` to the loaded copy.
model = load_model(filepath="saved_models/lstm_char_model.keras")

  saveable.load_own_variables(weights_store.get(inner_path))
