# Natural Language Generation

In [1]:
# Import libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam

In [2]:
# Load dataset: read the whole corpus into memory as one string
with open("dataset.txt", mode="r", encoding="utf-8") as corpus_file:
    text = corpus_file.read()

# Quick sanity check of what was loaded
print("Dataset length:", len(text))
print("First 500 characters:\n", text[:500])

Dataset length: 1115394
First 500 characters:
 First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us kill him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be done: away, away!

Second Citizen:
One word, good citizens.

First Citizen:
We are accounted poor


In [3]:
# Create character mappings
# The vocabulary is every distinct character of the corpus, in sorted order;
# a character's position in that order is its integer id.
chars = sorted(set(text))
vocab_size = len(chars)

# id -> character, and the inverse character -> id table derived from it
int_to_char = dict(enumerate(chars))
char_to_int = {symbol: index for index, symbol in int_to_char.items()}

print("Vocabulary size:", vocab_size)

Vocabulary size: 65


In [4]:
# Prepare sequences
# Every run of `seq_length` consecutive characters is one training input; the
# character that immediately follows the run is its prediction target.
seq_length = 100

if len(text) <= seq_length:
    raise ValueError(
        f"Corpus has {len(text)} characters; more than seq_length={seq_length} "
        "are needed to form at least one (sequence, next character) pair."
    )

# Encode the corpus exactly once. A per-window Python loop would re-encode each
# character seq_length times and build len(text) * seq_length Python ints.
encoded_text = np.array([char_to_int[c] for c in text])

# sliding_window_view exposes all len(text) - seq_length + 1 overlapping windows
# as a view (no copy). The final window has no following character, so drop it.
sequences = np.lib.stride_tricks.sliding_window_view(encoded_text, seq_length)[:-1]
next_chars = encoded_text[seq_length:]
assert len(sequences) == len(next_chars)

print("Total sequences:", len(sequences))

# Materialise the windows as an ordinary contiguous array for training.
X = np.ascontiguousarray(sequences)
# One-hot targets, one row per window.
y = to_categorical(next_chars, num_classes=vocab_size)

Total sequences: 1115294


In [5]:
# Build model
# Embedding -> single LSTM -> softmax over the vocabulary: for a window of
# character ids the network outputs one probability per possible next character.
model = Sequential([
    # Declaring the input up front builds the model immediately, so summary()
    # can report layer shapes and parameter counts. The time dimension is None
    # because generation may feed seeds shorter than seq_length. This replaces
    # Embedding's `input_length` argument, which Keras 3 deprecates and ignores.
    tf.keras.Input(shape=(None,), dtype="int32"),
    Embedding(vocab_size, 64),
    # return_sequences=False: only the state after the last character is used
    LSTM(128, return_sequences=False),
    Dense(vocab_size, activation='softmax')
])

# Targets are one-hot encoded with to_categorical, hence categorical cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001))
model.summary()



In [6]:
# Train model: fit on every (window, next character) pair; `history` keeps the
# per-epoch metrics returned by fit().
history = model.fit(
    x=X,
    y=y,
    epochs=10,
    batch_size=128,
)

Epoch 1/10
8714/8714 ━━━━━━━━━━━━━━━━━━━━ 2726s 311ms/step - loss: 1.9890
Epoch 2/10
8714/8714 ━━━━━━━━━━━━━━━━━━━━ 2562s 294ms/step - loss: 1.6670
Epoch 3/10
8714/8714 ━━━━━━━━━━━━━━━━━━━━ 3241s 372ms/step - loss: 1.5786
Epoch 4/10
8714/8714 ━━━━━━━━━━━━━━━━━━━━ 2257s 259ms/step - loss: 1.5305
Epoch 5/10
8714/8714 ━━━━━━━━━━━━━━━━━━━━ 1811s 208ms/step - loss: 1.4990
Epoch 6/10
8714/8714 ━━━━━━━━━━━━━━━━━━━━ 1588s 182ms/step - loss: 1.4758
Epoch 7/10
8714/8714 ━━━━━━━━━━━━━━━━━━━━ 2260s 259ms/step - loss: 1.4580
Epoch 8/10
8714/8714 ━━━━━━━━━━━━━━━━━━━━ 2285s 262ms/step - loss: 1.4442
Epoch 9/10
8714/8714 ━━━━━━━━━━━━━━━━━━━━ 2260s 259ms/step - loss: 1.4322
Epoch 10/10
8714/8714 [output truncated]

In [7]:
# Save model: write the trained network to disk under the name "nlg_model.h5"
model.save(filepath="nlg_model.h5")



In [8]:
# Generate text function
def generate_text(seed_text, gen_length=300, temperature=1.0):
    """Extend ``seed_text`` by sampling characters from the trained model.

    Relies on the notebook-level names ``model``, ``char_to_int``,
    ``int_to_char`` and ``seq_length``.

    Args:
        seed_text: Non-empty starting text. Only its last ``seq_length``
            characters are used as context, and only those are echoed back at
            the start of the result.
        gen_length: Number of characters to sample.
        temperature: Sampling temperature, must be > 0. ``1.0`` samples from
            the model's distribution unchanged; smaller values favour the most
            likely characters, larger values flatten the distribution.

    Returns:
        The (possibly truncated) seed followed by ``gen_length`` sampled
        characters.

    Raises:
        ValueError: If ``seed_text`` is empty or ``temperature`` is not positive.
        KeyError: If the seed contains a character missing from ``char_to_int``.
    """
    if not seed_text:
        raise ValueError("seed_text must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    context = seed_text[-seq_length:]
    unknown = sorted({c for c in context if c not in char_to_int})
    if unknown:
        # Same exception type as a plain dict lookup, but with a usable message.
        raise KeyError(f"seed_text contains characters outside the vocabulary: {unknown!r}")

    input_seq = [char_to_int[c] for c in context]
    for _ in range(gen_length):
        # The model only ever sees the most recent seq_length ids.
        X_pred = np.array([input_seq[-seq_length:]])
        preds = model.predict(X_pred, verbose=0)[0]

        # Work in float64 and renormalise so the probabilities sum to 1 within
        # the tolerance np.random.choice enforces.
        probs = np.asarray(preds, dtype=np.float64)
        if temperature != 1.0:
            # p_i ** (1 / T) computed in log space; the clip keeps log() finite
            # for zero probabilities, the max-shift keeps exp() from overflowing.
            scaled = np.log(np.clip(probs, 1e-12, None)) / temperature
            probs = np.exp(scaled - scaled.max())
        probs = probs / probs.sum()

        # Cast to a plain int so input_seq stays a homogeneous list of ints.
        next_index = int(np.random.choice(len(probs), p=probs))
        input_seq.append(next_index)

    return ''.join(int_to_char[i] for i in input_seq)

In [9]:
# Example generation
# generate_text samples with np.random.choice, i.e. NumPy's global RNG. Seeding
# it here makes the printed sample repeatable for a given set of trained weights.
np.random.seed(42)

seed = "ROMEO: "
generated_text = generate_text(seed, gen_length=300)
print("Generated Text:\n", generated_text)

Generated Text:
 ROMEO: Womiced, i' you widow,
He scover you now,
Would yet business but I name. Igane.
You are on by him for what he therefore truth'd successard
Of away, he is far here to his name fal
She senning Aubutue what's tried, therein.
It not hip, but fortune the wear, to me;
Buside corrily. Can so, but talk, if 
