In [1]:
#All modules and Libraries Imports
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


  if not hasattr(np, "object"):


In [2]:
#GPU Setup
# Query the device list once and reuse it for both the report and the loop.
gpus = tf.config.list_physical_devices('GPU')
print("Available GPUs:", gpus)

for gpu in gpus:
    try:
        # Let TensorFlow allocate GPU memory on demand instead of reserving
        # the whole device up front.
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Memory growth must be configured before the GPU is initialised;
        # re-running this cell in a live kernel would otherwise abort here.
        print("Could not enable memory growth for", gpu, "-", err)


Available GPUs: []


In [3]:
#Load MULTIPLE .txt Files
# Folder holding the training .txt files. Set the TEXT_DATASET_PATH
# environment variable to point somewhere else; the original local path is
# kept as the fallback so existing runs behave the same.
DATASET_PATH = os.environ.get(
    "TEXT_DATASET_PATH", r"C:\Users\sagar\Documents\New folder (2)"
)

def load_text(folder):
    """Read every ``.txt`` file in ``folder`` into one lower-cased string.

    Files are visited in sorted name order so the resulting corpus is the
    same on every run (``os.listdir`` returns entries in arbitrary order).
    Directory entries whose name happens to end in ``.txt`` are skipped.

    Args:
        folder: Path of the directory to scan (not recursive).

    Returns:
        The lower-cased contents of all matching files joined with newlines;
        an empty string if no file matches.

    Raises:
        OSError: If ``folder`` cannot be listed or a file cannot be opened.
    """
    texts = []
    for name in sorted(os.listdir(folder)):
        path = os.path.join(folder, name)
        if not (name.endswith(".txt") and os.path.isfile(path)):
            continue
        # Undecodable bytes are dropped rather than aborting the whole load.
        with open(path, encoding="utf-8", errors="ignore") as f:
            texts.append(f.read().lower())
    return "\n".join(texts)

text = load_text(DATASET_PATH)
# Stop here with a clear message instead of failing later on an empty
# vocabulary when the folder contains no usable .txt files.
if not text:
    raise ValueError(f"No text loaded from {DATASET_PATH!r}; expected .txt files there.")
print("Total characters:", len(text))


Total characters: 6285444


In [5]:
#Character Tokenization
# The vocabulary is every distinct character of the corpus in sorted order,
# so a character's id is simply its position in `chars`.
chars = sorted(set(text))
vocab_size = len(chars)

# Lookup tables in both directions between characters and integer ids.
idx_to_char = dict(enumerate(chars))
char_to_idx = {c: i for i, c in idx_to_char.items()}

# The whole corpus as a 1-D array of character ids.
encoded_text = np.array(list(map(char_to_idx.__getitem__, text)))


In [8]:
#Memory-Safe Data Generator
# Number of context characters per training sample; also the padded input
# length used at generation time.
SEQ_LENGTH = 50
# Number of (context, next-character) samples in each generated batch.
BATCH_SIZE = 64

def data_generator(encoded, seq_len, batch_size):
    """Yield random (context, next-id) training batches forever.

    Each sample is a window of ``seq_len`` consecutive ids taken at a random
    offset of ``encoded``, paired with the id that immediately follows it.
    Offsets are drawn from NumPy's global random state.

    Args:
        encoded: 1-D sequence of integer ids.
        seq_len: Number of ids in each context window.
        batch_size: Number of samples per yielded batch.

    Yields:
        ``(X, y)`` where ``X`` has shape ``(batch_size, seq_len)`` and ``y``
        has shape ``(batch_size,)``.

    Raises:
        ValueError: On first iteration, if ``encoded`` has fewer than
            ``seq_len + 1`` ids, i.e. no complete window plus target fits.
    """
    encoded = np.asarray(encoded)
    # A start offset is valid when the window and its target both fit, so the
    # last valid offset is len(encoded) - seq_len - 1 (randint's upper bound is
    # exclusive).
    n_windows = len(encoded) - seq_len
    if n_windows <= 0:
        raise ValueError(
            f"Need at least {seq_len + 1} encoded items, got {len(encoded)}"
        )
    offsets = np.arange(seq_len)
    while True:
        starts = np.random.randint(0, n_windows, size=batch_size)
        # Broadcasting builds a (batch_size, seq_len) index grid in one step.
        X = encoded[starts[:, None] + offsets]
        y = encoded[starts + seq_len]
        yield X, y

# One epoch is defined as corpus_length / (SEQ_LENGTH * BATCH_SIZE) batches,
# rounded down; chained floor division gives the same integer.
steps_per_epoch = len(encoded_text) // SEQ_LENGTH // BATCH_SIZE


In [9]:
#Optimized LSTM Model
# Character-level next-id model: ids -> embeddings -> two stacked LSTMs ->
# softmax over the vocabulary.
model = Sequential()
model.add(Embedding(vocab_size, 128))
model.add(LSTM(256, return_sequences=True))  # per-step outputs feed the next LSTM
model.add(Dropout(0.2))
model.add(LSTM(128))
model.add(Dense(vocab_size, activation="softmax"))

# Targets are integer ids rather than one-hot vectors, hence the sparse loss.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam")

model.summary()


In [11]:
#Training
# No validation data is supplied, so both callbacks watch the training loss.
# NOTE(review): patience=3 exceeds epochs=2 below, so early stopping cannot
# trigger in this run — confirm whether a longer run was intended.
callbacks = [
    EarlyStopping(patience=3, monitor="loss"),
    ModelCheckpoint(
        filepath="lstm_text_generator.keras",
        save_best_only=True,
        monitor="loss",
    ),
]

# The generator is endless, so steps_per_epoch decides where an epoch ends.
train_batches = data_generator(encoded_text, SEQ_LENGTH, BATCH_SIZE)
model.fit(
    train_batches,
    epochs=2,
    steps_per_epoch=steps_per_epoch,
    callbacks=callbacks,
)


Epoch 1/2
[1m1964/1964[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m644s[0m 328ms/step - loss: 2.3321
Epoch 2/2
[1m1964/1964[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m667s[0m 340ms/step - loss: 2.0048


<keras.src.callbacks.history.History at 0x210d0747610>

In [12]:
#Temperature Sampling
def sample_with_temperature(preds, temperature=0.8):
    """Draw one index from ``preds`` after temperature scaling.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: Values below 1 sharpen the distribution, values above 1
            flatten it. ``0`` selects the most probable index
            deterministically.

    Returns:
        The sampled index as a Python ``int``.

    Raises:
        ValueError: If ``temperature`` is negative.
    """
    if temperature < 0:
        raise ValueError("temperature must be non-negative")

    # Work in float64: with float32 inputs the renormalised probabilities can
    # miss np.random.choice's sum-to-one tolerance.
    probs = np.asarray(preds, dtype=np.float64)
    if temperature == 0:
        return int(np.argmax(probs))

    # The small epsilon keeps log() finite for zero probabilities.
    logits = np.log(probs + 1e-8) / temperature
    # Shift by the maximum so exp() cannot underflow every entry to zero at
    # low temperatures (which would produce 0/0 below).
    logits -= np.max(logits)
    weights = np.exp(logits)
    weights /= np.sum(weights)
    return int(np.random.choice(len(weights), p=weights))


In [14]:
#Generate Text OUTPUT
def generate_text(seed_text, length=300, temperature=0.8):
    """Generate ``length`` characters that continue ``seed_text``.

    The model is fed a sliding window holding the most recent ``SEQ_LENGTH``
    character ids (prompt plus everything generated so far). The window grows
    until it reaches ``SEQ_LENGTH`` and then slides, so the context matches
    the window size used for training once enough text exists.

    Args:
        seed_text: Prompt to continue; it is lower-cased first.
        length: Number of characters to generate.
        temperature: Passed to ``sample_with_temperature``.

    Returns:
        The lower-cased prompt followed by the generated characters.
    """
    seed_text = seed_text.lower()
    # Characters missing from the vocabulary fall back to id 0.
    # NOTE(review): id 0 is also a real character (ids start at 0) and is the
    # value pad_sequences uses for padding — confirm this is acceptable.
    window = [char_to_idx.get(c, 0) for c in seed_text][-SEQ_LENGTH:]
    generated = []

    for _ in range(length):
        # Left-pads when the window is still shorter than SEQ_LENGTH.
        model_input = pad_sequences([window], maxlen=SEQ_LENGTH, truncating="pre")

        preds = model.predict(model_input, verbose=0)[0]
        next_idx = sample_with_temperature(preds, temperature)
        generated.append(idx_to_char[next_idx])

        # Keep only the newest SEQ_LENGTH ids as context for the next step.
        window = (window + [next_idx])[-SEQ_LENGTH:]

    return seed_text + "".join(generated)


In [15]:
#OUTPUT
# Sampling is random, so the generated text differs from run to run.
print("\nGenerated Text:\n")
generated_sample = generate_text(
    seed_text="to be or not to be",
    length=400,
    temperature=0.7,
)
print(generated_sample)



Generated Text:

to be or not to benenting about at babmon in the his lidaud moon the harry masted out the with the loo cout his you rest apout to ham lith muin!’

“momed,” snormon was wall said acle iom a gow a shisce in the corceately dook, and weren. whour that he waw mastly a hay witklil of bear of mutigald of the mele, brace to the wand gotter reemy of to con fourd the said of had and that wempooged to who grape bewed bething 
