In [None]:

import numpy as np
import string
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.utils import to_categorical, Sequence
from tensorflow.keras.callbacks import EarlyStopping


# Read the complete corpus, fold it to lower case and strip ASCII punctuation
# in one chained expression.
with open("pg100.txt", "r", encoding="utf-8") as file:
    text = file.read().lower().translate(str.maketrans("", "", string.punctuation))

# Character vocabulary: the distinct characters in sorted order, plus lookup
# tables in both directions (character -> index and index -> character).
chars = list(set(text))
chars.sort()
char2idx = dict(zip(chars, range(len(chars))))
idx2char = dict(enumerate(chars))
vocab_size = len(char2idx)



class TextSequenceGenerator(Sequence):
    """Keras ``Sequence`` producing next-character training batches.

    Sample ``i`` is the window ``text[i:i + seq_length]`` (as character
    indices) paired with the one-hot encoding of ``text[i + seq_length]``.
    Batch ``idx`` holds the ``batch_size`` consecutive samples starting at
    ``idx * batch_size``; trailing samples that do not fill a whole batch are
    dropped.

    Args:
        text: The training corpus.
        char2idx: Mapping from every character of ``text`` to its index.
        seq_length: Number of characters in each input window.
        batch_size: Number of samples per batch.
        vocab_size: Width of the one-hot target vectors.

    Raises:
        KeyError: If ``text`` contains a character missing from ``char2idx``.
    """

    def __init__(self, text, char2idx, seq_length, batch_size, vocab_size):
        # Initialise the Keras base class; Keras 3 warns when a subclass
        # skips this call.
        super().__init__()
        self.text = text
        self.char2idx = char2idx
        self.seq_length = seq_length
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        # Encode the corpus once so batches are built by array slicing rather
        # than by per-character dict lookups on every __getitem__ call.
        self.encoded = np.fromiter(
            (char2idx[c] for c in text), dtype=np.int64, count=len(text)
        )
        # Clamp at zero: a corpus shorter than one window would otherwise give
        # a negative count, which makes len() raise.
        self.steps = max(0, (len(text) - seq_length - 1) // batch_size)

    def __len__(self):
        """Return the number of complete batches."""
        return self.steps

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(X, y)``.

        ``X`` has shape ``(batch_size, seq_length)`` and holds character
        indices; ``y`` is the one-hot encoding (``vocab_size`` classes) of the
        character that follows each window.

        Raises:
            IndexError: If ``idx`` is outside ``[0, len(self))``.
        """
        if not 0 <= idx < self.steps:
            raise IndexError(
                "batch index %r out of range for %d batches" % (idx, self.steps)
            )
        first = idx * self.batch_size
        starts = np.arange(first, first + self.batch_size)
        # Row r of `positions` is starts[r], starts[r] + 1, ..., starts[r] + seq_length - 1.
        positions = starts[:, None] + np.arange(self.seq_length)
        X_batch = self.encoded[positions]
        y_batch = self.encoded[starts + self.seq_length]
        return X_batch, to_categorical(y_batch, num_classes=self.vocab_size)



# --- Hyperparameters ---
sequence_length = 100  # characters of context per training sample
batch_size = 128
epochs = 20

# --- Model: embedding -> single LSTM -> softmax over the character vocabulary ---
model = Sequential()
# NOTE(review): `input_length` is reported as deprecated by Keras 3 — confirm
# against the Keras version this notebook targets before removing it.
model.add(Embedding(input_dim=vocab_size, output_dim=64, input_length=sequence_length))
model.add(LSTM(128))
model.add(Dense(vocab_size, activation="softmax"))

model.compile(loss="categorical_crossentropy", optimizer="adam")

# --- Training ---
train_generator = TextSequenceGenerator(text, char2idx, sequence_length, batch_size, vocab_size)
# No validation data is passed to fit(), so early stopping watches the training loss.
early_stop = EarlyStopping(monitor="loss", patience=3)

model.fit(train_generator, epochs=epochs, callbacks=[early_stop])

# --- Generation ---
# Pick a random window of the corpus as the seed.
start_index = np.random.randint(0, len(text) - sequence_length - 1)
seed_text = text[start_index:start_index + sequence_length]
generated_text = seed_text
# Sliding model input. It is kept separate from `seed_text` so the original
# seed is still intact when the report is written below (previously the seed
# was overwritten by the loop and the report showed generated characters).
context = seed_text

for _ in range(500):
    input_sequence = np.array([[char2idx[char] for char in context]])
    prediction = model.predict(input_sequence, verbose=0)[0]
    next_index = np.argmax(prediction)  # greedy: always take the most likely character
    next_char = idx2char[next_index]

    generated_text += next_char
    # Drop the oldest character and append the new one to keep the window length fixed.
    context = context[1:] + next_char

# --- Report ---
# `generated_text` starts with the seed, followed by the 500 generated characters.
with open("generated_output.txt", "w", encoding="utf-8") as output_file:
    output_file.write("Seed Text:\n" + seed_text + "\n\nGenerated Text:\n" + generated_text)

print("✅ Text generation complete! Check 'generated_output.txt'")


In [None]:


import numpy as np
import string
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.utils import to_categorical, Sequence
from tensorflow.keras.callbacks import EarlyStopping


with open("pg100.txt", "r", encoding="utf-8") as file:
    text = file.read(200000)  # only the first 200k characters, to keep the run quick

# Normalise: lower-case everything, then delete every ASCII punctuation
# character (a translation table that maps a character to None removes it).
text = text.lower()
text = text.translate(str.maketrans(dict.fromkeys(string.punctuation)))

# Vocabulary of distinct characters in sorted order, with lookups both ways.
chars = list(set(text))
chars.sort()
char2idx = dict(zip(chars, range(len(chars))))
idx2char = dict(enumerate(chars))
vocab_size = len(char2idx)


class TextSequenceGenerator(Sequence):
    """Keras ``Sequence`` producing strided next-character training batches.

    Window start offsets are ``0, step, 2 * step, ...`` up to (but excluding)
    ``len(text) - seq_length - 1``. Each sample is the window
    ``text[s:s + seq_length]`` (as character indices) paired with the one-hot
    encoding of ``text[s + seq_length]``. Offsets that do not fill a whole
    batch are dropped.

    Args:
        text: The training corpus.
        char2idx: Mapping from every character of ``text`` to its index.
        seq_length: Number of characters in each input window.
        batch_size: Number of samples per batch.
        vocab_size: Width of the one-hot target vectors.
        step: Distance between the starts of consecutive windows.

    Raises:
        KeyError: If ``text`` contains a character missing from ``char2idx``.
    """

    def __init__(self, text, char2idx, seq_length, batch_size, vocab_size, step=5):
        # Initialise the Keras base class; Keras 3 warns when a subclass
        # skips this call.
        super().__init__()
        self.text = text
        self.char2idx = char2idx
        self.seq_length = seq_length
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        self.step = step
        self.indices = list(range(0, len(text) - seq_length - 1, step))
        self.steps = len(self.indices) // batch_size
        # Encode the corpus once so batches are built by array slicing rather
        # than by per-character dict lookups on every __getitem__ call.
        self.encoded = np.fromiter(
            (char2idx[c] for c in text), dtype=np.int64, count=len(text)
        )

    def __len__(self):
        """Return the number of complete batches."""
        return self.steps

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(X, y)``.

        ``X`` has shape ``(batch_size, seq_length)`` and holds character
        indices; ``y`` is the one-hot encoding (``vocab_size`` classes) of the
        character that follows each window.

        Raises:
            IndexError: If ``idx`` is outside ``[0, len(self))``.
        """
        if not 0 <= idx < self.steps:
            raise IndexError(
                "batch index %r out of range for %d batches" % (idx, self.steps)
            )
        first = idx * self.batch_size
        starts = np.array(self.indices[first:first + self.batch_size])
        # Row r of `positions` is starts[r], starts[r] + 1, ..., starts[r] + seq_length - 1.
        positions = starts[:, None] + np.arange(self.seq_length)
        X_batch = self.encoded[positions]
        y_batch = self.encoded[starts + self.seq_length]
        return X_batch, to_categorical(y_batch, num_classes=self.vocab_size)



# --- Hyperparameters ---
seq_length = 100  # characters of context per training sample
batch_size = 128
epochs = 5  # Fewer epochs for speed
step = 5  # distance between the starts of consecutive training windows

# --- Model: embedding -> single LSTM -> softmax over the character vocabulary ---
model = Sequential()
# NOTE(review): `input_length` is reported as deprecated by Keras 3 — confirm
# against the Keras version this notebook targets before removing it.
model.add(Embedding(input_dim=vocab_size, output_dim=64, input_length=seq_length))
model.add(LSTM(64))  # Reduced size LSTM
model.add(Dense(vocab_size, activation="softmax"))

model.compile(loss="categorical_crossentropy", optimizer="adam")

# --- Training ---
train_generator = TextSequenceGenerator(text, char2idx, seq_length, batch_size, vocab_size, step=step)
# No validation data is passed to fit(), so early stopping watches the training loss.
early_stop = EarlyStopping(monitor="loss", patience=2)

model.fit(train_generator, epochs=epochs, callbacks=[early_stop])

# --- Generation ---
# Pick a random window of the corpus as the seed.
start_index = np.random.randint(0, len(text) - seq_length - 1)
seed_text = text[start_index:start_index + seq_length]
generated_text = seed_text
# Sliding model input. It is kept separate from `seed_text` so the original
# seed is still intact when the report is written below (previously the seed
# was overwritten by the loop and the report showed generated characters).
context = seed_text

for _ in range(500):
    input_sequence = np.array([[char2idx[char] for char in context]])
    pred = model.predict(input_sequence, verbose=0)[0]
    next_index = np.argmax(pred)  # greedy: always take the most likely character
    next_char = idx2char[next_index]

    generated_text += next_char
    # Drop the oldest character and append the new one to keep the window length fixed.
    context = context[1:] + next_char

# --- Report ---
# `generated_text` starts with the seed, followed by the 500 generated characters.
with open("generated_output.txt", "w", encoding="utf-8") as output_file:
    output_file.write("Seed Text:\n" + seed_text + "\n\nGenerated Text:\n" + generated_text)

print("✅ Quick text generation complete! Check 'generated_output.txt'")


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
✅ Quick text generation complete! Check 'generated_output.txt'


In [2]:


import numpy as np
import string
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.utils import to_categorical, Sequence
from tensorflow.keras.callbacks import EarlyStopping



# Upper bound on how many characters of the corpus are read; a prefix is used
# instead of the whole file to keep training quick.
MAX_CHARS = 300000

with open("pg100.txt", "r", encoding="utf-8") as file:
    text = file.read(MAX_CHARS).lower()  # first MAX_CHARS characters only

# Remove ASCII punctuation.
text = text.translate(str.maketrans("", "", string.punctuation))

# Character vocabulary: the distinct characters in sorted order, plus lookup
# tables in both directions (character -> index and index -> character).
chars = sorted(set(text))
char2idx = {char: idx for idx, char in enumerate(chars)}
idx2char = {idx: char for char, idx in char2idx.items()}
vocab_size = len(chars)



class TextSequenceGenerator(Sequence):
    """Keras ``Sequence`` producing strided next-character training batches.

    Window start offsets are ``0, step, 2 * step, ...`` up to (but excluding)
    ``len(text) - seq_length - 1``. Each sample is the window
    ``text[s:s + seq_length]`` (as character indices) paired with the one-hot
    encoding of ``text[s + seq_length]``. Offsets that do not fill a whole
    batch are dropped.

    Args:
        text: The training corpus.
        char2idx: Mapping from every character of ``text`` to its index.
        seq_length: Number of characters in each input window.
        batch_size: Number of samples per batch.
        vocab_size: Width of the one-hot target vectors.
        step: Distance between the starts of consecutive windows.

    Raises:
        KeyError: If ``text`` contains a character missing from ``char2idx``.
    """

    def __init__(self, text, char2idx, seq_length, batch_size, vocab_size, step=5):
        # Initialise the Keras base class; Keras 3 warns when a subclass
        # skips this call.
        super().__init__()
        self.text = text
        self.char2idx = char2idx
        self.seq_length = seq_length
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        self.step = step
        self.indices = list(range(0, len(text) - seq_length - 1, step))
        self.steps = len(self.indices) // batch_size
        # Encode the corpus once so batches are built by array slicing rather
        # than by per-character dict lookups on every __getitem__ call.
        self.encoded = np.fromiter(
            (char2idx[c] for c in text), dtype=np.int64, count=len(text)
        )

    def __len__(self):
        """Return the number of complete batches."""
        return self.steps

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(X, y)``.

        ``X`` has shape ``(batch_size, seq_length)`` and holds character
        indices; ``y`` is the one-hot encoding (``vocab_size`` classes) of the
        character that follows each window.

        Raises:
            IndexError: If ``idx`` is outside ``[0, len(self))``.
        """
        if not 0 <= idx < self.steps:
            raise IndexError(
                "batch index %r out of range for %d batches" % (idx, self.steps)
            )
        first = idx * self.batch_size
        starts = np.array(self.indices[first:first + self.batch_size])
        # Row r of `positions` is starts[r], starts[r] + 1, ..., starts[r] + seq_length - 1.
        positions = starts[:, None] + np.arange(self.seq_length)
        X_batch = self.encoded[positions]
        y_batch = self.encoded[starts + self.seq_length]
        return X_batch, to_categorical(y_batch, num_classes=self.vocab_size)


# --- Hyperparameters ---
seq_length = 100  # characters of context per training sample
batch_size = 128
epochs = 10  # kept small so the whole cell runs quickly
step = 5  # distance between the starts of consecutive training windows

# --- Model: embedding -> single LSTM -> softmax over the character vocabulary ---
model = Sequential()
# NOTE(review): `input_length` is reported as deprecated by Keras 3 — confirm
# against the Keras version this notebook targets before removing it.
model.add(Embedding(input_dim=vocab_size, output_dim=64, input_length=seq_length))
model.add(LSTM(64))  # Reduced size LSTM
model.add(Dense(vocab_size, activation="softmax"))

model.compile(loss="categorical_crossentropy", optimizer="adam")

# --- Training ---
train_generator = TextSequenceGenerator(text, char2idx, seq_length, batch_size, vocab_size, step=step)
# No validation data is passed to fit(), so early stopping watches the training loss.
early_stop = EarlyStopping(monitor="loss", patience=2)

model.fit(train_generator, epochs=epochs, callbacks=[early_stop])

# --- Generation ---
# Pick a random window of the corpus as the seed.
start_index = np.random.randint(0, len(text) - seq_length - 1)
seed_text = text[start_index:start_index + seq_length]
generated_text = seed_text
# Sliding model input. It is kept separate from `seed_text` so the original
# seed is still intact when the report is written below (previously the seed
# was overwritten by the loop and the report showed generated characters).
context = seed_text

for _ in range(500):
    input_sequence = np.array([[char2idx[char] for char in context]])
    prediction = model.predict(input_sequence, verbose=0)[0]
    next_index = np.argmax(prediction)  # greedy: always take the most likely character
    next_char = idx2char[next_index]

    generated_text += next_char
    # Drop the oldest character and append the new one to keep the window length fixed.
    context = context[1:] + next_char

# --- Report ---
# The path is named once so the completion message cannot drift from the file
# actually written (the message used to point at 'generated_output.txt').
output_path = "generated_output_new.txt"
# `generated_text` starts with the seed, followed by the 500 generated characters.
with open(output_path, "w", encoding="utf-8") as out_file:
    out_file.write("Seed Text:\n" + seed_text + "\n\nGenerated Text:\n" + generated_text)

print(f"✅ Quick text generation complete! Check '{output_path}'")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
✅ Quick text generation complete! Check 'generated_output.txt'
