In [2]:
# Mount Google Drive at /content/drive so that files stored on Drive can be
# opened with ordinary file paths later in the notebook (Colab-only helper).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Location of the training corpus on the mounted Drive.
file_path = "/content/drive/MyDrive/Colab Notebooks/Sample-Text.txt"

# Read the entire corpus as UTF-8, then normalise it to lower case.
with open(file_path, mode='r', encoding='utf-8') as corpus_file:
    raw_text = corpus_file.read()
text = raw_text.lower()


In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Fit the vocabulary on the whole corpus. One extra slot is added to the
# vocabulary size because word indices start at 1 and 0 is the padding value.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
total_words = 1 + len(tokenizer.word_index)

# Encode every line, then expand each encoded line into all of its prefixes
# of length >= 2: the last token of a prefix is the word to predict and the
# tokens before it are the context.
encoded_lines = tokenizer.texts_to_sequences(text.split('\n'))
input_sequences = [
    encoded[:end]
    for encoded in encoded_lines
    for end in range(2, len(encoded) + 1)
]

# Left-pad every prefix to the length of the longest one so they stack into
# a single 2-D array.
max_seq_len = max(map(len, input_sequences))
input_sequences = pad_sequences(input_sequences, maxlen=max_seq_len, padding='pre')

# Context = all columns but the last; target = last column, one-hot encoded.
X = input_sequences[:, :-1]
labels = input_sequences[:, -1]
y = to_categorical(labels, num_classes=total_words)


In [16]:
# Next-word model: learned word embeddings -> two stacked LSTMs -> softmax
# over the vocabulary.
#
# The input shape is declared with an explicit Input layer rather than by
# passing `input_shape=` to Embedding: newer Keras versions warn about that
# argument, while an Input layer still lets the model build immediately so
# `model.summary()` can report real output shapes and parameter counts.
model = Sequential([
    tf.keras.Input(shape=(X.shape[1],)),
    Embedding(input_dim=total_words, output_dim=64),
    # return_sequences=True hands the full sequence of hidden states to the
    # second LSTM instead of only the final state.
    LSTM(150, return_sequences=True),
    LSTM(100),
    Dense(total_words, activation='softmax'),
])

# Targets are one-hot vectors, hence categorical (not sparse) cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


In [17]:
# Train on every (context, next-word) pair. The returned History object is
# kept so per-epoch loss/accuracy remain available after training (e.g. via
# `history.history`) instead of being discarded once the cell has echoed it.
NUM_EPOCHS = 300
history = model.fit(X, y, epochs=NUM_EPOCHS, verbose=1)


Epoch 1/300
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 272ms/step - accuracy: 0.0428 - loss: 5.0889
Epoch 2/300
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 267ms/step - accuracy: 0.0907 - loss: 4.9562
Epoch 3/300
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 268ms/step - accuracy: 0.0871 - loss: 4.7912
Epoch 4/300
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 508ms/step - accuracy: 0.0881 - loss: 4.6805
Epoch 5/300
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 268ms/step - accuracy: 0.0836 - loss: 4.6683
Epoch 6/300
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 273ms/step - accuracy: 0.0835 - loss: 4.6421
Epoch 7/300
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 268ms/step - accuracy: 0.1025 - loss: 4.5540
Epoch 8/300
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 545ms/step - accuracy: 0.1074 - loss: 4.5593
Epoch 9/300
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x786d0250ae90>

In [23]:
def generate_text(seed_text, next_words=20):
    """Greedily extend ``seed_text`` with up to ``next_words`` predicted words.

    Uses the notebook-level ``tokenizer``, ``model`` and ``X`` (the latter
    only for the context length the model was trained with).

    Args:
        seed_text: Prompt to continue.
        next_words: Maximum number of words to append.

    Returns:
        ``seed_text`` followed by the generated words, separated by single
        spaces. Generation stops early when the most likely index has no
        entry in ``tokenizer.index_word`` (for example the padding index 0).
    """
    context_len = X.shape[1]

    # Encode the prompt once. Predicted indices are appended to this list
    # directly, so the growing text is not re-tokenized on every step.
    token_ids = tokenizer.texts_to_sequences([seed_text])[0]
    generated_words = []

    for _ in range(next_words):
        # Left-pad (or keep only the most recent tokens) to the model's
        # expected context length.
        model_input = pad_sequences([token_ids], maxlen=context_len, padding='pre')
        predicted_probs = model.predict(model_input, verbose=0)
        # Single-sample batch: pick the most likely word from its only row.
        predicted_index = int(np.argmax(predicted_probs[0]))
        predicted_word = tokenizer.index_word.get(predicted_index, '')

        if not predicted_word:
            break
        token_ids.append(predicted_index)
        generated_words.append(predicted_word)

    return " ".join([seed_text, *generated_words])

# Demo: continue a short prompt with 20 predicted words and show the result.
sample_output = generate_text("This year is", next_words=20)
print(sample_output)

This year is however the usual cheerful atmosphere was tinged with a sense of unease a shadowy figure shrouded in a dark cloak
