# Import Dependencies

In [2]:
import nltk
import numpy as np
import tensorflow as tf
from nltk.corpus import treebank
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [3]:
# Make sure the Penn Treebank sample is available in the local NLTK data directory
nltk.download('treebank')

# Read the tokenized sentences and re-join each one with single spaces,
# giving a list with one plain-text string per sentence.
sentences = treebank.sents()
corpus = list(map(' '.join, sentences))

[nltk_data] Downloading package treebank to
[nltk_data]     /Users/ozairrahman/nltk_data...
[nltk_data]   Package treebank is already up-to-date!


In [4]:
# Tokenization: learn the word -> integer index mapping from the whole corpus
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)

# One more than the number of distinct words, so that the largest word index
# is still a valid class id / embedding row.
# NOTE(review): assumes word indices start at 1 (0 unused) — confirm against the Keras Tokenizer docs.
total_words = 1 + len(tokenizer.word_index)

In [5]:
# Create input sequences
# Tokenize the whole corpus in one batch call (instead of one call per line),
# then expand every sentence into its growing prefixes: the first 2 tokens,
# the first 3 tokens, ... up to the full sentence. Sentences with fewer than
# two tokens contribute nothing. Order matches the original nested loops:
# sentence by sentence, shortest prefix first.
input_sequences = [
    token_list[:end]
    for token_list in tokenizer.texts_to_sequences(corpus)
    for end in range(2, len(token_list) + 1)
]

In [6]:
# Pad sequences
# Pad every n-gram at the front ('pre') up to the length of the longest one,
# so the final token of each sequence stays in the last position.
max_sequence_length = max(map(len, input_sequences))
input_sequences = pad_sequences(
    input_sequences,
    maxlen=max_sequence_length,
    padding='pre',
)

In [7]:
# Create predictors and label
# Every column except the last is the model input (the preceding words);
# the last column holds the word to predict.
X = input_sequences[:, :-1]

# One-hot encode the target word over the whole vocabulary.
# NOTE(review): this produces a dense (num_samples, total_words) matrix — if
# memory becomes a problem, consider integer labels with a sparse loss instead.
y = tf.keras.utils.to_categorical(input_sequences[:, -1], num_classes=total_words)

# Build and Train the Model

In [11]:
# Next-word prediction model:
#   token ids -> 100-dimensional embeddings -> LSTM(100) -> softmax over the vocabulary.
# The input shape is declared with an explicit Input layer; Embedding's
# `input_length` argument is deprecated in Keras 3 and only triggers a warning.
# Inputs have max_sequence_length - 1 tokens because the last token of every
# padded sequence was split off as the label.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(max_sequence_length - 1,)),
    tf.keras.layers.Embedding(total_words, 100),
    tf.keras.layers.LSTM(100),
    tf.keras.layers.Dense(total_words, activation='softmax')
])

# The labels are one-hot vectors, hence categorical (not sparse) cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Keep the returned History object so the per-epoch metrics can be inspected
# later and the cell does not end with a bare object repr.
history = model.fit(X, y, epochs=50, verbose=1)



Epoch 1/50
[1m2781/2781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m297s[0m 106ms/step - accuracy: 0.0556 - loss: 7.2827
Epoch 2/50
[1m2781/2781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m303s[0m 109ms/step - accuracy: 0.1281 - loss: 6.2461
Epoch 3/50
[1m2781/2781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m297s[0m 107ms/step - accuracy: 0.1647 - loss: 5.6673
Epoch 4/50
[1m2781/2781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m299s[0m 107ms/step - accuracy: 0.1911 - loss: 5.2028
Epoch 5/50
[1m2781/2781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m307s[0m 110ms/step - accuracy: 0.2149 - loss: 4.7979
Epoch 6/50
[1m2781/2781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m298s[0m 107ms/step - accuracy: 0.2407 - loss: 4.4485
Epoch 7/50
[1m2781/2781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m302s[0m 109ms/step - accuracy: 0.2697 - loss: 4.1147
Epoch 8/50
[1m2781/2781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m303s[0m 109ms/step - accuracy: 0.3037 - loss:

<keras.src.callbacks.history.History at 0x17c351f30>

# Save Model

In [12]:
# Write the trained model to 'model_2.keras' (path is relative to the current working directory)
model.save(filepath='model_2.keras')