In [None]:
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.layers import Dense, SimpleRNN, Embedding, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

# Load data
# X holds the model inputs and y the training targets; both arrays are
# produced outside this notebook and read from ../data.
X = np.load('../data/X.npy')
y = np.load('../data/y.npy')

# Fail fast with a readable message instead of a late, cryptic Keras error:
# max_sequence_length below is read from X.shape[1], and fit() needs one
# target per input row.
assert X.ndim == 2, f"Expected X to be 2-D (samples, sequence), got shape {X.shape}"
assert len(X) == len(y), f"X and y disagree on sample count: {len(X)} vs {len(y)}"

# NOTE(security): pickle.load can execute arbitrary code from the file.
# Only load tokenizer.pkl if it was produced by a trusted pipeline.
with open('../data/tokenizer.pkl', 'rb') as f:
    tokenizer = pickle.load(f)

# One more than the number of known words.
# NOTE(review): presumably index 0 is reserved (e.g. for padding) - confirm
# against the preprocessing step that built the tokenizer.
vocab_size = len(tokenizer.word_index) + 1
max_sequence_length = X.shape[1]
embedding_dim = 100  # size of each learned word-embedding vector

print(f"Vocab size: {vocab_size}")
print(f"Sequence length: {max_sequence_length}")

# **Train Deep RNN Model**
This model stacks three SimpleRNN layers (128 units each), each followed by a Dropout layer (rate 0.2) for regularization, and ends in a softmax layer over the vocabulary.

In [None]:
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation, dropout masks and the shuffling done by
# fit() are repeatable across Restart Kernel -> Run All.
# NOTE(review): bit-exact GPU reproducibility may additionally require
# deterministic ops to be enabled - confirm if that level is needed.
SEED = 42
set_random_seed(SEED)

model = Sequential([
    # Maps each integer token id to a trainable embedding_dim-sized vector.
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),

    # First and second RNN layers return the full sequence so the next
    # recurrent layer receives one vector per time step.
    SimpleRNN(units=128, return_sequences=True),
    Dropout(0.2),
    SimpleRNN(units=128, return_sequences=True),
    Dropout(0.2),

    # Third RNN layer returns only its final output, which feeds the classifier.
    SimpleRNN(units=128),
    Dropout(0.2),

    # Output layer: a probability distribution over the whole vocabulary.
    Dense(units=vocab_size, activation='softmax'),
])

# Build with an explicit input shape so summary() can report output shapes
# and parameter counts before any data has been passed through the model.
model.build(input_shape=(None, max_sequence_length))

# The sparse loss takes integer class ids as targets rather than one-hot rows.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

In [None]:
# Train for 10 epochs in mini-batches of 32, holding out 20% of the samples
# for validation; the returned History object is used for plotting later.
history = model.fit(
    x=X,
    y=y,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

In [None]:
# Training vs. validation accuracy per epoch, read from the History object
# returned by model.fit. An explicit Axes is used so the figure carries its
# own title and axis labels and can be read without the surrounding code.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='accuracy')
ax.plot(history.history['val_accuracy'], label='val_accuracy')
ax.set(
    title='Deep RNN: training vs. validation accuracy',
    xlabel='Epoch',
    ylabel='Accuracy',
)
ax.legend()
plt.show()

In [None]:
# Persist the trained model. The target directory is created first so that a
# missing ../saved_models folder cannot make the save fail after training.
model_path = Path('../saved_models/deep_rnn_model.keras')
model_path.parent.mkdir(parents=True, exist_ok=True)
model.save(str(model_path))