In [None]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.text import tokenizer_from_json
from tensorflow.keras.utils import to_categorical
import json

# Load padded sequences and tokenizer.
padded_sequences = pd.read_csv('data/padded_sequences.csv').values

# Use a context manager so the file handle is closed deterministically.
with open('data/tokenizer.json', encoding='utf-8') as tokenizer_file:
    tokenizer_json = json.load(tokenizer_file)

# tokenizer_from_json() expects a JSON *string*. Depending on how the file was
# written, json.load() yields either that string (double-encoded file) or the
# already-decoded config dict (raw Tokenizer.to_json() output); re-serialise
# the latter so both layouts load.
if not isinstance(tokenizer_json, str):
    tokenizer_json = json.dumps(tokenizer_json)
tokenizer = tokenizer_from_json(tokenizer_json)

# Index 0 is never assigned to a word by the Keras tokenizer, hence the +1.
vocab_size = len(tokenizer.word_index) + 1

if padded_sequences.ndim != 2 or padded_sequences.shape[1] < 2:
    raise ValueError(
        "padded_sequences must be 2-D with at least 2 tokens per row, "
        f"got shape {padded_sequences.shape}"
    )

# Prepare input (X) and output (y): at every timestep the model predicts the
# token that follows, so y is X shifted left by one position.
X = padded_sequences[:, :-1]  # All but the last token
y = padded_sequences[:, 1:]   # All but the first token

# NOTE: y is kept as integer token ids and trained with the sparse loss below.
# One-hot encoding it would materialise an (n_samples, seq_len, vocab_size)
# array, which is prohibitively large for realistic vocabularies.

# Define LSTM model.
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=128, input_length=X.shape[1]))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.2))
# return_sequences=True keeps the time axis so there is one prediction per
# position of y; without it the output is (batch, 128) and cannot be matched
# against per-timestep targets.
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.2))
# Softmax over the vocabulary at every timestep -> (batch, seq_len, vocab_size).
model.add(Dense(vocab_size, activation='softmax'))

# Compile the model. The sparse loss takes integer class ids as targets.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

# Train the model.
model.fit(X, y, batch_size=64, epochs=20, validation_split=0.2)

# Save the model.
model.save('lovecraft_lstm_model.h5')

print("Model training completed!")
