In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.callbacks import EarlyStopping
import pickle
import os

# Configuration
MAX_FEATURES = 10000  # vocabulary size
MAX_LEN = 500
EMBEDDING_DIM = 128
RNN_UNITS = 128
EPOCHS = 10
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2

def load_and_preprocess_data():
    """Load and preprocess IMDB dataset."""
    print("Loading IMDB dataset...")
    (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=MAX_FEATURES)

    print(f'Training data shape: {X_train.shape}, Training labels shape: {y_train.shape}')
    print(f'Testing data shape: {X_test.shape}, Testing labels shape: {y_test.shape}')

    # Pad sequences
    print(f"Padding sequences to max length: {MAX_LEN}")
    X_train = sequence.pad_sequences(X_train, maxlen=MAX_LEN)
    X_test = sequence.pad_sequences(X_test, maxlen=MAX_LEN)

    return (X_train, y_train), (X_test, y_test)

def create_model():
    """Create and compile the RNN model."""
    print("Creating model architecture...")
    model = Sequential([
        Embedding(MAX_FEATURES, EMBEDDING_DIM, input_length=MAX_LEN, name='embedding'),
        SimpleRNN(RNN_UNITS, activation='relu', name='rnn'),
        Dense(1, activation='sigmoid', name='output')
    ])

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    return model

def train_model(model, X_train, y_train):
    """Train the model with early stopping."""
    print("Training model...")

    # Create early stopping callback
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True,
        verbose=1
    )

    # Train the model
    history = model.fit(
        X_train, y_train,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_split=VALIDATION_SPLIT,
        callbacks=[early_stop],
        verbose=1
    )

    return history

def save_model_and_artifacts(model):
    """Save model and required artifacts for deployment."""

    # Create directory for model artifacts
    os.makedirs('model_artifacts', exist_ok=True)

    # 1. Save model in Keras 3 compatible format
    print("Saving model in .keras format...")
    model.save('model_artifacts/sentiment_model.keras')

    # 2. Also save as H5 for backward compatibility
    print("Saving model in .h5 format...")
    model.save('model_artifacts/sentiment_model.h5')

    # 3. Save word index and reverse word index
    print("Saving word indices...")
    word_index = imdb.get_word_index()
    reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])

    with open('model_artifacts/word_index.pkl', 'wb') as f:
        pickle.dump(word_index, f)

    with open('model_artifacts/reverse_word_index.pkl', 'wb') as f:
        pickle.dump(reverse_word_index, f)

    # 4. Save model configuration
    config = {
        'max_features': MAX_FEATURES,
        'max_len': MAX_LEN,
        'embedding_dim': EMBEDDING_DIM,
        'rnn_units': RNN_UNITS
    }

    with open('model_artifacts/model_config.pkl', 'wb') as f:
        pickle.dump(config, f)

    print("All artifacts saved successfully!")

def evaluate_model(model, X_test, y_test):
    """Evaluate model performance."""
    print("Evaluating model...")
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test Loss: {test_loss:.4f}")
    print(f"Test Accuracy: {test_accuracy:.4f}")
    return test_loss, test_accuracy

def main():
    """Main training pipeline."""
    print("Starting IMDB Sentiment Analysis Training Pipeline")
    print("=" * 50)

    # Load and preprocess data
    (X_train, y_train), (X_test, y_test) = load_and_preprocess_data()

    # Create model
    model = create_model()
    model.summary()

    # Train model
    history = train_model(model, X_train, y_train)

    # Evaluate model
    evaluate_model(model, X_test, y_test)

    # Save model and artifacts
    save_model_and_artifacts(model)

    print("\nTraining completed successfully!")
    print("Model artifacts saved in 'model_artifacts' directory")

    return model, history

if __name__ == "__main__":
    model, history = main()

Starting IMDB Sentiment Analysis Training Pipeline
Loading IMDB dataset...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Training data shape: (25000,), Training labels shape: (25000,)
Testing data shape: (25000,), Testing labels shape: (25000,)
Padding sequences to max length: 500
Creating model architecture...


Training model...
Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 209ms/step - accuracy: 0.5911 - loss: 240944431104.0000 - val_accuracy: 0.7004 - val_loss: 0.5821
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 203ms/step - accuracy: 0.6869 - loss: 0.8139 - val_accuracy: 0.7254 - val_loss: 0.5477
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 208ms/step - accuracy: 0.5583 - loss: nan - val_accuracy: 0.5062 - val_loss: nan
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 201ms/step - accuracy: 0.5011 - loss: nan - val_accuracy: 0.5062 - val_loss: nan
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 208ms/step - accuracy: 0.5020 - loss: nan - val_accuracy: 0.5062 - val_loss: nan
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 205ms/step - accuracy: 0.5025 - loss: nan - val_accuracy: 0.5062 - val_loss: nan



Test Loss: 0.5435
Test Accuracy: 0.7271
Saving model in .keras format...
Saving model in .h5 format...
Saving word indices...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
All artifacts saved successfully!

Training completed successfully!
Model artifacts saved in 'model_artifacts' directory
