In [4]:
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.layers import Embedding
from keras.optimizers.legacy import Adam
from keras.metrics import AUC

from sklearn.model_selection import train_test_split

In [5]:
def create_dataset(
        vocabulary_size: int,
        max_tokens: int,
        test_size: float,
        random_state: int = 69
):
    """Load the IMDB dataset and return padded train / validation / test arrays.

    The training split returned by ``imdb.load_data`` is used unchanged for
    training. The test split returned by ``imdb.load_data`` is divided in two
    with ``train_test_split`` to obtain the validation and final test sets.

    Args:
        vocabulary_size: Forwarded to ``imdb.load_data`` as ``num_words``.
        max_tokens: Forwarded to ``pad_sequences`` as ``maxlen``, so every
            sequence in the returned arrays has this length.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
            NOTE: because of how the split outputs are unpacked below, this
            share of the original test split becomes the *validation* set;
            the remainder becomes the returned test set.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)``.
    """
    (train_seqs, train_labels), (holdout_seqs, holdout_labels) = imdb.load_data(
        num_words=vocabulary_size
    )

    train_padded = pad_sequences(train_seqs, maxlen=max_tokens)
    holdout_padded = pad_sequences(holdout_seqs, maxlen=max_tokens)

    # train_test_split yields, per input array, the complementary part first
    # and the `test_size` part second. The first part is kept as the test set
    # and the second part is used for validation.
    test_padded, val_padded, test_labels, val_labels = train_test_split(
        holdout_padded,
        holdout_labels,
        test_size=test_size,
        random_state=random_state,
    )

    return (
        train_padded, train_labels,
        val_padded, val_labels,
        test_padded, test_labels,
    )

In [6]:
def build_model(
        vocabulary_size: int,
        max_tokens: int,
        embedding_dim: int,
        dropout: float = 0.0,
        learning_rate: float = 1e-4,
        decay: float = 1e-4,
):
    """Build and compile an Embedding -> LSTM -> Dense binary classifier.

    Args:
        vocabulary_size: ``input_dim`` of the embedding layer.
        max_tokens: ``input_length`` of the embedding layer.
        embedding_dim: Size of the embedding vectors; also used as the number
            of LSTM units.
        dropout: ``dropout`` argument of the LSTM layer.
        learning_rate: Learning rate passed to the Adam optimizer.
        decay: ``decay`` argument passed to the (legacy) Adam optimizer.

    Returns:
        A compiled ``Sequential`` model with a single sigmoid output, using
        binary cross-entropy loss and reporting accuracy and AUC.
    """
    model = Sequential()
    model.add(Embedding(input_dim=vocabulary_size, output_dim=embedding_dim, input_length=max_tokens))
    model.add(LSTM(units=embedding_dim, dropout=dropout))
    model.add(Dense(units=1, activation='sigmoid'))

    # `lr` is a deprecated alias kept only for backward compatibility;
    # `learning_rate` is the supported argument name.
    optimizer = Adam(learning_rate=learning_rate, decay=decay)

    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy', AUC()])

    return model

In [13]:
# These two values are passed to both create_dataset and build_model, so the
# padded sequences and the embedding layer are configured consistently.
vocabulary_size, max_tokens = 50000, 256

X_train, y_train, X_val, y_val, X_test, y_test = create_dataset(
    vocabulary_size=vocabulary_size, max_tokens=max_tokens, test_size=0.1
)

model = build_model(
    vocabulary_size=vocabulary_size, max_tokens=max_tokens, embedding_dim=256, dropout=0.25
)

In [14]:
# Train on the padded training set for 3 epochs in mini-batches of 16,
# passing the validation split so Keras reports validation metrics as well.
model.fit(x=X_train, y=y_train, batch_size=16, epochs=3, validation_data=(X_val, y_val))

Epoch 1/3


2023-11-26 15:20:56.883806: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-11-26 15:20:57.175288: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-11-26 15:20:57.844353: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-11-26 15:23:53.077978: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-11-26 15:23:53.163539: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x293f40190>

In [15]:
# Score the trained model on the test split. The result lists the loss
# followed by the metrics given to compile() (accuracy, then AUC).
model.evaluate(x=X_test, y=y_test)



[0.32651808857917786, 0.8780444264411926, 0.947562575340271]

In [17]:
# Save relative to the current working directory instead of a hard-coded,
# user-specific absolute path, so this cell also runs on other machines.
# The file name (and therefore the HDF5 format) is unchanged.
model.save('imdb_model2.h5')