An embedding is a technique used in machine learning and natural language processing to represent words, phrases, or other discrete data as dense, real-valued vectors in a continuous vector space. Because the vectors are learned during training, items that behave similarly end up close together, which allows models to capture semantic relationships and similarities between different entities.

In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, SimpleRNN, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer 
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
# Fetch the IMDB review data as pre-split train/test (features, labels) pairs.
# num_words=10000 caps the word-index vocabulary; the Embedding layer's
# input_dim further down has to agree with this value.
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=10000,
)

In [3]:
# Bring every review to exactly 200 token ids so the splits become rectangular
# arrays; shorter reviews receive their padding at the end ('post').
x_train, x_test = (
    pad_sequences(split, maxlen=200, padding='post')
    for split in (x_train, x_test)
)

In [4]:
# Sanity check: after padding, the second dimension should equal maxlen (200).
x_train.shape

(25000, 200)

In [7]:
# Best-effort reproducibility: seed Python, NumPy and TensorFlow before any
# weights are initialised (also affects dropout and batch shuffling later on).
tf.keras.utils.set_random_seed(42)

model = Sequential([
    # Explicit input spec (200 = maxlen used when padding). This replaces the
    # deprecated `input_length=` argument of Embedding while still building the
    # model eagerly, so model.summary() below can report shapes and parameters.
    tf.keras.Input(shape=(200,)),
    # input_dim must match num_words passed to imdb.load_data (10000).
    # mask_zero=True: index 0 is the padding value, and the inputs are padded at
    # the END of each review. Without a mask the SimpleRNN would consume the run
    # of trailing zeros last and its final state would mostly encode padding;
    # with the mask those steps are skipped and the state of the last real token
    # is what reaches the classifier.
    Embedding(input_dim=10000, output_dim=64, mask_zero=True),
    # Only the final hidden state is needed for whole-review classification.
    SimpleRNN(16, return_sequences=False),
    Dropout(0.5),
    # Single sigmoid unit -> probability for the binary label.
    Dense(1, activation='sigmoid'),
])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 200, 64)           640000    
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 16)                1296      
                                                                 
 dropout_1 (Dropout)         (None, 16)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 17        
                                                                 
Total params: 641,313
Trainable params: 641,313
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Halt training once validation loss has gone 2 epochs without improving and
# put the best weights seen so far back on the model.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
)

# The single sigmoid output of the model pairs with binary cross-entropy.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for at most 10 epochs, reserving a 0.2 fraction of the training arrays
# for validation (this is what feeds the 'val_loss' monitored above).
history = model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=10,
    validation_split=0.2,
    callbacks=[early_stopping],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
