In [None]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Parameters
max_features = 10000  # Vocabulary size: keep only the 10,000 most frequent words
maxlen = 200  # Every review is padded/truncated to exactly this many tokens
embedding_dim = 50  # Dimensionality of the embedding space

# Load the IMDB training reviews; the labels and the test split are discarded.
# NOTE: X_train is prepared here but is not fed to the model in this cell.
(X_train, _), (_, _) = imdb.load_data(num_words=max_features)

# Pad sequences to ensure uniform length
X_train = pad_sequences(X_train, maxlen=maxlen)

# Define the model: a single Embedding layer mapping word indices to vectors.
# `input_length` is intentionally not passed: it is deprecated in Keras 3 and
# was only used here as an implicit way of getting the model built.
model = Sequential()
model.add(Embedding(input_dim=max_features, output_dim=embedding_dim))

# Build explicitly so the embedding matrix is allocated. Without a build step
# the layer may have no weights yet and get_weights() would return an empty
# list, making the [0] lookup below fail.
model.build(input_shape=(None, maxlen))

# Compile the model (not needed for printing embeddings)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Access the embedding matrix. The model has NOT been trained (fit is never
# called), so these are the layer's random initial weights, not learned
# embeddings, and the values will differ from run to run.
embeddings = model.layers[0].get_weights()[0]

# Sanity check: one row per vocabulary index, one column per embedding dimension.
assert embeddings.shape == (max_features, embedding_dim), embeddings.shape

# Print the shape of the embeddings
print("Shape of word embeddings:", embeddings.shape)

# Print the first few embeddings
print("Word embeddings:")
for i, vector in enumerate(embeddings[:5]):
    print(f"Word index {i}: {vector}")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
Shape of word embeddings: (10000, 50)
Word embeddings:
Word index 0: [ 0.04807285 -0.00058662 -0.02820878  0.00674134 -0.00417324 -0.00750982
 -0.04809252  0.03454049  0.03716179  0.02109833 -0.00994356  0.00430143
 -0.03056931  0.02417031 -0.04226265 -0.04880222  0.0161747  -0.04533769
 -0.03015019 -0.01227547 -0.02249843  0.04098021  0.018321    0.02507835
  0.02979488 -0.01860898  0.0346786   0.01216443  0.04781118 -0.02494792
 -0.00943723  0.0023643  -0.04260791 -0.02103874  0.02855989  0.01246569
 -0.03425239  0.02199224  0.02656057  0.0222691   0.04237549 -0.04094536
  0.00415481  0.03114421  0.01289362  0.03970803 -0.03114772  0.01770026
 -0.01431429 -0.02203734]
Word index 1: [ 0.04478847 -0.04664689  0.01623667 -0.00425769  0.00750063  0.03441974
 -0.00785174  0.04170359  0.01049595  0.04825724 -0.04421037  0.0131147
  0.0363943  -0.01426922 -0.01300881  0.0016926   0.00959295  0.0310886