In [1]:
# Importing important libraries
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding
import numpy as np

In [2]:
# Creating a simple keras model
model = Sequential()
embedding_layer = Embedding(input_dim = 10, output_dim = 4, input_length  = 2)
model.add(embedding_layer)
model.compile('adam', 'mse')

In [None]:
# input_dim -> Size of the vocabulary
# output_dim -> Length of the vector for each word
# input_length -> Maximum length of the Sequence

In [3]:
# Creating input data
input_data = np.array([[1, 2]])

In [4]:
# Taking predictions from the model
pred = model.predict(input_data)
print('Input Data Shape : ', input_data.shape)
print('Predictions : ', pred)

Input Data Shape :  (1, 2)
Predictions :  [[[ 0.03746562 -0.03275758  0.01205476  0.04053016]
  [-0.04000286  0.03024631  0.02041082 -0.04177812]]]


In [6]:
# Importing remaining packages
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Embedding, Dense

In [7]:
# Defining 10 reviews and labels
reviews = [
    'Never coming back!',
    'horrible service',
    'rude waitress',
    'cold food',
    'horrible food!',
    'awesome',
    'awesome services!',
    'rocks',
    'poor work',
    'couldn\'t have done better'
]
labels = np.array([1,1,1,1,1,0,0,0,0,0])

In [None]:
# We will take vocabulary size as 50 and one hot encode the words using one_hot function.

In [9]:
# Code
Vocab_size = 50
encoded_reviews = [one_hot(d, Vocab_size) for d in reviews]
print(f'encoded reviews : {encoded_reviews}')

encoded reviews : [[45, 17, 43], [45, 31], [21, 49], [40, 22], [45, 22], [32], [32, 27], [10], [46, 36], [6, 25, 44, 8]]


In [None]:
# Length of each vector exactly corresponds to the number of words in the corresponding review.

In [None]:
# Now we need to apply padding so that the encoded reviews are of same length.

In [11]:
# Code
max_length = 4
padded_reviews = pad_sequences(encoded_reviews, maxlen = max_length, padding = 'post')
print(padded_reviews)

[[45 17 43  0]
 [45 31  0  0]
 [21 49  0  0]
 [40 22  0  0]
 [45 22  0  0]
 [32  0  0  0]
 [32 27  0  0]
 [10  0  0  0]
 [46 36  0  0]
 [ 6 25 44  8]]


In [None]:
# One Hot representation of the embedding are ready and we can pass it to the embedding layers.
# In the next part, we will fix the length of embedded vectors to 8 (output_dim).

In [13]:
# Code
model = Sequential()
embedding_layer = Embedding(input_dim = Vocab_size, output_dim = 8, input_length = max_length)
model.add(embedding_layer)
model.add(Flatten())
model.add(Dense(1, activation = 'sigmoid'))
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['acc'])

In [14]:
# Printing the model summary
print(model.summary())

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 4, 8)              400       
_________________________________________________________________
flatten_1 (Flatten)          (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 33        
Total params: 433
Trainable params: 433
Non-trainable params: 0
_________________________________________________________________
None


In [15]:
# Fitting the model
model.fit(padded_reviews, labels, epochs = 100, verbose = 1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x144320130>

In [16]:
# Printing the embedding layer weights
print(embedding_layer.get_weights()[0].shape)

(50, 8)
