In [1]:
# Importing the libraries
import numpy as np
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Embedding

In [2]:
# Toy corpus: ten short restaurant reviews, five positive followed by five negative.
reviews = [
    'nice food',
    'amazing restaurant',
    'too good',
    'just loved it!',
    'will go again',
    'horrible food',
    'never go there',
    'poor service',
    'poor quality',
    'needs improvement',
]

# Binary labels aligned with `reviews` by position: 1 = positive, 0 = negative.
sentiment = np.array([1] * 5 + [0] * 5)

In [3]:
# Quick look at what one_hot does to a single review: every word is replaced by an
# integer index, and the second argument (30) bounds the range those indices are drawn from.
# The mapping is hash-based, so two different words can end up with the same index.
sample_review = "amazing restaurant"
one_hot(sample_review, 30)

[27, 7]

In [4]:
# Size of the index space handed to one_hot (also used as the Embedding input dimension).
vocab_size = 30

# Encode every review as a list of integer word indices, one list per review.
encoded_reviews = [
    one_hot(review_text, vocab_size)
    for review_text in reviews
]
print(encoded_reviews)

[[8, 27], [27, 7], [26, 26], [3, 18, 24], [26, 18, 4], [3, 27], [11, 18, 6], [15, 18], [15, 10], [7, 15]]


In [5]:
# All rows fed to the network must have the same length, so shorter reviews are
# padded with 0 up to the length of the longest encoded review.
# The length is derived from the data (it is 3 for this corpus) instead of being
# hard-coded, so a longer review added later is not silently truncated.
max_length = max(len(encoded) for encoded in encoded_reviews)

# padding='post' appends the zeros at the end of each sequence.
padded_reviews = pad_sequences(encoded_reviews, maxlen=max_length, padding='post')
print(padded_reviews)

[[ 8 27  0]
 [27  7  0]
 [26 26  0]
 [ 3 18 24]
 [26 18  4]
 [ 3 27  0]
 [11 18  6]
 [15 18  0]
 [15 10  0]
 [ 7 15  0]]


In [6]:
# Length of the dense vector learned for each word index (4 values per word).
embeded_vector_size = 4

# Assemble the network as one ordered stack of layers:
#   1. Embedding - maps each of the max_length word indices to an
#      embeded_vector_size-dimensional vector; it is named "embedding" so the
#      layer can be fetched by name later to inspect the learned vectors.
#   2. Flatten   - concatenates the per-word vectors into one flat feature vector.
#   3. Dense     - a single sigmoid neuron producing the positive/negative score.
model = Sequential([
    Embedding(vocab_size, embeded_vector_size, input_length=max_length, name="embedding"),
    Flatten(),
    Dense(1, activation='sigmoid'),
])

In [7]:
# Features are the padded index sequences; targets are the 0/1 sentiment labels.
X, y = padded_reviews, sentiment

In [8]:
# Configure training: Adam optimizer, binary cross-entropy loss (two-class problem),
# and accuracy reported as the metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# summary() prints the layer table itself and returns None, so it is called directly;
# wrapping it in print() would add a stray "None" line after the table.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 3, 4)              120       
                                                                 
 flatten (Flatten)           (None, 12)                0         
                                                                 
 dense (Dense)               (None, 1)                 13        
                                                                 
Total params: 133 (532.00 Byte)
Trainable params: 133 (532.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [9]:
# Train for 50 epochs without per-epoch logging (verbose=0).
# The returned History object is kept so the per-epoch loss/accuracy can be inspected
# later via `history.history`, instead of only echoing its repr as the cell output.
history = model.fit(X, y, epochs=50, verbose=0)

<keras.src.callbacks.History at 0x1fcef644390>

In [10]:
# Score the model on the same data it was trained on, so the value shown is
# training accuracy, not a held-out estimate.
loss, accuracy = model.evaluate(x=X, y=y)
accuracy



0.8999999761581421

In [11]:
# Fetch the embedding layer by the name given when the model was built, then take
# its first (and, for an Embedding layer, only) weight array: one row per word index.
embedding_layer = model.get_layer('embedding')
weights = embedding_layer.get_weights()[0]

# Number of rows in the embedding matrix.
len(weights)

30

In [13]:
# Embedding vector learned for the word 'nice'.
# The index is looked up from the encoded corpus instead of being hard-coded:
# one_hot is hash-based, so the index recorded in one session (8) is not guaranteed
# to be the index of 'nice' after a kernel restart.
nice_index = encoded_reviews[0][0]  # reviews[0] is 'nice food'; its first code is 'nice'
weights[nice_index]

array([ 0.06378459,  0.02079196,  0.04959892, -0.05616137], dtype=float32)

In [14]:
# Embedding vector learned for the word 'food' (second word of reviews[0], 'nice food').
# In the recorded run this index was 27, which 'amazing' also hashed to, so both
# words share this row. Looking the index up keeps the cell valid across sessions.
food_index = encoded_reviews[0][1]
weights[food_index]

array([ 0.07370112,  0.09616487,  0.00139125, -0.09563156], dtype=float32)