In [15]:
# Importing essential libraries
import numpy as np
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Embedding

In [16]:
# Toy corpus for the demo: the first five reviews are positive,
# the last five are negative.
reviews = [
    'nice food',
    'amazing restaurant',
    'too good',
    'just loved it!',
    'will go again',
    'horrible food',
    'never go there',
    'poor service',
    'poor quality',
    'needs improvement',
]

# Labels aligned index-by-index with `reviews` (1 = positive, 0 = negative)
sentiment = np.array([1] * 5 + [0] * 5)

In [17]:
# one_hot encoding example
# one_hot is called with two values here: the text to encode and the size of
# the integer range (vocabulary size) its words are mapped into. It returns
# one integer id per word of the text.

one_hot("amazing restaurant",30)

[6, 28]

In [18]:
 # Defining the size of the vocabulary

vocab_size = 30

# Turn every review into a list of integer word ids, one id per word.
# NOTE(review): one_hot is hash-based, so unrelated words can share an id.
# In the saved run both 'nice food' and 'poor service' come out as [14, 2],
# which the model cannot tell apart; a larger vocab_size makes this less likely.
encoded_reviews = [one_hot(review, vocab_size) for review in reviews]
encoded_reviews


[[14, 2],
 [6, 28],
 [7, 16],
 [10, 19, 7],
 [14, 24, 14],
 [12, 2],
 [14, 24, 26],
 [14, 2],
 [14, 15],
 [13, 19]]

In [21]:
# Fixed length that every encoded review is brought to
max_length = 3

# Pad at the end ("post") so that all sequences have max_length entries
padded_reviews = pad_sequences(
    encoded_reviews,
    maxlen=max_length,
    padding="post",
)
padded_reviews

array([[14,  2,  0],
       [ 6, 28,  0],
       [ 7, 16,  0],
       [10, 19,  7],
       [14, 24, 14],
       [12,  2,  0],
       [14, 24, 26],
       [14,  2,  0],
       [14, 15,  0],
       [13, 19,  0]], dtype=int32)

In [37]:
# Number of dimensions of the vector learned for each word id
Embeded_vector_size = 4

# Embedding -> Flatten -> single sigmoid unit, declared as one layer list
model = Sequential([
    # Looks up a trainable Embeded_vector_size-dimensional vector per word id
    Embedding(
        input_dim=vocab_size,
        output_dim=Embeded_vector_size,
        input_length=max_length,
        name="embedding",
    ),
    # Joins the max_length word vectors of a review into one flat vector
    Flatten(),
    # One sigmoid output for the binary sentiment label
    Dense(1, activation="sigmoid"),
])

In [38]:
# Model inputs (padded word ids) and targets (sentiment labels)
x, y = padded_reviews, sentiment

In [39]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
# Print the layer-by-layer architecture with parameter counts
model.summary()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 3, 4)              120       
                                                                 
 flatten_5 (Flatten)         (None, 12)                0         
                                                                 
 dense_5 (Dense)             (None, 1)                 13        
                                                                 
Total params: 133 (532.00 Byte)
Trainable params: 133 (532.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [41]:
# Fit on the whole toy dataset for 50 epochs, with per-epoch logging turned off
model.fit(x=x, y=y, epochs=50, verbose=0)

<keras.src.callbacks.History at 0x15cef29a0>

In [43]:
# Score the model on the same x, y it was fitted on, so this is training accuracy
loss, accuracy = model.evaluate(x=x, y=y)
accuracy



0.8999999761581421

In [50]:
# Pull the trained embedding matrix out of the layer named "embedding";
# it is the first (index 0) array in the layer's weight list.
embedding_layer = model.get_layer(name="embedding")
weights = embedding_layer.get_weights()[0]
weights

array([[-0.07128103, -0.1380862 , -0.08433288,  0.13980511],
       [ 0.04097874, -0.01196454, -0.02463349, -0.03068678],
       [ 0.1192823 , -0.04936989, -0.13902679,  0.12226249],
       [ 0.03737742,  0.04596836, -0.04306164, -0.03496295],
       [ 0.01129245, -0.02024971, -0.02897176, -0.03455574],
       [-0.01391552,  0.02276974, -0.03563224,  0.0126181 ],
       [-0.13200042,  0.10503878,  0.13546695,  0.14926015],
       [-0.04189454,  0.15295918,  0.13960473,  0.12077454],
       [-0.0150594 , -0.04674103, -0.01743301, -0.04402265],
       [ 0.04417099, -0.0344281 ,  0.0137894 , -0.04747739],
       [-0.13429049,  0.12932944,  0.0977078 ,  0.09436398],
       [-0.00100295, -0.00375133,  0.03894799, -0.03270918],
       [ 0.13431968, -0.1612863 , -0.10660245, -0.07168186],
       [ 0.06178768, -0.13740589, -0.08239155, -0.09079581],
       [ 0.13109568,  0.16781166, -0.05286231, -0.10544907],
       [ 0.09308697, -0.1122665 , -0.12299113,  0.10237128],
       [-0.10193539,  0.

In [51]:
# Number of rows in the embedding matrix
weights.shape[0]

30

**Checking the first review, "nice food", whose one_hot encoding is [14, 2]**

In [55]:
# Displaying the embedding vector of the first word ("nice") of the first review.
# The row index is read from encoded_reviews rather than hard-coded: one_hot
# ids are derived from Python's string hash, which is not stable across kernel
# sessions, so a literal index may point at a different word after a restart.
# In the saved run this index is 14.
weights[encoded_reviews[0][0]]

array([ 0.13109568,  0.16781166, -0.05286231, -0.10544907], dtype=float32)

In [54]:
# Displaying the embedding vector of the second word ("food") of the first review.
# The row index is read from encoded_reviews rather than hard-coded: one_hot
# ids are derived from Python's string hash, which is not stable across kernel
# sessions, so a literal index may point at a different word after a restart.
# In the saved run this index is 2.
weights[encoded_reviews[0][1]]

array([ 0.1192823 , -0.04936989, -0.13902679,  0.12226249], dtype=float32)

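**The two arrays above are the learned embedding vectors for the words of the first review, "nice food" ("nice" → index 14, "food" → index 2). They are not very similar because our dataset is very small, but this is how word embeddings are implemented.**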