In [1]:
import numpy as np
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Embedding


In [2]:
reviews=['nice food',
        'amazing restaurant',
        'too good',
        'just loved it!',
        'will go again',
        'horrible food',
        'never go there',
        'poor service',
        'poor quality',
        'needs improvement']
sentiment=np.array([1,1,1,1,1,0,0,0,0,0])

In [3]:
one_hot("amazing restaurant",30)   #30 is the vocabulary size

[14, 16]

In [4]:
one_hot("amazing restaurant",50)   #50 is the vocabulary size. you get values between 1 to 50. so amazing got 36 and restaurant got 19

[34, 16]

In [5]:
vocab_size=50
encoded_reviews=[one_hot(d,vocab_size) for d in reviews]
encoded_reviews

[[33, 8],
 [34, 16],
 [33, 31],
 [12, 48, 36],
 [16, 44, 28],
 [40, 8],
 [13, 44, 21],
 [38, 12],
 [38, 31],
 [5, 43]]

In [6]:
max_length=3
padded_reviews=pad_sequences(encoded_reviews,maxlen=max_length,padding='post')
padded_reviews

array([[33,  8,  0],
       [34, 16,  0],
       [33, 31,  0],
       [12, 48, 36],
       [16, 44, 28],
       [40,  8,  0],
       [13, 44, 21],
       [38, 12,  0],
       [38, 31,  0],
       [ 5, 43,  0]])

In [7]:
embeded_vector_size=4
model=Sequential()
model.add(Embedding(vocab_size,embeded_vector_size,input_length=max_length,name="embedding"))
model.add(Flatten())
model.add(Dense(1,activation='sigmoid'))

In [8]:
X=padded_reviews
y=sentiment

In [9]:
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 3, 4)              200       
                                                                 
 flatten (Flatten)           (None, 12)                0         
                                                                 
 dense (Dense)               (None, 1)                 13        
                                                                 
Total params: 213 (852.00 Byte)
Trainable params: 213 (852.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [10]:
model.fit(X,y,epochs=50,verbose=0)

<keras.src.callbacks.History at 0x21ffe17d6d0>

In [11]:
loss,accuracy=model.evaluate(X,y)
accuracy
loss



0.6321685910224915

In [12]:
model.evaluate(X,y)



[0.6321685910224915, 1.0]

In [13]:
model.get_layer('embedding').get_weights()

[array([[-0.08216314, -0.07538348,  0.00807854, -0.08566938],
        [ 0.02670072,  0.00283396,  0.0150683 ,  0.02650651],
        [ 0.03918381,  0.04938804, -0.03105499, -0.03923974],
        [-0.01243529, -0.00678836, -0.03744118, -0.00772535],
        [-0.00117972, -0.00684365, -0.03205526, -0.04824449],
        [-0.09686375,  0.0016001 ,  0.06795985,  0.04479323],
        [-0.03724704,  0.02412999, -0.01731255, -0.02619852],
        [-0.03518318, -0.00906726, -0.03186977,  0.03878793],
        [-0.00850169, -0.06555107,  0.01104152, -0.03612821],
        [-0.03555058, -0.02685415, -0.03740792,  0.01585919],
        [-0.04836482,  0.02901696, -0.04396243, -0.01708928],
        [ 0.01649885, -0.04095041, -0.03778551, -0.03574239],
        [-0.05333111, -0.07608692, -0.01807136, -0.03358784],
        [-0.03546496,  0.0120766 ,  0.03102426,  0.05668727],
        [ 0.00611077, -0.02114694,  0.01272365, -0.01595014],
        [ 0.04039938, -0.04740801, -0.01056651, -0.03781551],
        

In [14]:
model.get_layer('embedding').get_weights()[0]

array([[-0.08216314, -0.07538348,  0.00807854, -0.08566938],
       [ 0.02670072,  0.00283396,  0.0150683 ,  0.02650651],
       [ 0.03918381,  0.04938804, -0.03105499, -0.03923974],
       [-0.01243529, -0.00678836, -0.03744118, -0.00772535],
       [-0.00117972, -0.00684365, -0.03205526, -0.04824449],
       [-0.09686375,  0.0016001 ,  0.06795985,  0.04479323],
       [-0.03724704,  0.02412999, -0.01731255, -0.02619852],
       [-0.03518318, -0.00906726, -0.03186977,  0.03878793],
       [-0.00850169, -0.06555107,  0.01104152, -0.03612821],
       [-0.03555058, -0.02685415, -0.03740792,  0.01585919],
       [-0.04836482,  0.02901696, -0.04396243, -0.01708928],
       [ 0.01649885, -0.04095041, -0.03778551, -0.03574239],
       [-0.05333111, -0.07608692, -0.01807136, -0.03358784],
       [-0.03546496,  0.0120766 ,  0.03102426,  0.05668727],
       [ 0.00611077, -0.02114694,  0.01272365, -0.01595014],
       [ 0.04039938, -0.04740801, -0.01056651, -0.03781551],
       [ 0.03150135, -0.

In [15]:
weights=model.get_layer('embedding').get_weights()[0]

In [16]:
len(weights)

50

In [17]:
weights[15]    #weights for nice word

array([ 0.04039938, -0.04740801, -0.01056651, -0.03781551], dtype=float32)

In [18]:
weights[36]   #weights for amazing word

array([ 0.03702239,  0.05115551, -0.09333027,  0.03153672], dtype=float32)