<center><h1 style="color:green">Supervised Word Embeddings</h1></center>

In [1]:
import numpy as np
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Embedding 

In [2]:
# Toy corpus: the first five reviews are positive, the last five negative.
reviews = [
    # positive
    'nice food',
    'amazing restaurant',
    'too good',
    'just loved it!',
    'will go again',
    # negative
    'horrible food',
    'never go there',
    'poor service',
    'poor quality',
    'needs improvement',
]

# Labels aligned with `reviews` by position: 1 = positive, 0 = negative.
sentiment = np.array([1] * 5 + [0] * 5)

<b>One Hot Encoding</b>

In [3]:
# Quick demo: map each word of one phrase to an integer id, using 30 buckets.
one_hot("amazing restaurant", n=30)

[4, 11]

In [4]:
# Number of buckets handed to one_hot; also reused as the Embedding input size.
vocab_size = 30

# Encode every review as a list of integer word ids.
# NOTE: in the recorded output several different words share an id
# (e.g. 9 appears for more than one word), so ids are not unique per word.
encoded_reviews = [
    one_hot(review, n=vocab_size)
    for review in reviews
]
print(encoded_reviews)

[[12, 20], [4, 11], [25, 9], [27, 25, 22], [9, 18, 9], [19, 20], [12, 18, 27], [10, 22], [10, 9], [19, 13]]


<b>Add Padding</b>

In [5]:
# Every review is brought to this fixed number of ids.
max_length = 4

# padding='post' appends zeros after the real ids (see the printed matrix).
padded_reviews = pad_sequences(
    encoded_reviews,
    padding='post',
    maxlen=max_length,
)
print(padded_reviews)

[[12 20  0  0]
 [ 4 11  0  0]
 [25  9  0  0]
 [27 25 22  0]
 [ 9 18  9  0]
 [19 20  0  0]
 [12 18 27  0]
 [10 22  0  0]
 [10  9  0  0]
 [19 13  0  0]]


<b>Model Training and Embedding</b>

In [6]:
# Width of the dense vector learned for every word id.
embeded_vector_size = 5

# Embedding -> Flatten -> one sigmoid unit for binary sentiment.
# `input_length` is deliberately not passed to Embedding: the argument is
# deprecated in Keras 3 and only triggers a warning there. The sequence length
# is supplied when the model is built with input_shape=(None, max_length).
model = Sequential()
model.add(Embedding(vocab_size, embeded_vector_size, name="embedding"))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))



In [7]:
# Features are the padded id sequences; targets are the 0/1 sentiment labels.
X, y = padded_reviews, sentiment

In [8]:
# Manually build the model: batch dimension left open, max_length ids per sample.
batch_input_shape = (None, max_length)
model.build(input_shape=batch_input_shape)

In [9]:
# Binary classification setup: cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

In [10]:
# Train on the whole (tiny) dataset for 50 epochs without progress output.
# The returned History object is kept in a variable so the cell no longer
# echoes its memory-address repr, and the per-epoch metrics stay available.
history = model.fit(X, y, epochs=50, verbose=0)

<keras.src.callbacks.history.History at 0x1dd9b64db50>

In [11]:
# Score the model. X and y are the same arrays used for fitting, so the
# number shown is training accuracy, not a held-out estimate.
loss, accuracy = model.evaluate(x=X, y=y)
accuracy

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 297ms/step - accuracy: 0.9000 - loss: 0.6375


0.8999999761581421

<b>Embedding weights</b>

In [12]:
# Fetch the trained embedding layer by name and take its first weight array:
# one row per word id, one column per embedding dimension.
embedding_layer = model.get_layer(name='embedding')
weights = embedding_layer.get_weights()[0]
weights

array([[-0.02891003,  0.01081556,  0.0282058 , -0.00306427, -0.00234527],
       [ 0.02868289,  0.01105702,  0.00112859, -0.02521232, -0.03810974],
       [-0.02100469,  0.00635455, -0.00171461, -0.04919092, -0.00101597],
       [-0.01063729,  0.04512937, -0.01780921, -0.00046391,  0.0239643 ],
       [ 0.0250465 ,  0.07487744, -0.00270506, -0.05723004,  0.09254707],
       [ 0.00069884, -0.01605623, -0.04176838, -0.04419684, -0.01722751],
       [-0.04414156, -0.02894235,  0.03537646,  0.04942247, -0.0290749 ],
       [ 0.00807797, -0.00250784,  0.0002965 ,  0.02950518,  0.00018973],
       [ 0.00073749, -0.02012513, -0.01515085,  0.02114144,  0.01517625],
       [ 0.04859369,  0.0525638 , -0.0066433 , -0.01011298,  0.00922284],
       [-0.11264107, -0.0678744 ,  0.08829612,  0.11855378, -0.1206255 ],
       [-0.0830708 , -0.03984039, -0.03124751,  0.06288611, -0.02477491],
       [ 0.02483001,  0.0312437 ,  0.03741973, -0.00114625,  0.01459044],
       [ 0.02012528,  0.0248423 ,  0.0

In [13]:
# Number of rows in the embedding matrix (one per word id).
weights.shape[0]

30

In [14]:
# Length of a single embedding vector.
weights.shape[1]

5

In [15]:
# Vector for id 13 -- presumably 'improvement', going by the recorded
# encoding [19, 13] of 'needs improvement'; ids may differ on a re-run.
weights[13, :]

array([ 0.02012528,  0.0248423 ,  0.0655949 , -0.08700713,  0.08648913],
      dtype=float32)

In [16]:
# Vector for id 4 -- presumably 'amazing', going by the recorded
# encoding [4, 11] of 'amazing restaurant'; ids may differ on a re-run.
weights[4, :]

array([ 0.0250465 ,  0.07487744, -0.00270506, -0.05723004,  0.09254707],
      dtype=float32)

In [17]:
# Vector for id 24, an id that does not appear in the recorded encoded_reviews output.
weights[24, :]

array([-0.02130263, -0.00888254, -0.03886173, -0.03355218,  0.03520123],
      dtype=float32)

In [18]:
# Vector for id 5, an id that does not appear in the recorded encoded_reviews output.
weights[5, :]

array([ 0.00069884, -0.01605623, -0.04176838, -0.04419684, -0.01722751],
      dtype=float32)