Goal:
   
       Sentiment analysis using word embeddings (NLP)

In [1]:
import numpy as np
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Embedding

In [44]:
# Toy corpus: each short review is paired with its label
# (1 = positive sentiment, 0 = negative sentiment).
labelled_reviews = [
    ('nice food', 1),
    ('amazing restaurant', 1),
    ('too good', 1),
    ('just loved it!', 1),
    ('will go again', 1),
    ('horrible food', 0),
    ('never go there', 0),
    ('poor service', 0),
    ('poor quality', 0),
    ('needs improvement', 0),
]

# Split the pairs back into the two parallel containers used by later cells.
reviews = [text for text, _ in labelled_reviews]
sentiment = np.array([label for _, label in labelled_reviews])

In [45]:
# Demo: encode one review as a list of integer word indices, using a hashing
# space of size 50.
# NOTE(review): per the Keras docs one_hot relies on Python's built-in hash,
# so the indices in the recorded output may differ between kernel sessions —
# confirm before relying on specific values.
one_hot("amazing restaurant",50)

[23, 27]

In [46]:
# Size of the hashing space used when turning words into integer indices.
vocab_size = 50

# Encode every review as its list of hashed word indices.
# In the recorded run, different words can land on the same index
# (e.g. 'poor service' and 'poor quality' both come out as [30, 10]).
encoded_reviews = [one_hot(review, vocab_size) for review in reviews]
encoded_reviews

[[22, 35],
 [23, 27],
 [27, 32],
 [13, 33, 21],
 [6, 1, 3],
 [9, 35],
 [45, 1, 24],
 [30, 10],
 [30, 10],
 [26, 8]]

In [64]:
# Check the container type of the encoded corpus (a plain Python list of lists).
type(encoded_reviews)

list

In [47]:
# Pad every encoded review with trailing zeros ('post') to a common length so
# the corpus becomes a rectangular 2-D integer array.
# The target length is derived from the longest encoded review rather than
# hard-coded, so adding a longer review to the corpus does not get silently
# truncated by pad_sequences. For the current corpus this evaluates to 3.
max_length = max(len(encoded) for encoded in encoded_reviews)
padded_reviews = pad_sequences(encoded_reviews, maxlen=max_length, padding='post')
padded_reviews

array([[22, 35,  0],
       [23, 27,  0],
       [27, 32,  0],
       [13, 33, 21],
       [ 6,  1,  3],
       [ 9, 35,  0],
       [45,  1, 24],
       [30, 10,  0],
       [30, 10,  0],
       [26,  8,  0]])

In [48]:
# Dimensionality of the vector learned for each word index.
embeded_vector_size = 4

# Three-layer classifier:
#   1. Embedding: looks up a 4-dim vector for each of the max_length indices.
#   2. Flatten:   concatenates those vectors into one feature vector per review.
#   3. Dense:     a single sigmoid unit giving the probability of "positive".
model = Sequential([
    Embedding(vocab_size, embeded_vector_size, input_length=max_length, name="embedding"),
    Flatten(),
    Dense(1, activation='sigmoid'),
])

In [49]:
# Features are the padded index sequences; targets are the 0/1 sentiment labels.
X, y = padded_reviews, sentiment

In [50]:
# Binary classification setup: Adam optimizer, binary cross-entropy loss,
# and accuracy reported as the metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line after the table.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 3, 4)              200       
                                                                 
 flatten_1 (Flatten)         (None, 12)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 13        
                                                                 
Total params: 213
Trainable params: 213
Non-trainable params: 0
_________________________________________________________________
None


In [51]:
# Train for 50 epochs on the full toy corpus; verbose=0 suppresses the
# per-epoch progress log. The returned History object is what gets displayed.
model.fit(X, y, epochs=50, verbose=0)

<keras.callbacks.History at 0x19c0bcd7c0>

In [52]:
# Evaluate on the same X, y the model was fitted on, so the accuracy shown
# here is training accuracy, not a measure of generalization.
loss, accuracy = model.evaluate(X, y)
accuracy



1.0

In [53]:
# get_weights() on the embedding layer returns a Python list of arrays.
type(model.get_layer('embedding').get_weights())

list

In [54]:
# Wrap the weight list in an array to see its overall shape: one weight
# matrix (leading 1) of vocab_size rows by embedding-dimension columns.
np.array(model.get_layer('embedding').get_weights()).shape

(1, 50, 4)

In [55]:
# Pull the learned embedding matrix out of the layer. get_weights() returns a
# list, and the matrix is its first (index 0) element; row i is the vector
# for word index i.
embedding_layer = model.get_layer('embedding')
weights = embedding_layer.get_weights()[0]
weights

array([[ 0.05750809, -0.08601989, -0.06215804, -0.02940659],
       [-0.04669215,  0.0526131 ,  0.04993277, -0.0421348 ],
       [ 0.02788388,  0.0299553 ,  0.04287738,  0.04261598],
       [-0.07036729,  0.08083656,  0.09264696,  0.09312554],
       [ 0.0278531 , -0.01034777, -0.04786097,  0.00501406],
       [-0.00173866, -0.04172125,  0.0170066 , -0.04757843],
       [-0.06298792, -0.0974207 ,  0.04998935,  0.06096543],
       [ 0.04595676,  0.04795501, -0.01052473, -0.02482252],
       [-0.02778705,  0.05935884,  0.08915491, -0.09955373],
       [ 0.07021528,  0.03185086, -0.09363673, -0.07562404],
       [-0.07603268,  0.02664629,  0.07536297, -0.05354173],
       [-0.00979913,  0.03940902,  0.02615892, -0.0076283 ],
       [ 0.00392939, -0.0024395 ,  0.04042039, -0.00437214],
       [-0.00509736, -0.01012597,  0.08308792,  0.08503109],
       [ 0.02042777,  0.00886322,  0.03454895, -0.01975518],
       [-0.04359242, -0.02528897, -0.01802852, -0.04233532],
       [ 0.00116355,  0.

In [56]:
# Shape of the embedding matrix itself: (vocab_size, embedding dimension).
model.get_layer('embedding').get_weights()[0].shape

(50, 4)

In [57]:
# Number of rows in the embedding matrix, i.e. one row per possible word index.
len(model.get_layer('embedding').get_weights()[0])

50

In [58]:
# Embedding row for index 12. In the recorded encoding output no review token
# maps to 12, so this row belongs to an index the corpus never uses.
weights[12]

array([ 0.00392939, -0.0024395 ,  0.04042039, -0.00437214], dtype=float32)

In [59]:
# Embedding row for index 8. In the recorded encoding output this is the
# second token of 'needs improvement', i.e. the word 'improvement'.
weights[8]

array([-0.02778705,  0.05935884,  0.08915491, -0.09955373], dtype=float32)

In [67]:
# Embedding row for index 32. In the recorded encoding output this is the
# second token of 'too good', i.e. the word 'good'.
weights[32]

array([ 0.01156714, -0.10033015, -0.04832137,  0.07599977], dtype=float32)

In [68]:
# Embedding row for index 35. In the recorded encoding output this is the
# second token of both 'nice food' and 'horrible food', i.e. the word 'food'.
weights[35]

array([ 0.03883581, -0.06171758, -0.0467652 ,  0.00766122], dtype=float32)

In [61]:
# Predicted probability of the positive class for every training review
# (same X the model was fitted on).
pred = model.predict(X)



In [62]:
# Show the raw sigmoid outputs, one row per review.
pred

array([[0.5199531 ],
       [0.5311593 ],
       [0.51955265],
       [0.5920669 ],
       [0.5609397 ],
       [0.47734442],
       [0.44076723],
       [0.44409952],
       [0.44409952],
       [0.42588934]], dtype=float32)

In [63]:
# Convert probabilities to hard labels: strictly above 0.50 means positive (1),
# anything else means negative (0). The comparison is done on the whole array
# at once and flattened to a plain list of Python ints.
pred_list = (pred > 0.50).astype(int).ravel().tolist()
pred_list

[1, 1, 1, 1, 1, 0, 0, 0, 0, 0]

In [74]:
# Probe text: 'amazing horrible food'.
# Encode it through the same one_hot + pad_sequences pipeline as the training
# data instead of hard-coding word indices copied from an earlier output:
# one_hot's indices are not guaranteed to be the same in a fresh kernel, so
# literal indices would silently refer to different words after a restart.
# The result is an explicit (1, max_length) batch, which is the input shape
# the model was built for.
X_test1 = pad_sequences([one_hot('amazing horrible food', vocab_size)],
                        maxlen=max_length, padding='post')
model.predict(X_test1) # Positive



array([[0.5387879]], dtype=float32)

In [75]:
# Probe text: 'loved nice restaurant'.
# Encode it through the same one_hot + pad_sequences pipeline as the training
# data instead of hard-coding word indices copied from an earlier output:
# one_hot's indices are not guaranteed to be the same in a fresh kernel, so
# literal indices would silently refer to different words after a restart.
# The result is an explicit (1, max_length) batch, which is the input shape
# the model was built for.
X_test2 = pad_sequences([one_hot('loved nice restaurant', vocab_size)],
                        maxlen=max_length, padding='post')
model.predict(X_test2) # Positive



array([[0.51802367]], dtype=float32)

In [76]:
# Probe text: 'there quality needs'.
# Encode it through the same one_hot + pad_sequences pipeline as the training
# data instead of hard-coding word indices copied from an earlier output:
# one_hot's indices are not guaranteed to be the same in a fresh kernel, so
# literal indices would silently refer to different words after a restart.
# The result is an explicit (1, max_length) batch, which is the input shape
# the model was built for.
X_test3 = pad_sequences([one_hot('there quality needs', vocab_size)],
                        maxlen=max_length, padding='post')
model.predict(X_test3) # Negative



array([[0.44842425]], dtype=float32)