In [1]:
import numpy as np
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Embedding



In [2]:
# Food review classification for 10 sample data set
reviews = ['nice food',
          'amazing restaurant',
          'too good',
           'just loved it!',
          'will go gain',
          'horrible food',
          'never go there',
          'poor service',
           'poor quality',
          'needs improvement']
# Labels for each sample 1 means good and 0 means bad
sentiment = np.array([1,1,1,1,1,0,0,0,0,0])

In [3]:
# using one hot vector

# the one hot do encoding, takes the review and then you specify the vocabulary size "30" it give the number between 1 to 30

one_hot("amazing restaurant", 30)


[9, 1]

In [7]:
# vocabulary size

vocab_size = 30

# using list comprehension
encoded_reviews = [one_hot(d, vocab_size) for d in reviews]

# basically, it will encode the sample dataset review with vocab size from 1 to 30 and make a vector of it.

#print(encoded_reviews)

encoded_reviews

[[6, 8],
 [9, 1],
 [7, 16],
 [14, 22, 7],
 [24, 26, 12],
 [21, 8],
 [4, 26, 22],
 [25, 13],
 [25, 17],
 [4, 26]]

In [9]:
# For 2 and more than 2 word sentence we need padding

max_length = 3 # means length of sentence we have in the sample dataset
padded_reviews = pad_sequences(encoded_reviews, maxlen=max_length, padding='post')
padded_reviews

array([[ 6,  8,  0],
       [ 9,  1,  0],
       [ 7, 16,  0],
       [14, 22,  7],
       [24, 26, 12],
       [21,  8,  0],
       [ 4, 26, 22],
       [25, 13,  0],
       [25, 17,  0],
       [ 4, 26,  0]])

In [14]:
# making embedding vector for each work

embedded_vector_size = 4

model = Sequential()

# First layer is embedding layer

model.add(Embedding(vocab_size, embedded_vector_size, input_length=max_length, name="embedding"))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))

In [15]:
X = padded_reviews
y  = sentiment

In [16]:
# Model compilation
# The binary_crossentropy because the output will be 0 or 1
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 3, 4)              120       
                                                                 
 flatten_1 (Flatten)         (None, 12)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 13        
                                                                 
Total params: 133
Trainable params: 133
Non-trainable params: 0
_________________________________________________________________


In [17]:
model.fit(X,y, epochs=50, verbose=0)

<keras.callbacks.History at 0x1b1e3980af0>

In [18]:
# evaluale the model

loss, accuracy = model.evaluate(X,y)
accuracy



0.8999999761581421

In [20]:
#getting weights of embedding

#model.get_layer('embedding').get_weights()[0]

weights = model.get_layer('embedding').get_weights()[0]
len(weights)

30

In [22]:
weights[6]

array([-0.0896238 ,  0.04592543,  0.07529977, -0.05494934], dtype=float32)

In [23]:
weights[9]

array([-0.10538742,  0.05210855,  0.05932971, -0.04040863], dtype=float32)