In [20]:
import pandas as pd
import numpy as np

import tensorflow as tf
from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import *
from keras.models import Model
from keras.datasets import imdb

<h1>Loading the data</h1>

In [2]:
vocab_size = 20000
(X_train,y_train),(X_test,y_test) = imdb.load_data(num_words=vocab_size)

In [3]:
X_train.shape

(25000,)

In [4]:
X_test.shape

(25000,)

<h1>Defining Hyperparameter</h1>

In [5]:
maxlen = 200
embed_dim = 32
num_head = 2
ff_dim = 32

<h1>Data Preprocessing</h1>

In [6]:
X_train = pad_sequences(X_train,maxlen = maxlen)
X_test = pad_sequences(X_test,maxlen = maxlen)


In [7]:
X_train.shape

(25000, 200)

In [8]:
X_train[1]

array([    0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     1,   194,  1153,   194,  8255,    78,   228,
           5,     6,  1463,  4369,  5012,   134,    26,     4,   715,
           8,   118,  1634,    14,   394,    20,    13,   119,   954,
         189,   102,     5,   207,   110,  3103,    21,    14,    69,
         188,     8,    30,    23,     7,     4,   249,   126,    93,
           4,   114,     9,  2300,  1523,     5,   647,     4,   116,
           9,    35,  8163,     4,   229,     9,   340,  1322,     4,
         118,     9,     4,   130,  4901,    19,     4,  1002,     5,
          89,    29,   952,    46,    37,     4,   455,     9,    45,
          43,    38,  1543,  1905,   398,     4,  1649,    26,  6853,
           5,   163,    11,  3215, 10156,     4,  1153,     9,   194,
         775,     7,  8255, 11596,   349,  2637,   148,   605, 15358,
        8003,    15,   123,   125,    68,     2,  6853,    15,   349,
         165,  4362,

<h1>Building the model</h1>

In [9]:
model = Model()

inputs = Input(shape = (maxlen,))

#Token Embedding
token_emb_layer = Embedding(input_dim=vocab_size,output_dim=embed_dim)
x = token_emb_layer(inputs)

#positional embedding layer
position = tf.range(0,maxlen)
pos_emb_layer = Embedding(input_dim = maxlen,output_dim = embed_dim)
position_emb = pos_emb_layer(position)

x = x + position_emb

#Add transfromer block
#1. Muliti head self attension
attention_output = MultiHeadAttention(num_heads = num_head,key_dim=embed_dim)(x,x)
attention_output = Dropout(0.1)(attention_output)

#residual connection 
x1 = LayerNormalization() (x+ attention_output)


ffn = Dense(ff_dim,activation = 'relu')(x1)
ffn = Dense(embed_dim) (ffn)
ffn = Dropout(0.1)(ffn)

x2 = LayerNormalization() (x1+ ffn)

# classification head
x3 = GlobalAveragePooling1D() (x2) # flatten 200 x 32
x3 = Dropout(0.1) (x3)
x3 = Dense(20, activation='relu') (x3)
x3 = Dropout(0.1) (x3)

# output layer
outputs = Dense(1, activation='sigmoid') (x3)

In [10]:
model = Model(inputs=inputs, outputs=outputs)

model.summary()

In [11]:
from keras.utils import plot_model
plot_model(model, show_layer_names=True, show_layer_activations=True, show_shapes=True)

You must install graphviz (see instructions at https://graphviz.gitlab.io/download/) for `plot_model` to work.


In [14]:
# compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [15]:
# train the model
history = model.fit(X_train, y_train, batch_size=32, epochs=5, validation_data=(X_test, y_test))

Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 43ms/step - accuracy: 0.8046 - loss: 0.3970 - val_accuracy: 0.8808 - val_loss: 0.2846
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 42ms/step - accuracy: 0.9261 - loss: 0.1962 - val_accuracy: 0.8705 - val_loss: 0.3101
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 41ms/step - accuracy: 0.9591 - loss: 0.1213 - val_accuracy: 0.8569 - val_loss: 0.3834
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 41ms/step - accuracy: 0.9773 - loss: 0.0740 - val_accuracy: 0.8441 - val_loss: 0.4938
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 41ms/step - accuracy: 0.9859 - loss: 0.0469 - val_accuracy: 0.8355 - val_loss: 0.6516


<h1>Prediction</h1>

In [28]:
new = X_test[800]

In [29]:
new = np.reshape(new,(1,maxlen))

In [30]:
new.shape


(1, 200)

In [31]:
model.predict(new)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step


array([[0.99957085]], dtype=float32)