In [1]:
from tensorflow.keras import datasets
from tensorflow.keras import Model, Input
from tensorflow.keras import layers

import numpy as np
import matplotlib.pyplot as plt

  from ._conv import register_converters as _register_converters


In [2]:
# Notebook-wide settings.
# num_words: forwarded to imdb.load_data(num_words=...) and used as the column
#            count of the encoded matrices built by ohe_data.
num_words = 10000
# max_len: forwarded to imdb.load_data(maxlen=...).
max_len = 500
# val_split: forwarded to model.fit(validation_split=...).
val_split = 0.1

In [3]:
# Fetch the IMDB review dataset bundled with Keras, already encoded as
# sequences of integer word ids, with its binary labels.
# The vocabulary size and length limit come from the configuration cell.
(train_x, train_y), (test_x, test_y) = datasets.imdb.load_data(
    num_words=num_words,
    maxlen=max_len,
)

In [4]:
def ohe_data(data, dim=None):
    """Multi-hot encode a collection of integer index sequences.

    Row ``i`` of the result describes ``data[i]``: column ``j`` is 1 when the
    index ``j`` occurs anywhere in that sequence and 0 otherwise. Order and
    repetition of the indices are discarded.

    Args:
        data: A list or NumPy array whose elements are sequences of integer
            indices (for example the encoded reviews from ``imdb.load_data``).
        dim: Number of columns in the encoding. When omitted, the
            notebook-level ``num_words`` is used.

    Returns:
        A float ``np.ndarray`` of shape ``(len(data), dim)`` containing only
        0s and 1s.

    Raises:
        IndexError: If a sequence contains an index outside ``[-dim, dim)``.
    """
    if dim is None:
        dim = num_words

    # len() instead of .shape[0] so plain Python lists are accepted as well.
    data_ohe = np.zeros((len(data), dim))

    for row, indices in enumerate(data):
        # Casting to an integer array keeps empty sequences valid as an index
        # (an empty list would otherwise be inferred as a float array).
        data_ohe[row, np.asarray(indices, dtype=np.intp)] = 1

    return data_ohe

In [5]:
# Encode the training reviews as fixed-width 0/1 vectors for the dense network.
train_x_ohe = ohe_data(data=train_x)

In [6]:
# Encode the held-out reviews exactly the same way as the training reviews.
test_x_ohe = ohe_data(data=test_x)

In [7]:
# Sanity check: one row per training review, one column per vocabulary slot.
train_x_ohe.shape

(25000, 10000)

In [8]:
# Feed-forward binary classifier over the encoded review vectors.
# The input width is tied to num_words (the column count produced by ohe_data)
# instead of a repeated literal, so changing the vocabulary size in the
# configuration cell cannot leave the model and the data out of sync.
input_tensor = Input((num_words,))

# Hidden stack: three ReLU Dense layers (200 -> 300 -> 100 units), each
# followed by a Dropout layer with rate 0.3.
# Lower-case names hold the Dense outputs, upper-case the Dropout outputs.
d1 = layers.Dense(200, activation='relu')(input_tensor)
D1 = layers.Dropout(0.3)(d1)

d2 = layers.Dense(300, activation='relu')(D1)
D2 = layers.Dropout(0.3)(d2)

d3 = layers.Dense(100, activation='relu')(D2)
D3 = layers.Dropout(0.3)(d3)

# Single sigmoid unit; prediction_custom rounds this output to 0 or 1.
out = layers.Dense(1, activation='sigmoid')(D3)

model = Model(inputs=input_tensor, outputs=out)

In [9]:
# Configure training: RMSprop optimizer, binary cross-entropy loss, and
# accuracy reported under the key 'acc'.
model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['acc'],
)

In [10]:
# Print the layer-by-layer table (output shapes and parameter counts).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 10000)             0         
_________________________________________________________________
dense (Dense)                (None, 200)               2000200   
_________________________________________________________________
dropout (Dropout)            (None, 200)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 300)               60300     
_________________________________________________________________
dropout_1 (Dropout)          (None, 300)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 100)               30100     
_________________________________________________________________
dropout_2 (Dropout)          (None, 100)               0         
__________

In [11]:
# Train for three epochs on shuffled data, holding out the fraction
# configured as val_split for validation.
model.fit(
    x=train_x_ohe,
    y=train_y,
    epochs=3,
    shuffle=True,
    validation_split=val_split,
)

Train on 22500 samples, validate on 2500 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0xf2eba39ba8>

In [12]:
# Score the trained model on the held-out reviews; displays [loss, acc].
model.evaluate(x=test_x_ohe, y=test_y)



[0.3334686711701583, 0.8859025158990043]

In [15]:
# Per-epoch training/validation loss and accuracy as a dict of lists.
# NOTE(review): this cell carries execution count 15 although it sits before
# cells 13 and 14, i.e. it was run out of order; it needs model.fit to have
# been executed first.
model.history.history

{'val_loss': [0.2733580232620239, 0.29415548692643645, 0.30877976982593536],
 'val_acc': [0.884, 0.8848, 0.8876],
 'loss': [0.34625487881236605, 0.2295208215713501, 0.18927903700244303],
 'acc': [0.8549777777777777, 0.9163555555555556, 0.9345777777777777]}

In [13]:
def prediction_custom(review):
    """Classify a single raw-text review with the trained ``model``.

    The review is split on whitespace, each word is case-folded and looked up
    in the IMDB word index, and the resulting ids are multi-hot encoded the
    same way the training data was before being passed to ``model.predict``.

    The encoding mirrors the defaults of ``imdb.load_data``:
      * word ranks are shifted by ``index_from`` (3),
      * id 1 marks the start of every sequence,
      * ids that fall outside the model's vocabulary become the
        out-of-vocabulary id 2.

    Words that are missing from the word index altogether are reported on
    stdout and skipped.

    Args:
        review: The review text as a single string.

    Returns:
        The rounded model output, an array of shape ``(1, 1)`` holding 1.0
        for a positive prediction and 0.0 for a negative one. The verdict is
        also printed as 'Good review' / 'Bad review'.
    """
    # get_word_index() reloads the vocabulary on every call, so fetch it once
    # instead of (up to) twice per word.
    word_index = datasets.imdb.get_word_index()

    # Use the width the model was actually built with rather than a literal.
    vocab_size = model.input_shape[-1]

    # Defaults used by imdb.load_data when the training data was encoded.
    index_from, start_char, oov_char = 3, 1, 2

    review_ohe = np.zeros((1, vocab_size))
    # Every training sequence begins with the start marker, so set it here too.
    review_ohe[0, start_char] = 1

    for word in review.split():
        rank = word_index.get(word.casefold())
        if rank is None:
            print("[-] The word {} is not in the word index dictionary\n    This may effect the prediction".format(word))
            continue

        # Raw ranks must be shifted to line up with the ids seen in training.
        token_id = rank + index_from
        # Rare words were replaced by the OOV id in training; doing the same
        # here also prevents an IndexError on the encoding row.
        if token_id >= vocab_size:
            token_id = oov_char
        review_ohe[0, token_id] = 1

    pred = model.predict(review_ohe).round()

    if pred[0][0] == 1:
        print('Good review')
    else:
        print('Bad review')

    return pred
    

In [14]:
# Try the classifier on a hand-written review; the final expression displays
# the rounded prediction returned by prediction_custom.
rev = 'This is wonderful movie that i had ever watched'
pred_val = prediction_custom(review=rev)
pred_val

Good review


array([[1.]], dtype=float32)