In [2]:
import tensorflow as tf
from tensorflow import keras   #keras in a deep learning lib based on python
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Alias for the Keras IMDB dataset module; the reviews themselves are loaded
# in the next cell via data.load_data().
data = keras.datasets.imdb  # IMDB movie reviews, sentiment classification

In [4]:
# Load the predefined train/test splits. num_words=10000 restricts the
# vocabulary to the 10,000 most frequent words in the dataset.
(train_data, train_labels), (test_data, test_labels) = data.load_data(
    num_words=10000,
)

In [5]:
# Each review is stored as a list of integer word indices rather than text.
print(train_data[0])  # first training review, as word indices

[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]


In [6]:
# Word -> integer index mapping that ships with the dataset.
word_index = data.get_word_index()

# Shift every index up by 3 so that ids 0-3 are free for the special tokens.
word_index = {word: idx + 3 for word, idx in word_index.items()}
word_index.update({
    "<PAD>": 0,      # filler used to bring every review to the same length
    "<START>": 1,
    "<UNK>": 2,
    "<UNUSED>": 3,
})

In [7]:
# Inverse mapping (index -> word), used to translate encoded reviews back
# into natural language.
reverse_word_index = {idx: word for word, idx in word_index.items()}

In [8]:
# Lengths of the first two test reviews before padding: they differ.
print(*[len(test_data[i]) for i in (0, 1)])

68 260


In [9]:
# Bring every review to exactly 250 tokens: shorter reviews get <PAD>
# appended at the end ("post"); longer ones are truncated to 250 by
# pad_sequences (from the front, which is its default).
train_data = keras.preprocessing.sequence.pad_sequences(
    train_data,
    value=word_index["<PAD>"],
    padding="post",
    maxlen=250,
)
test_data = keras.preprocessing.sequence.pad_sequences(
    test_data,
    value=word_index["<PAD>"],
    padding="post",
    maxlen=250,
)

In [10]:
def decode_review(text):
    """Turn a sequence of word indices back into a space-separated string.

    Each index is looked up in the module-level ``reverse_word_index``;
    indices that are not present are rendered as "?".
    """
    words = (reverse_word_index.get(token, "?") for token in text)
    return " ".join(words)

In [11]:
# Lengths of the first two test reviews after padding: both are now 250.
print(*[len(test_data[i]) for i in (0, 1)])

250 250


In [12]:
# Input: one review encoded as word indices,
# e.g. "It is a great movie." -> [23, 124, 5432, 213]
model = keras.Sequential([
    # Embedding layer: maps each of the 10,000 vocabulary indices to a
    # learned 16-dimensional vector. The idea is that words with similar
    # meaning end up with a small "angle" between their vectors, and
    # dissimilar words with a larger one.
    keras.layers.Embedding(10000, 16),
    # Average the word vectors over the sequence axis, reducing each review
    # to a single 16-dimensional vector.
    keras.layers.GlobalAveragePooling1D(),
    # Classification: 16 hidden neurons to pick up patterns of certain words.
    keras.layers.Dense(16, activation="relu"),
    # Output layer: a single sigmoid unit giving a score between 0 and 1
    # (0: negative review, 1: positive review).
    keras.layers.Dense(1, activation="sigmoid"),
])

In [13]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

# Configure training: Adam optimizer, binary cross-entropy loss (matching the
# single sigmoid output), and accuracy reported as the metric.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 16)          160000    
_________________________________________________________________
global_average_pooling1d (Gl (None, 16)                0         
_________________________________________________________________
dense (Dense)                (None, 16)                272       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 17        
Total params: 160,289
Trainable params: 160,289
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Hold out the first 10,000 training reviews (and their labels) for
# validation; train on the remainder.
x_validation, x_train = train_data[:10000], train_data[10000:]
y_validation, y_train = train_labels[:10000], train_labels[10000:]

In [15]:
# Train for 40 epochs in mini-batches of 512, evaluating on the held-out
# validation split as training progresses. The value returned by fit() is
# kept in fitModel.
fitModel = model.fit(
    x_train,
    y_train,
    epochs=40,
    batch_size=512,
    validation_data=(x_validation, y_validation),
    verbose=1,
)

Train on 15000 samples, validate on 10000 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [16]:
# Score the trained model on the padded test set; with the metrics compiled
# above this yields [loss, accuracy].
results = model.evaluate(x=test_data, y=test_labels)



In [17]:
# Test-set metrics returned by model.evaluate().
print(results)  # [loss, accuracy]

[0.3328003268909454, 0.87048]


In [18]:
# Make a prediction for a single (padded) test review and compare it with
# the true label.
test_review = test_data[0]
# predict() expects a batch. Wrapping the 1-D review in a Python list does
# NOT add a batch axis: Keras reads a list as "one array per model input",
# so the 250 tokens would be scored as 250 separate one-word samples (or
# rejected outright, depending on the Keras version). Add the batch
# dimension explicitly instead: shape (250,) -> (1, 250).
predict = model.predict(test_review[np.newaxis, :])
print("Review: ")
print(decode_review(test_review))
print("Prediction: "+str(predict[0]))  # sigmoid score for this review
print("Actual: "+str(test_labels[0]))  # 0: bad    1: good
print(results)  # [loss, accuracy] from the earlier evaluation on the full test set

Review: 
<START> please give this one a miss br br <UNK> <UNK> and the rest of the cast rendered terrible performances the show is flat flat flat br br i don't know how michael madison could have allowed this one on his plate he almost seemed to know this wasn't going to work out and his performance was quite <UNK> so all you madison fans give this a miss <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> 

In [19]:
# Save the trained model to "model.h5". The path is relative, so the file is
# written to the current working directory.
model.save("model.h5")

In [20]:
# Load the model back from the file written by the save cell above, into a
# separate variable (modelFY) so the in-memory `model` is left untouched.
modelFY = keras.models.load_model("model.h5")