In [2]:
!pip install tensorflow




In [3]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

In [6]:
#loading the data
data = keras.datasets.imdb
# spliting the data to 2 main groups - train and test
(train_data,train_labels),(test_data,test_labels) = data.load_data(num_words=88000)

#print(train_data[0])

In [7]:
word_index = data.get_word_index()

word_index = {k: (v+3) for k,v in word_index.items()}
word_index["<PAD>"] = 0 # padding 
word_index["<START>"] = 1
word_index["<UNK>"] = 2
word_index["<UNUSED"] = 3

reverse_word_index = dict([(value,key) for (key, value) in word_index.items()])

In [5]:
#decode function
def decode_review(text):
    return " ".join([reverse_word_index.get(w,'?') for w in text])

#print(decode_review(train_data[0]))

In [6]:
#Preprocessing the data
#Define max review size as 256 words
train_data = keras.preprocessing.sequence.pad_sequences(train_data,value=word_index["<PAD>"],padding="post", maxlen=256)
test_data = keras.preprocessing.sequence.pad_sequences(test_data,value=word_index["<PAD>"],padding="post", maxlen=256)

#print(len(test_data[0]),len(test_data[1]))

In [7]:
#Model creation

model = keras.Sequential()
model.add(keras.layers.Embedding(88000,16))
model.add(keras.layers.GlobalAveragePooling1D())
model.add(keras.layers.Dense(16,activation='relu'))
model.add(keras.layers.Dense(1,activation='sigmoid'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 16)          1408000   
_________________________________________________________________
global_average_pooling1d (Gl (None, 16)                0         
_________________________________________________________________
dense (Dense)                (None, 16)                272       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 17        
Total params: 1,408,289
Trainable params: 1,408,289
Non-trainable params: 0
_________________________________________________________________


In [9]:
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

x_val = train_data[:10000]
x_train = train_data[10000:]


y_val = train_labels[:10000]
y_train = train_labels[10000:]

fitModel = model.fit(x_train,y_train,epochs=40,batch_size=512,validation_data=(x_val,y_val), verbose=1)

result = model.evaluate(test_data,test_labels)
print(result)
model.save("model.h5")


Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
[0.32787272334098816, 0.8732799887657166]


In [None]:
'''
test_review = test_data[0]
predict = model.predict([test_review])
print("Review: ")
print(decode_review(test_review))
print("predicition: "+ str(predict[0]))
print("Actual: "+ str(test_labels[0]))

print(result)'''

In [9]:

def review_encode(s):
    encoded = [1]
    
    for word in s:
        if word.lower() in word_index:
            encoded.append(word_index[word.lower()])
        else:
            encoded.append(2)
    return encoded

#Load the model    
model = keras.models.load_model("model.h5")

with open("text.txt", encoding="utf-8") as f:
    for line in f.readlines():
        nline = line.replace(",","").replace(".","").replace("(","").replace(")","").replace(":","").replace("\"","").strip().split(" ")
        encode = review_encode(nline)
        encode = keras.preprocessing.sequence.pad_sequences([encode],value=word_index["<PAD>"],padding="post", maxlen=256)
        predict = model.predict(encode)
        print(line)
        print(encode)
        print(predict[0])

Around the world we have a global obsession of superhero comic characters even though they are not real, the billion dollar industry about them. Would you even imagine the franchise we would have to endure if they came into real life! This franchise would become The One quite easily. Here comes this The Boys !

[[   1  187    4  182   75   28    6 4564 2970    7 3783  700  105   60
   151   36   26   24  147    4 8882 2863 1600   44   98   62   25   60
   838    4 3135   75   62   28    8 4325   48   36  385   83  147    2
    14 3135   62  413    4   31  179  714  133  266   14    4 1013    2
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0   