# Review management


In [None]:
%tensorflow_version 2.x  # this line is not required unless you are in a notebook
from keras.datasets import imdb
from keras.preprocessing import sequence
import keras
import tensorflow as tf
import os
import numpy as np

# Vocabulary size handed to imdb.load_data and reused as the Embedding input size.
VOCAB_SIZE = 88584

# Every review is padded or truncated to this many tokens.
MAXLEN = 250
# NOTE(review): BATCH_SIZE is not referenced anywhere else in the visible notebook.
BATCH_SIZE = 64

# Load the integer-encoded IMDB reviews and their labels.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(
    num_words=VOCAB_SIZE
)

# Bring both splits to exactly MAXLEN tokens per review.
train_data, test_data = (
    sequence.pad_sequences(split, maxlen=MAXLEN)
    for split in (train_data, test_data)
)

Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.


In [None]:
# Sentiment classifier: token embedding -> LSTM -> single sigmoid output.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(VOCAB_SIZE, 32))  # 32-dimensional vector per token id
model.add(tf.keras.layers.LSTM(32))  # 32 LSTM units
model.add(tf.keras.layers.Dense(1, activation="sigmoid"))  # one value in [0, 1]

In [None]:
# Binary cross-entropy pairs with the single sigmoid output; 'acc' reports accuracy.
model.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=['acc'],
)

# Train for 10 epochs, holding out 20% of the training data for validation.
history = model.fit(
    x=train_data,
    y=train_labels,
    epochs=10,
    validation_split=0.2,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Bag-of-words demo: every distinct word gets an integer id (kept in `vocab`),
# and a text is summarized as {word id: number of occurrences}.

vocab = {}          # word -> integer id, shared across calls
word_encoding = 1   # next unused id; ids start at 1
def bag_of_words(text):
  """Return a {word id: count} dictionary for `text`.

  The text is lower-cased and split on whitespace. Words not seen before are
  added to the module-level `vocab` with the next free id, and the
  module-level `word_encoding` counter is advanced accordingly.

  Args:
    text: the string to encode.

  Returns:
    A dict mapping each word id that occurs in `text` to its frequency.
    Empty or whitespace-only text yields an empty dict.
  """
  global word_encoding

  # split() with no argument drops empty strings produced by leading, trailing
  # or repeated whitespace, so "" never ends up in the vocabulary.
  words = text.lower().split()
  bag = {}  # word id -> frequency within this text

  for word in words:
    if word not in vocab:
      vocab[word] = word_encoding
      word_encoding += 1
    encoding = vocab[word]
    bag[encoding] = bag.get(encoding, 0) + 1

  return bag

text = "this is a test to see if this test will work is is test a a"
bag = bag_of_words(text)
print(bag)      # word id -> frequency in `text`
print(vocab)    # word -> word id

{1: 2, 2: 3, 3: 3, 4: 3, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1}
{'this': 1, 'is': 2, 'a': 3, 'test': 4, 'to': 5, 'see': 6, 'if': 7, 'will': 8, 'work': 9}


In [None]:
# Token count of the fifth training review.
# NOTE(review): train_data is already padded to MAXLEN in the first cell, so a
# fresh Run All should presumably report 250 here; the recorded output looks stale.
len(train_data[4])

147

In [None]:
# Token count of the first training review.
# NOTE(review): train_data is already padded to MAXLEN in the first cell, so a
# fresh Run All should presumably report 250 here; the recorded output looks stale.
len(train_data[0])

218

In [None]:
# The reviews have different lengths, but the network needs inputs of one fixed length, so every review is padded or truncated to the same length.

In [None]:
# Bring every review to exactly MAXLEN tokens: longer reviews are trimmed and
# shorter ones are filled up with 0s.
# NOTE(review): the first cell already pads both splits, so this repeats that step.
train_data = sequence.pad_sequences(train_data, maxlen=MAXLEN)
test_data = sequence.pad_sequences(test_data, maxlen=MAXLEN)

In [None]:
# Model creation: embedding -> LSTM -> sigmoid classifier.

model = tf.keras.Sequential([
    # Map each token id to a 32-dimensional vector (this is the per-word dimension).
    tf.keras.layers.Embedding(input_dim=VOCAB_SIZE, output_dim=32),
    # 32 LSTM units; the layer emits one 32-value vector per review.
    tf.keras.layers.LSTM(units=32),
    # Single sigmoid unit: an output of 0.5 or more is read as a positive
    # review, anything lower as a negative review.
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 32)          2834688   
                                                                 
 lstm (LSTM)                 (None, 32)                8320      
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 2,843,041
Trainable params: 2,843,041
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Compile the model, then train it.

# The loss measures how far the predicted probability is from the true label.
model.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=['acc'],
)

# Train for 10 epochs, holding out 20% of the training data for validation.
history = model.fit(
    x=train_data,
    y=train_labels,
    epochs=10,
    validation_split=0.2,
)




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Score the trained model on the test split; the result is [loss, accuracy].
results = model.evaluate(x=test_data, y=test_labels)
print(results)

[0.39589956402778625, 0.8561599850654602]


In [None]:
# Show the architecture again after training.
# NOTE(review): the recorded output is identical to the earlier summary cell.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 32)          2834688   
                                                                 
 lstm (LSTM)                 (None, 32)                8320      
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 2,843,041
Trainable params: 2,843,041
Non-trainable params: 0
_________________________________________________________________


## Saving and reloading the model

In [None]:
## Save the trained model to the working directory
# NOTE(review): the .h5 suffix presumably selects the HDF5 file format — confirm
# this is still the intended format for the Keras version in use.
save_path = './modelnlp.h5'
model.save(save_path)


In [None]:
## Load the saved model back from disk (replaces the in-memory `model`)
# NOTE(review): the model is built through tf.keras but reloaded through the
# standalone `keras` import — confirm both resolve to the same Keras version.
save_path = './modelnlp.h5'
model = keras.models.load_model(save_path)

In [None]:
# Word -> integer index mapping that ships with the IMDB dataset.
word_index = imdb.get_word_index()

def encode_text(text):
  """Convert raw text into a MAXLEN-long array of word indices.

  The text is tokenized with Keras' text_to_word_sequence, each token is
  looked up in `word_index` (words that are not in the mapping become 0), and
  the resulting list is padded or truncated to MAXLEN.

  NOTE(review): imdb.load_data defaults to index_from=3, so the training
  sequences are presumably offset by 3 relative to get_word_index(); confirm
  whether the same offset should be applied here.
  """
  words = keras.preprocessing.text.text_to_word_sequence(text)
  ids = [word_index.get(word, 0) for word in words]
  padded = sequence.pad_sequences([ids], MAXLEN)
  return padded[0]

text = "that movie was just amazing, so amazing"
encoded = encode_text(text)
print(encoded.shape)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
(250,)


In [None]:
# make a decode function
# Go to word from  integer

reverse_word_index = {value: key for (key, value) in word_index.items()}

def decode_integers(integers):
    PAD = 0
    text = ""
    for num in integers:
      if num != PAD:  # num is not zero
        text += reverse_word_index[num] + " "

    return text[:-1]
  
print(decode_integers(encoded))

that movie was just amazing so amazing


In [None]:
# Prediction helper: classify a piece of review text with the trained model.

def predict(text):
  """Print whether `text` is classified as a positive or a negative review.

  The text is encoded with encode_text, wrapped into a batch containing that
  single sequence, and passed to the model. A sigmoid output of 0.5 or more
  prints 'Positive Review'; anything lower prints 'Negative Review'.
  Nothing is returned.
  """
  encoded_text = encode_text(text)
  # Add a leading batch axis instead of copying into a hard-coded (1, 250)
  # buffer, so the input shape always follows the length encode_text produces.
  batch = np.expand_dims(encoded_text, axis=0)
  result = model.predict(batch)  # one sigmoid probability per batch row
  # Pick out the single probability explicitly rather than relying on the
  # truth value of a one-element array.
  probability = result[0][0]
  if probability >= 0.5:
    print('Positive Review')
  else:
    print('Negative Review')

review_1 = "That movie was! really loved it and would great watch it again because it was amazingly great"
predict(review_1)

review_2 = "that movie really sucked. I hated it and wouldn't watch it again. Was one of the worst things I've ever watched"
predict(review_2)


Positive Review
Negative Review


In [None]:
# Out-of-domain check: a hotel review rather than a movie review.
review_1_0='very nice hotel i will go again'
predict(review_1_0)

Positive Review


In [None]:
# Short, clearly positive phrase.
review_0='very nice movie'
predict(review_0)

Positive Review


In [None]:
# Short, clearly negative phrase (reuses the name review_0 from the previous cell).
review_0='worst movie'
predict(review_0)

Negative Review


In [None]:
# Negative intent expressed without an explicitly negative adjective
# (reuses the name review_0 again).
review_0='i will never see this movie'
predict(review_0)# expected: negative review

Negative Review


In [None]:
# Clearly positive full sentence.
review_3="An excellent movie that I enjoyed a lot"
predict(review_3)

Positive Review


In [None]:
# Mixed review: criticizes the movie and its story but praises the acting.
# NOTE(review): the recorded output also shows a raw probability, which
# predict() does not print as written — the output looks stale.
review_5=" can one make such a horrible movie? there is no story, but the acting of hero was very good"
predict(review_5)

[[0.57212216]]
Positive Review


In [None]:
# Strongly negative sentence.
review_4 ="This is the worst movie I have ever seen oh shit"
predict(review_4)

Negative Review


In [None]:
# Only the positive clause of the earlier mixed review (reuses the name review_5).
review_5="the acting of hero was very good"
predict(review_5)

Positive Review
