In [1]:
import warnings
# Blanket filter: hides every warning category for the rest of the session,
# including deprecation warnings from the libraries imported in later cells.
warnings.filterwarnings("ignore")

In [2]:
import numpy as np
import pandas as pd
import string
import re
import nltk
import seaborn as sns
import pickle
import joblib
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences

In [3]:
# preprocessing the data
def cleaning(text):
    """Normalize a raw review string into lowercase, space-separated words.

    Steps, in order: lowercase the text, strip HTML-like tags, strip
    http(s) URLs, expand a fixed list of English contractions, remove
    punctuation, tokenize with ``word_tokenize`` and keep only tokens that
    are purely alphabetic.

    Parameters
    ----------
    text : str
        Raw review text.

    Returns
    -------
    str
        The surviving tokens joined by single spaces (empty string if none).

    Notes
    -----
    The "didn't" and "haven't" patterns were previously misspelled
    ("did't", "have't") and therefore never matched the real contractions.
    Both the correct and the old misspelled forms are accepted now.
    NOTE(review): if the saved tokenizer/model were fit on text cleaned with
    the old patterns, confirm that expanding these two contractions is
    consistent with the training-time preprocessing.
    """
    text = text.lower()

    # Tags are removed before URLs, so a URL inside an attribute disappears
    # together with its tag.
    text = re.sub(r'<.*?>', '', text)
    text = re.sub(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
        '',
        text,
    )

    # (pattern, replacement) pairs, applied in this order.
    contractions = (
        (r"i'm", "i am"),
        (r"he's", "he is"),
        (r"she's", "she is"),
        (r"that's", "that is"),
        (r"what's", "what is"),
        (r"where's", "where is"),
        (r"'ll", " will"),
        (r"'ve", " have"),
        (r"'re", " are"),
        (r"'d", " would"),
        (r"won't", "will not"),
        (r"don't", "do not"),
        (r"didn?'t", "did not"),    # matches "didn't" and the legacy "did't"
        (r"can't", "can not"),
        (r"it's", "it is"),
        (r"couldn't", "could not"),
        (r"haven?'t", "have not"),  # matches "haven't" and the legacy "have't"
    )
    for pattern, replacement in contractions:
        text = re.sub(pattern, replacement, text)

    # Drop common punctuation before tokenizing; whatever punctuation is left
    # inside a token (e.g. apostrophes) is stripped per token below.
    text = re.sub(r"[,.\"!@#$%^&*(){}?/;`~:<>+=-]", "", text)
    tokens = word_tokenize(text)
    table = str.maketrans('', '', string.punctuation)
    stripped = (token.translate(table) for token in tokens)

    # Tokens containing digits, or left empty after stripping, are discarded.
    return ' '.join(word for word in stripped if word.isalpha())

In [5]:
# Sample review used to exercise the whole inference pipeline.
s = "I recently stayed at the Hyatt Regency in downtown Chicago. My stay here was wonderful thanks to such a friendly staff. I needed to do some research for work and the free Wi-Fi made this very easy. The bed was so nice and soft! Even the pillows were fluffy. I was able to put my valuables in the in-room safe. The local restaurants were amazing. I will recommend this to my friends and I am staying here again in the future."
s = [cleaning(s)]          # wrap the cleaned string in a one-element list; `s` is rebound from str to list
print(s)

['i recently stayed at the hyatt regency in downtown chicago my stay here was wonderful thanks to such a friendly staff i needed to do some research for work and the free wifi made this very easy the bed was so nice and soft even the pillows were fluffy i was able to put my valuables in the inroom safe the local restaurants were amazing i will recommend this to my friends and i am staying here again in the future']


In [6]:
# Load the previously saved tokenizer from the current working directory.
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code
# while loading; only load tokenizer.pkl from a trusted source.
tokenizer = joblib.load("tokenizer.pkl")

In [7]:
# Convert the cleaned text into a list of integer id sequences (one per input
# string). `s` is rebound here, so re-running this cell on its own output
# will not work; re-run from the cleaning cell instead.
s = tokenizer.texts_to_sequences(s)
print(s)

[[5, 206, 53, 16, 2, 357, 583, 8, 172, 25, 17, 31, 76, 7, 170, 924, 4, 277, 6, 110, 42, 5, 362, 4, 81, 93, 2594, 12, 184, 3, 2, 174, 352, 105, 22, 27, 464, 2, 78, 7, 41, 75, 3, 1014, 79, 2, 386, 23, 1728, 5, 7, 267, 4, 427, 17, 3548, 8, 2, 845, 863, 2, 909, 292, 23, 234, 5, 55, 115, 22, 4, 17, 364, 3, 5, 108, 118, 76, 63, 8, 2, 765]]


In [8]:
# Pad (or truncate) every sequence to exactly 80 ids; zeros are appended at the
# end because padding='post'.
# NOTE(review): maxlen=80 presumably matches the input length the model was
# trained with; confirm against the training code.
# NOTE(review): `truncating` is left at its default, which per the Keras docs
# is 'pre', so sequences longer than 80 lose ids from the START. The sample
# sequence printed above has 81 ids, so its first id is dropped here.

s = pad_sequences(s,maxlen=80,padding='post')
print(s.shape)

(1, 80)


In [9]:
# Load the saved Keras model from reviews.h5 in the current working directory.
model = load_model("reviews.h5")

In [10]:
# Score the padded sequence; res[0] is the output row for the single input
# (two values in the printed output below).
res = model.predict(s,batch_size=1)   # batch_size=1 because there is only one test sentence
print(res[0])

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
[0.14823978 0.8517602 ]


In [12]:
# Output index 0 is the "fake" score and index 1 the "genuine" score.
# A tie is reported as genuine.
fake_score = res[0][0]
genuine_score = res[0][1]
print("The review is fake." if fake_score > genuine_score else "The review is genuine.")

The review is genuine.
