# Import libraries

In [1]:
import os
import re
import ftfy
import nltk
import smtplib
import numpy as np
import pandas as pd
import pickle as pkl
from pathlib import Path
from nltk import PorterStemmer
from nltk.corpus import stopwords
from gensim.models import KeyedVectors
from keras.models import model_from_json
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


# Initialize the file names

In [2]:
# Directories, relative to the notebook's working directory.
input_dir = 'input'
model_dir = 'model'

# File names of the artefacts that later cells join onto the directories above.
cword_file = 'cword_dict.pkl'
model_str_file = 'model_structure.json'
model_weights_file = 'model_weights.h5'
tokenizer_file = 'tokens.pkl'
# Alternative tokenizer file name (not used): 'tokenizer.pickle'

In [3]:
# Label names; predict_result indexes this list with the rounded model output,
# so position 0 is 'neg' and position 1 is 'pos'.
class_names = ['neg', 'pos']
print(class_names)

['neg', 'pos']


In [4]:
# Mapping whose keys are searched for in review text and replaced by their
# values (see expandContractions).
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open(os.path.join(input_dir, cword_file), 'rb') as cword_fh:  # closes the handle
    cList = pkl.load(cword_fh)

In [5]:
# One alternation over all keys of cList.
# - Keys are escaped: the matched text is looked up verbatim in cList, so each
#   key has to be matched as a literal, not interpreted as a regex.
# - Longer keys come first: regex alternation takes the first alternative that
#   matches, so a short key must not shadow a longer key it is a prefix of.
c_re = re.compile(
    '(%s)' % '|'.join(re.escape(key) for key in sorted(cList, key=len, reverse=True))
)

In [6]:
# Tokenizer object whose texts_to_sequences method is applied to the cleaned reviews.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open(os.path.join(model_dir, tokenizer_file), 'rb') as tokenizer_fh:  # closes the handle
    trained_tokenizer = pkl.load(tokenizer_fh)

# Expand contractions (short forms) into their full words

In [7]:
def expandContractions(text, c_re=c_re):
    """Replace every substring of ``text`` matched by ``c_re`` with its ``cList`` value.

    Args:
        text: String to process.
        c_re: Compiled pattern to search with. Defaults to the module-level
            ``c_re`` as it was bound when this function was defined.

    Returns:
        ``text`` with each match replaced by ``cList[<matched text>]``.

    Raises:
        KeyError: If the pattern matches text that is not a key of ``cList``.
    """
    return c_re.sub(lambda hit: cList[hit.group(0)], text)

# Clean the text using NLP

In [8]:
def clean_review(reviews):
    """Normalise raw review texts for the tokenizer.

    Every item is converted with ``str``. An item that starts with a URL
    (``scheme://...``) is skipped entirely, so the returned list can be
    shorter than ``reviews``. Every other item goes through, in order:

    1. removal of @mentions, #hashtags, ``<Emoji:...>`` markers and
       ``pic.twitter.com/...`` links, then whitespace collapsing;
    2. ``ftfy.fix_text``;
    3. ``expandContractions``;
    4. replacement of every character that is not an ASCII letter, space or
       tab with a space, then whitespace collapsing;
    5. ``nltk.word_tokenize`` and removal of NLTK English stop words;
    6. lower-casing.

    Args:
        reviews: Iterable of review texts.

    Returns:
        list of str: The cleaned texts, in input order.
    """
    # Loop-invariant work: compile the patterns and read the stop-word corpus
    # once instead of once per review. A set gives constant-time membership.
    url_prefix = re.compile(r"(\w+:\/\/\S+)")
    social_noise = re.compile(r"(@[A-Za-z]+)|(\#[A-Za-z]+)|(<Emoji:.*>)|(pic\.twitter\.com\/.*)")
    non_letters = re.compile(r"([^A-Za-z \t])")
    stop_words = set(stopwords.words('english'))

    cleaned_review = []
    for review in reviews:
        review = str(review)
        if url_prefix.match(review) is not None:
            # Text that begins with a URL is dropped, not cleaned.
            continue
        review = ' '.join(social_noise.sub(" ", review).split())
        review = ftfy.fix_text(review)
        review = expandContractions(review)
        review = ' '.join(non_letters.sub(" ", review).split())
        # NOTE(review): this filter is case-sensitive and runs before
        # lower-casing, so capitalised stop words such as "The" or "I" survive
        # (visible in the prediction output). Left as is because the tokenizer
        # and model were presumably fitted on text cleaned this way -- confirm
        # before changing.
        kept_words = [w for w in nltk.word_tokenize(review) if w not in stop_words]
        cleaned_review.append(' '.join(kept_words).lower())
    return cleaned_review

In [9]:
def load_trained_model(model_str_path, model_wt_path):
    """Rebuild a model from its JSON structure file and load its weights.

    Args:
        model_str_path: Path of the file holding the model structure as JSON
            text (passed to ``model_from_json``).
        model_wt_path: Path of the weights file (passed to ``load_weights``).

    Returns:
        The model returned by ``model_from_json`` with the weights loaded.
    """
    architecture_json = Path(model_str_path).read_text()
    restored = model_from_json(architecture_json)
    restored.load_weights(model_wt_path)
    return restored

In [10]:
# Paths of the saved model structure and weights files inside model_dir.
str_path = os.path.join(model_dir,model_str_file)
wt_path = os.path.join(model_dir,model_weights_file)

In [11]:
# Rebuild the network from the structure file and load its weights.
trained_model = load_trained_model(str_path, wt_path)

In [12]:
# Print the layer-by-layer summary of the loaded model.
trained_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 140, 300)          6000000   
_________________________________________________________________
dropout_1 (Dropout)          (None, 140, 300)          0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 136, 32)           48032     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 68, 32)            0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 68, 32)            0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 64, 64)            10304     
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 32, 64)            0         
__________

# Read data from the input folder

In [13]:
# Raw review table; the path is built from input_dir like the other input files.
df_idf = pd.read_csv(os.path.join(input_dir, 'Final_reviews.csv'))

In [14]:
# Preview the first rows of the raw review table.
df_idf.head()

Unnamed: 0,Name,Score,Keyword,Time,Purchase,Text,votes,Helpful_Votes,Experience,product_name
0,Riswana Rinu,4,Four Stars,11-Jul-18,Verified Purchase,Nice one,,23.0,2,Geneva Platinum Silicone Strap Analogue Watch ...
1,Archana Lokhande,5,Royal blue color lovers....,9-Jan-19,Verified Purchase,Nice watch for womens. I love Royal blue color...,,20.0,2,Geneva Platinum Silicone Strap Analogue Watch ...
2,heena,1,Very bad,4-Dec-18,Verified Purchase,Dont buy it .stucks many times nt money worthy...,14 people found this helpful,15.0,1,Geneva Platinum Silicone Strap Analogue Watch ...
3,Atul Mishra,5,Five Stars,25-Apr-18,Verified Purchase,Nice watch & nice colour,9 people found this helpful,10.0,2,Geneva Platinum Silicone Strap Analogue Watch ...
4,Manaswini s,5,Five Stars,29-Mar-18,Verified Purchase,Super,5 people found this helpful,5.0,1,Geneva Platinum Silicone Strap Analogue Watch ...


In [15]:
# Drop, in place, every row that has a missing value in ANY column.
# NOTE(review): this also removes rows whose only gap is the 'votes' column,
# which is not among the columns selected afterwards (rows 0 and 1 of the
# preview disappear this way). If that is unintended, restrict the check with
# dropna(subset=[...]) -- confirm, since it changes which reviews are analysed.
df_idf.dropna(axis=0,inplace=True)

# Keep only the important columns

In [16]:
# Independent copy restricted to the listed columns, so later changes to df1
# do not touch df_idf.
df1 = df_idf[['Keyword','Score','Text','Purchase','Helpful_Votes','Experience','product_name']].copy()

In [17]:
# Preview the first rows of the reduced table.
df1.head()

Unnamed: 0,Keyword,Score,Text,Purchase,Helpful_Votes,Experience,product_name
2,Very bad,1,Dont buy it .stucks many times nt money worthy...,Verified Purchase,15.0,1,Geneva Platinum Silicone Strap Analogue Watch ...
3,Five Stars,5,Nice watch & nice colour,Verified Purchase,10.0,2,Geneva Platinum Silicone Strap Analogue Watch ...
4,Five Stars,5,Super,Verified Purchase,5.0,1,Geneva Platinum Silicone Strap Analogue Watch ...
5,Nice trendy watch for kids,5,Very nice and stylish watch for kids. The Blue...,Verified Purchase,12.0,4,Geneva Platinum Silicone Strap Analogue Watch ...
6,Wonderful Product,5,Its a nice one looks Cool on the wrist of my w...,Verified Purchase,2.0,1,Geneva Platinum Silicone Strap Analogue Watch ...


In [18]:
# Save the reduced table; the path is built from input_dir like the other input files.
df1.to_csv(os.path.join(input_dir, 'c_data.csv'))

In [19]:
# Distinct values of the Purchase column.
df1['Purchase'].unique()

array(['Verified Purchase'], dtype=object)

In [20]:
# Distinct values of the Helpful_Votes column.
df1['Helpful_Votes'].unique()

array([ 15.,  10.,   5.,  12.,   2.,   8.,   4.,   3.,  52.,  39.,  20.,
         1.,   0.,  18., 214.,  24., 176., 196., 273., 109., 254., 190.,
        62., 265.,  63.,  41.,  67., 293.,  85., 150.,  40., 104., 116.,
       195., 137., 174., 141.,  31., 106.,  78., 160., 125.,  74.,  92.,
       135.,  44.,  73.,  57.,  14., 117., 102., 184., 147.,  88.,  94.,
       183.,  93.,  25.,  21.,  96.,  61.,  71.,  17.,  95., 193.,  59.,
       172.,  69.,  56.,  79.,  81., 123.,  89., 130.,  22., 111.,  34.,
       166.,  97.,  32., 127.,  86., 134.,  48., 191.,   9.,  54., 197.,
       101., 167., 159., 121.,  82.,  36.,  50.,  29., 145.,  28.,  64.,
       131., 148., 120.,  47.,  77., 110., 187.,  53., 114., 173., 138.,
        35.,  46.,  30., 115., 188., 142.,   6., 155.,  11.,  65.,  80.,
        13., 139., 152., 156., 204., 309., 355., 427., 424., 414., 258.,
       354., 432., 263., 245., 397., 259., 143., 208., 237., 242., 278.,
       251., 170., 218., 268., 267., 252., 339.,  3

# Get the product names

In [21]:
# Distinct product names present in the reduced table.
df1['product_name'].unique()

array(['Geneva Platinum Silicone Strap Analogue Watch for Women & Girls - GP-379',
       "Geneva Platinum Analogue Gold Dial Women's Watch -GNV01",
       "IIk Collection Watches Stainless Steel Chain Day and Date Analogue Silver Dial Women's Watch",
       "NUBELA Analogue Prizam Glass Black Dial Girl's Watch",
       'Analogue Pink',
       "Sonata Analog Champagne Dial Women's Watch-NK87018YM01",
       "Sonata Analog White Dial Women's Watch -NJ8989PP03C",
       "Everyday Analog Black Dial Women's Watch -NK8085SM01",
       "Sonata SFAL Analog Silver Dial Women's Watch -NK8080SM01",
       "Timewear Analogue Round Beige Dial Women's Watch - 107Wdtl",
       "TIMEWEAR Analogue Brown Dial Women's Watch - 134Bdtl",
       "Sonata SFAL Analog Silver Dial Women's Watch -NK8080SM-3372",
       "Adamo Analog Blue Dial Women's Watch-9710SM01",
       "ADAMO Aritocrat Women's & Girl's Watch BG-335",
       "Imperious Analog Women's Watch 1021-1031",
       "IIK Collection Watches Analogue

In [22]:
# Number of distinct product names.
len(df1['product_name'].unique())

21

In [23]:
# Product whose reviews are selected below; compared for exact equality with
# the product_name column.
p_name = "Geneva Platinum Analogue Rose Gold Dial Women's Watch- Gp-649"

# Apply the conditions for important reviews

In [24]:
# Review texts of the chosen product that pass the filter in the next cell.
core_review = []

In [25]:
# Select the texts of "core" reviews for p_name: reviewer Experience of at
# least 2, at least 10 helpful votes, and a verified purchase.
# core_review is assigned (not appended to), so re-running this cell cannot
# duplicate entries, and the boolean mask avoids a Python-level row loop.
core_mask = (
    (df1['product_name'] == p_name)
    & (df1['Experience'] >= 2)
    & (df1['Helpful_Votes'] >= 10)
    & (df1['Purchase'] == 'Verified Purchase')
)
core_review = df1.loc[core_mask, 'Text'].tolist()

In [26]:
# Clean the selected reviews with clean_review before tokenising them.
cleaned_text = clean_review(core_review)

In [27]:
# Convert each cleaned text to a sequence of integer ids with the loaded
# tokenizer, then print all sequences.
sequences_text_token = trained_tokenizer.texts_to_sequences(cleaned_text)
print(sequences_text_token)

[[20, 2, 485, 38, 1309, 30, 75, 50, 18, 1308, 391, 23, 2, 5, 31, 3], [13, 444, 318, 45], [13, 58, 511, 1, 52, 7, 1917], [7, 19, 3], [1, 24, 134, 29, 875, 23], [603, 1, 51, 85, 143, 1, 7, 408, 111, 5, 159, 90, 597, 128, 24, 87, 2, 544, 104], [25, 11, 242, 260, 17, 436, 831, 75, 874, 1919], [39, 20, 100, 24], [14, 49, 1920, 43, 289, 159, 3], [40, 45, 3, 210, 281, 24], [7, 11, 45, 6, 39, 68, 74, 670, 12], [7, 25, 25, 3, 85, 414, 175, 321, 228, 634, 7, 127, 3, 27, 304, 353], [6, 114, 77, 51, 25, 16, 547, 543, 15, 673, 379, 228], [86, 3, 354, 190, 235, 1173, 101, 539, 712, 1926], [13, 23, 185, 41, 28, 51, 30, 426, 894, 101, 57, 146, 22, 10, 88, 57, 500, 544, 407, 65], [7, 1, 3, 21, 9], [460, 45, 3], [1, 73, 213, 502], [283, 20, 2], [1, 14], [148, 3, 2718, 2719, 633, 214, 2720, 562, 68, 74, 11, 618, 54], [44, 20], [40, 3, 278, 123, 435, 1930, 348, 27, 1931, 724, 435], [48, 209, 774, 4, 3], [15, 61], [68, 74], [8, 136], [48, 3], [25], [120, 29, 2, 30, 2, 85, 586, 1933, 880, 254, 117, 1, 1175,

In [28]:
# Bring every sequence to length 140, the sequence length shown for the
# embedding_1 layer in the model summary.
data = pad_sequences(sequences_text_token, maxlen=140)
print('Shape of data tensor:', data.shape)

Shape of data tensor: (145, 140)


In [59]:
# Buckets that predict_result appends to: texts labelled 'pos' go to p_review,
# all others to n_review.
p_review, n_review = [], []

In [60]:
def predict_result(data, model):
    """Predict a label for every row of ``data`` and sort the texts by label.

    The flattened output of ``model.predict(data)`` is rounded and each value
    is used as an index into the module-level ``class_names``. The rounded
    array is printed first; then, per row, the matching entry of the
    module-level ``cleaned_text`` and its label are printed, and the text is
    appended to the module-level ``p_review`` (label ``'pos'``) or
    ``n_review`` (any other label).

    Args:
        data: Model input; row ``i`` is paired with ``cleaned_text[i]``.
        model: Object providing ``predict(data)``.

    Returns:
        None. Results are printed and accumulated in ``p_review`` / ``n_review``.
    """
    rounded = np.round(model.predict(data).flatten())
    print(rounded)
    for idx, score in enumerate(rounded):
        text = cleaned_text[idx]
        print("Text : ", text)
        label = class_names[int(score)]
        print("Result : ", label)
        bucket = p_review if label == 'pos' else n_review
        bucket.append(text)

In [61]:
# Classify the padded sequences; fills p_review and n_review as a side effect.
predict_result(data=data, model= trained_model)

[1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 1. 1. 1. 0. 1.
 0. 1. 1. 1. 0. 0. 0. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 0. 0. 1. 1.
 0. 1. 0. 0. 1. 0. 0. 1. 1. 1. 0. 0. 0. 1. 1. 0. 1. 1. 1. 1. 0. 1. 0. 1.
 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 1. 0. 1. 1. 0. 0. 1.
 0. 1. 0. 1. 1. 1. 0. 0. 1. 0. 1. 0. 1. 1. 1. 1. 0. 0. 1. 1. 1. 0. 1. 0.
 1. 0. 1. 1. 1. 1. 0. 1. 0. 1. 1. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 1. 1.
 0.]
Text :  beautiful watch displayed picture catch colour n big dial adds royal look watch i happy product
Result :  pos
Text :  the strip made cheap
Result :  neg
Text :  the belt us good expected it folds
Result :  neg
Text :  it bad product
Result :  neg
Text :  good buy cost loved whole look
Result :  pos
Text :  lookwise good but chain material good it delicate so i returned better spend rs buy sonata watch rather buying
Result :  neg
Text :  ok looks hands r working free stuck n moving ahed
Result :  neg
Text :  its beautiful must buy
Result :  pos

# !!! Thank You !!!