In [1]:
import pandas as pd
from keras.preprocessing import sequence, text
from keras.models import Sequential, model_from_json
from keras.layers import Dense, Embedding, Dropout, Bidirectional, LSTM
from keras.datasets import imdb
import numpy as np
from keras.preprocessing.text import Tokenizer, text_to_word_sequence

Using TensorFlow backend.


In [2]:
np.random.seed(1337)
max_features = 25000
maxlen = 200  # cut texts after this number of words (among top max_features most common words)
batch_size = 64

(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)

X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)

y_train = np.array(y_train)
y_test = np.array(y_test)

model = Sequential()
model.add(Embedding(max_features,128,input_length=maxlen))
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.3))
model.add(Dense(1,activation='sigmoid'))

model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

tokenizer = Tokenizer(nb_words=2500, filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',lower=True,split=' ')



In [3]:
model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=7, validation_data=[X_test, y_test])

  """Entry point for launching an IPython kernel.


Train on 25000 samples, validate on 25000 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<keras.callbacks.History at 0xe5f32631d0>

In [4]:
loss,accuracy = model.evaluate(X_test,y_test)
print('loss:',loss)
print('acc:',accuracy)

#serialize model to JSON
model_json = model.to_json()
with open("model.json","w") as json_file:
    json_file.write(model_json)

#serialize weights to HDF5
model.save_weights("model.h5")
print("saved model to disk")

loss: 0.5458828162193299
acc: 0.83852
saved model to disk


In [5]:
#predicting for new datasets
from keras.preprocessing import text
from keras.models import model_from_json

json_file = open('model.json','r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)

#load weights into new model
loaded_model.load_weights("model.h5")

#compile and evaluate loaded model
loaded_model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

tokenizer = Tokenizer(nb_words=2500, filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',lower=True,split=' ')



In [6]:
def textToNumeric(text):
    #tokenizer.fit_on_texts(text)
    numericText = tokenizer.texts_to_sequences(text)
    paddedInput = sequence.pad_sequences(numericText,maxlen=maxlen)
    
    return paddedInput

In [7]:
#this has to be loaded for new text conversion into vectors
word_index = imdb.get_word_index()

In [8]:
# define the document
doc1 = "Hi I have ordered two medium pizza and two numbers of chicken wings. I have made online payment through my debit card. My order No. is 232 dated 27.06.2018 amounting to Rs. 1432. Payment has been done and then I received a call from protected telling me that chicken wings are out of stock. The payment for the same will be send back in 7 to 8 days.The main issue is that f it was not in stock then why it was not displayed. Now after payment it is being said that it is out of stock. Will you let me know how would you compensate for my grievance all My friends and Me are totally frustrated"
# tokenize the document
tokenizedText= text_to_word_sequence(doc1)
print(tokenizedText)

['hi', 'i', 'have', 'ordered', 'two', 'medium', 'pizza', 'and', 'two', 'numbers', 'of', 'chicken', 'wings', 'i', 'have', 'made', 'online', 'payment', 'through', 'my', 'debit', 'card', 'my', 'order', 'no', 'is', '232', 'dated', '27', '06', '2018', 'amounting', 'to', 'rs', '1432', 'payment', 'has', 'been', 'done', 'and', 'then', 'i', 'received', 'a', 'call', 'from', 'protected', 'telling', 'me', 'that', 'chicken', 'wings', 'are', 'out', 'of', 'stock', 'the', 'payment', 'for', 'the', 'same', 'will', 'be', 'send', 'back', 'in', '7', 'to', '8', 'days', 'the', 'main', 'issue', 'is', 'that', 'f', 'it', 'was', 'not', 'in', 'stock', 'then', 'why', 'it', 'was', 'not', 'displayed', 'now', 'after', 'payment', 'it', 'is', 'being', 'said', 'that', 'it', 'is', 'out', 'of', 'stock', 'will', 'you', 'let', 'me', 'know', 'how', 'would', 'you', 'compensate', 'for', 'my', 'grievance', 'all', 'my', 'friends', 'and', 'me', 'are', 'totally', 'frustrated']


In [9]:
numericText = np.array([word_index[word] if (word in word_index) and (word_index[word]<25000) else 0 for word in tokenizedText])
numericText

array([ 6593,    10,    25,  5172,   104,  3446,  7754,     2,   104,
        1393,     4,  5142,  5831,    10,    25,    90,  4689, 11334,
         140,    58,     0,  3152,    58,   658,    54,     6,     0,
        1964,  7508, 19839,     0,     0,     5,     0,     0, 11334,
          44,    74,   221,     2,    92,    10,  1987,     3,   680,
          36, 14844,   976,    69,    12,  5142,  5831,    23,    43,
           4,  2050,     1, 11334,    15,     1,   169,    77,    27,
        2219,   142,     8,   690,     5,   706,   501,     1,   290,
        1831,     6,    12,  1206,     9,    13,    21,     8,  2050,
          92,   135,     9,    13,    21,  4339,   147,   100, 11334,
           9,     6,   109,   298,    12,     9,     6,    43,     4,
        2050,    77,    22,   384,    69,   121,    86,    59,    22,
        7965,    15,    58,     0,    29,    58,   366,     2,    69,
          23,   481,  3568])

In [10]:
numeric_inp = sequence.pad_sequences([numericText],maxlen=maxlen)
numeric_inp

array([[    0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,  6593,
           10,    25,  5172,   104,  3446,  7754,     2,   104,  1393,
            4,  5142,  5831,    10,    25,    90,  4689, 11334,   140,
           58,     0,  3152,    58,   658,    54,     6,     0,  1964,
         7508, 19839,     0,     0,     5,     0,     0, 11334,    44,
           74,   221,     2,    92,    10,  1987,     3,   680,    36,
      

In [11]:
out = loaded_model.predict(numeric_inp)
print(out)

[[0.9746414]]


In [12]:
def sentiment(text):
    tokenizedText= text_to_word_sequence(text)
    numericText = np.array([word_index[word] if (word in word_index) and (word_index[word]<25000) else 0 for word in tokenizedText])
    numeric_inp = sequence.pad_sequences([numericText],maxlen=maxlen)
    out = loaded_model.predict(numeric_inp)
    return out

In [13]:
doc2 = "Hi I have ordered two medium pizza and two numbers of chicken wings. I have made online payment through my debit card. My order No. is 232 dated 27.06.2018 amounting to Rs. 1432. Payment has been done and then I received a call from protected telling me that chicken wings are out of stock. The payment for the same will be send back in 7 to 8 days.The main issue is that f it was not in stock then why it was not displayed. Now after payment it is being said that it is out of stock. Will you let me know how would you compensate for my grievance all My friends and Me are totally frustrated"
print(sentiment(doc2))

[[0.9746414]]


In [14]:
print(sentiment("i like it"))

[[0.87709075]]


In [None]:
7