# Sentiment Classification

The Notebook presents a simple sentiment classification system using Keras LSTM network.
## Importing necessary libraries

In [1]:
import ast
import helper
import re
import string
import numpy as np
import pickle

## Reading data and creating pairs of sentence and sentiment

In [2]:
f = open("imdb_labelled.txt")
data1 = f.readlines()
f.close()

f = open("amazon_cells_labelled.txt")
data2 = f.readlines()
f.close()

f = open("yelp_labelled.txt")
data3 = f.readlines()
f.close()

pairs = []
for one in data1:
    pairs.append(one.strip("\n").lower().split("\t"))
    
for one in data2:
    pairs.append(one.strip("\n").lower().split("\t"))
    
for one in data3:
    pairs.append(one.strip("\n").lower().split("\t"))

## Loading pretrained word embeddings

In [3]:
f = open("glove.6B.50d.txt", encoding='utf-8')
emb_dict = dict()
for line in f:
    values = line.split()
    word = values[0]
    coef = values[1:]
    emb_dict[word] = coef
f.close()

## Cleaning text

In [4]:
translation = str.maketrans("","", string.punctuation)
new_pairs = []
for val in pairs:
    sent = val[0]
    sent = re.sub(r"([\w/'+$\s-]+|[^\w/'+$\s-])\s*", r"\1 ", sent)
    sent = sent.replace('-', ' ')
    sent = sent.replace('/', ' ')
    sent = sent.translate(translation)
    new_pairs.append([sent, val[1]])

## Geting set of known words accordig to pretrained embeddings

In [5]:
vocab = set(['eos', 'pad'])
for pair in new_pairs:
    sent = pair[0]
    for word in sent.split():
        vocab.add(word) 

## Getting unknown words

In [6]:
unk = -1
unk_set = set()
for word in vocab:
    if emb_dict.get(word, -1) == -1:
        unk += 1
        unk_set.add(word)
print(unk)

208


## Replacing unknown words with ```unk``` token

In [7]:
n = []
for pair in new_pairs:
    sent = pair[0]
    sent = sent.split()
    for i, word in enumerate(sent):
        if(word in unk_set):
            sent[i] = "unk"
    new_sent = ' '.join(sent)
    n.append([new_sent, pair[1]])

## Appending ```eos``` token at end of sentencs

In [8]:
vocab = set(['eos', 'pad'])
for pair in n:
    sent = pair[0]
    for word in sent.split():
        vocab.add(word)   

## creating necessary dictionaries

In [9]:
word_to_index = dict({'pad' : 0})
print(word_to_index)
for word in vocab:
    if word != 'pad':
        word_to_index[word] = len(word_to_index)

index_to_word = dict(zip(word_to_index.values(), word_to_index.keys()))

{'pad': 0}


In [10]:
useful_emb = np.zeros((len(word_to_index), 50))
for word, index in word_to_index.items():
    useful_emb[index] = emb_dict[word]

## Creating indexed data and sentiment pair

In [11]:
indexed_pairs = []
for pair in n:
    sent = pair[0].split()
    index_sent = []
    for word in sent:
        index_sent.append(word_to_index[word])
    index_sent.append(word_to_index['eos'])
    indexed_pairs.append([index_sent, ast.literal_eval(pair[1])])

## Dumping important data structures

In [12]:
f = open("important_data.obj", "wb")
pickle.dump([vocab, word_to_index, useful_emb], f, pickle.HIGHEST_PROTOCOL)
f.close()
del(emb_dict)

## Spliting data in train and test set

In [13]:
split_point = int(len(indexed_pairs) * 0.2)
train_data = indexed_pairs[:-split_point]
valid_data = indexed_pairs[-split_point:]

## Creating batch generators for train and test sets

In [14]:
train = helper.batch_generator(train_data, 20)
valid = helper.batch_generator(valid_data, 20)

## Creating Model

In [15]:
from keras.models import Sequential
from keras.layers import Embedding, Dense, LSTM, Dropout

Using TensorFlow backend.


In [16]:
def base_model():
    m = Sequential()
    m.add(Embedding(len(word_to_index),50,weights=[useful_emb], input_shape=[None,]))
    m.add(LSTM(512, return_sequences = True))
    m.add(Dropout(0.5))
    m.add(LSTM(512))
    m.add(Dense(1, activation='sigmoid'))
    return m

In [17]:
model = base_model()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 50)          251500    
_________________________________________________________________
lstm_1 (LSTM)                (None, None, 512)         1153024   
_________________________________________________________________
dropout_1 (Dropout)          (None, None, 512)         0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 512)               2099200   
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 513       
Total params: 3,504,237
Trainable params: 3,504,237
Non-trainable params: 0
_________________________________________________________________


In [18]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

In [19]:
from keras.callbacks import ModelCheckpoint
num_epochs = 100
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
checkpointer = ModelCheckpoint(filepath='model-{epoch:02d}.hdf5', verbose=1)
model.fit_generator(train.batch(), len(train_data)//20, num_epochs, validation_data = valid.batch(),
                    validation_steps = len(valid_data)//20, callbacks = [checkpointer])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100


Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100


Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100


Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x1650b409dd8>

## Loading Trained model
The following section Loads the trained model and lets you predict the sentiment of your sentences.

In [20]:
from keras.models import load_model
import numpy as np
import re
import string
import pickle

f = open("important_data.obj", "rb")
vocab, word_to_index, useful_emb = pickle.load(f)
f.close()
m = load_model(r"model-99.hdf5")
translation = str.maketrans("","", string.punctuation)


In [22]:
while True:
    choice = int(input("Enter 0 to stop else Enter 1: "))
    if choice == 0:
        break
    sent = input("Enter your sentence: ")
    sent = re.sub(r"([\w/'+$\s-]+|[^\w/'+$\s-])\s*", r"\1 ", sent).lower()
    sent = sent.replace('-', ' ')
    sent = sent.replace('/', ' ')
    sent = sent.translate(translation)
    sent = sent.split()
    for i, word in enumerate(sent):
        if(word not in vocab):
            sent[i] = "unk"

    index_sent = []
    for word in sent:
        index_sent.append(word_to_index[word])
    index_sent.append(word_to_index['eos'])
    index_sent = np.reshape(index_sent, (1, len(index_sent)))
    prediction = m.predict(index_sent)
    if prediction[0][0] >= 0.5:
        print("Sentiment: Positive")
    else:
        print("Sentiment: Negative")

Enter 0 to stop else Enter 1: 1
Enter your sentence: I don't like this.
Sentiment: Negative
Enter 0 to stop else Enter 1: 1
Enter your sentence: Just love it, EXCELLENT MOVIE!!!
Sentiment: Positive
Enter 0 to stop else Enter 1: 1
Enter your sentence: Shit movie.
Sentiment: Negative
Enter 0 to stop else Enter 1: 1
Enter your sentence: I don't recommend it to anyone at all.
Sentiment: Negative
Enter 0 to stop else Enter 1: 1
Enter your sentence: EXCELLENT BUY!!!
Sentiment: Positive
Enter 0 to stop else Enter 1: 0
