In [1]:

import numpy as np
import pandas as pd
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.callbacks import Callback
from sklearn.model_selection import train_test_split
from keras.layers import Dense,Embedding,LSTM




In [2]:
df = pd.read_csv("IMDB_Dataset.csv")
df.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [3]:
df["sentiment"].replace({"positive": 1, "negative": 0}, inplace=True)

In [4]:
x = np.array(df["review"].values)
y = np.array(df["sentiment"].values)

In [5]:
x_filtered = []

for review in x:

    #lowercasing the sentence
    review = review.lower()

    # removing punctuations from sentence
    for i in review:
        punc = '''  !()-[]{};:'"\,<>./?@#$%^&*_~  '''
        if i in punc :
            review = review.replace(i, " ")

    x_filtered.append(review)

In [6]:
# One-Hot Encoding each sentence
vocalbulary_size = 5000
onehot_encoded = [one_hot(review,vocalbulary_size) for review in x_filtered]

In [7]:
# Padding each encoded sentence to have a max_length=500
max_length=500
x_padded = pad_sequences(onehot_encoded,max_length,padding="post")

In [8]:
x_train,x_test,y_train,y_test = train_test_split(x_padded,y,test_size=0.2)

In [9]:
model = Sequential()
embeded_vector_size = 35
model.add(Embedding(vocalbulary_size,embeded_vector_size,input_length=max_length))
model.add(LSTM(100))
model.add(Dense(1,activation="sigmoid"))

model.compile(optimizer='adam', loss='binary_crossentropy',metrics=["accuracy"])

print(model.summary())
print("Model Creation Completed !")



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 500, 35)           175000    
                                                                 
 lstm (LSTM)                 (None, 100)               54400     
                                                                 
 dense (Dense)               (None, 1)                 101       
                                                                 
Total params: 229501 (896.49 KB)
Trainable params: 229501 (896.49 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Model Creation Completed !


In [10]:
# Custom Keras callback to stop training when certain accuracy is achieved.
class MyThresholdCallback(Callback):
    def __init__(self, threshold):
        super(MyThresholdCallback, self).__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        val_acc = logs["val_accuracy"]
        if val_acc >= self.threshold:
            self.model.stop_training = True
            model_name = ("IMDB_sentiment_analysis_"+str(val_acc))
            model.save(model_name)

# Model converges at 0.87 accuracy with current hyperparameters.
model.fit(x_train,y_train,epochs=100,validation_data=(x_test,y_test),callbacks=[MyThresholdCallback(threshold=0.87)])

# model.save("IMDB_sentiment_analysis")

print("Model Training Completed !")

Epoch 1/100



KeyboardInterrupt: 

In [None]:
model.save("IMDB_sentiment_analysis")

In [None]:

from keras.models import load_model

trained_model = load_model("/content/IMDB_sentiment_analysis_0.8787999749183655")
predicted = trained_model.predict(x_test)[2]

sentiment = 1 if predicted > 0.6 else 0

print("PREDICTED : ",sentiment)
print("ACTUAL : ",y_test[2])


In [None]:
def get_sentiment(sentence: str):
    if isinstance(sentence, (str)):
        pass
    else:
        raise Exception("Input needs to be of type 'str' ")

    # filtering the sentence
    sentence = sentence.lower()

    punc = '''!()-[]{};:'"\, <>./?@#$%^&*_~'''

    for word in sentence:
        if word in punc:
            sentence = sentence.replace(word, " ")

    # Loading the saved trained model.
    from keras.models import load_model

    trained_model = load_model("/content/IMDB_sentiment_analysis_0.8787999749183655")

    predicted = trained_model.predict(x_test)[2]
    sentiment = 1 if predicted > 0.6 else 0

    if sentiment == 1:
        print("Positive")
    else:
        print("Negative")

    return sentiment


get_sentiment("That movie was really good!")