In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
from tensorflow import keras
from keras.models import load_model

In [2]:
# Read the unseen IMDb reviews that are scored further down the notebook.
REVIEWS_CSV = "IMDb_Unseen_Reviews.csv"
df = pd.read_csv(REVIEWS_CSV)
df.head()

Unnamed: 0.1,Unnamed: 0,Movie,Review Text,IMDb Rating
0,0,Ex Machina,Intelligent Movie.\nThis movie is obviously al...,9
1,1,Ex Machina,Extraordinary and thought-provoking.\n'Ex mach...,10
2,2,Ex Machina,"Poor story, only reasonable otherwise.\nIf I h...",3
3,3,Ex Machina,Had Great Potential.\nThis movie is one of the...,1
4,4,Eternals,Amazing visuals and philosophical concepts!\n\...,10


In [3]:
import re
import nltk
from nltk.corpus import stopwords

# Prefer the stopword corpus that is already installed locally and only reach
# for the network when NLTK reports it missing (LookupError). This keeps the
# notebook runnable offline and avoids a download attempt on every run.
try:
    stopwords_list = set(stopwords.words('english'))
except LookupError:
    nltk.download('stopwords')
    stopwords_list = set(stopwords.words('english'))


TAG_RE = re.compile(r'<[^>]+>')


def remove_tags(text):
    """Return ``text`` with every ``<...>`` tag-like span removed."""
    return TAG_RE.sub('', text)


class CustomPreprocess():
    """Normalise raw review text before it is handed to the tokenizer.

    The cleaning steps are: lowercase, strip tags, replace every non-letter
    with a space, drop single letters that sit between whitespace, collapse
    whitespace runs, and finally remove stopwords.
    """

    def __init__(self, stop_words=None):
        """Compile the stopword pattern once so it is reused for every review.

        Args:
            stop_words: Optional iterable of words to remove. When omitted,
                the notebook-level ``stopwords_list`` is read at construction
                time.
        """
        if stop_words is None:
            stop_words = stopwords_list
        # Escape each word so regex metacharacters in a custom list are
        # matched literally instead of being interpreted as pattern syntax.
        escaped = [re.escape(word) for word in stop_words]
        # An empty alternation would match at every word boundary and swallow
        # the spaces between words, so stopword removal is skipped entirely
        # when there is nothing to remove.
        self._stopword_re = (
            re.compile(r'\b(' + r'|'.join(escaped) + r')\b\s*')
            if escaped
            else None
        )

    def preprocess_text(self, sen):
        """Clean a single review.

        Args:
            sen: The raw review text as a ``str``.

        Returns:
            The cleaned, lowercase text. Leading or trailing whitespace left
            over from the substitutions is not stripped.
        """
        sen = sen.lower()

        # Remove html tags
        sentence = remove_tags(sen)

        # Remove punctuations and numbers
        sentence = re.sub('[^a-zA-Z]', ' ', sentence)

        # Single character removal (only letters with whitespace on both sides)
        sentence = re.sub(r"\s+[a-zA-Z]\s+", ' ', sentence)

        # Remove multiple spaces
        sentence = re.sub(r'\s+', ' ', sentence)

        # Remove Stopwords
        if self._stopword_re is not None:
            sentence = self._stopword_re.sub('', sentence)

        return sentence

[nltk_data] Error loading stopwords: <urlopen error [Errno 11001]
[nltk_data]     getaddrinfo failed>


In [4]:
# Clean every unseen review with one preprocessing object, keeping the
# results in the same order as the rows of df.
custom = CustomPreprocess()
unseen_reviews = df['Review Text']
unseen_processed = [
    custom.preprocess_text(raw_review) for raw_review in unseen_reviews
]

In [5]:
# Spot-check the first three cleaned reviews.
unseen_processed[:3]

['intelligent movie movie obviously allegorical fascinating tale ai mainly manipulation power wanting action spectacular cgi movie aimed people like think rather passively wait entertained themes ai also surveillance excellent points data us collected phone companies search engine companies commercial operating systems makers plot seems simple extremely clever protagonist playing games trying stay one step ahead one another movie perfectly consistent internal logic plays perfectly go expecting much however see people satisfied movie sets brilliantly therefore give least recent movies getting movie succeeds another recent movie ai transcendence think called failed interesting failure third movie ai spanish movie called eva also brilliant eva moving movie philosophical movies perfect different ways ai name movie ava seems nod title spanish movie aside nice stars appeared ex machina eva casting great course several aspects movie unrealistic often absurd allegorical movie acceptable movie 

In [6]:
# Loading
import io
from tensorflow.keras.preprocessing.text import tokenizer_from_json
import json


# Read the saved tokenizer definition. The encoding is pinned to UTF-8 so the
# file is decoded the same way on every platform instead of depending on the
# locale's default encoding.
with open('tokenizer.json', encoding='utf-8') as f:
    data = json.load(f)

# Rebuild the tokenizer after the file handle has been released.
# NOTE(review): tokenizer_from_json expects a JSON string, so tokenizer.json
# presumably stores the output of Tokenizer.to_json() as a JSON-encoded
# string — confirm against the code that wrote the file.
loaded_tokenizer = tokenizer_from_json(data)

In [7]:
from tensorflow.keras.preprocessing.text import Tokenizer
# Encode the cleaned reviews with the tokenizer restored from tokenizer.json
# rather than fitting a fresh Tokenizer on the unseen reviews.
# NOTE(review): presumably this is the tokenizer the model was trained with,
# so the word indices line up with the model's vocabulary — confirm.
unseen_tokenized = loaded_tokenizer.texts_to_sequences(unseen_processed)


In [8]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Bring every sequence to the same fixed length, padding short ones at the end.
# NOTE(review): `truncating` is left at its default, and both the length and
# the padding/truncation sides presumably have to match training — confirm.
MAX_SEQUENCE_LENGTH = 100
unseen_padded = pad_sequences(
    unseen_tokenized,
    maxlen=MAX_SEQUENCE_LENGTH,
    padding='post',
)


In [9]:
# Peek at the first two padded index sequences.
unseen_padded[:2]

array([[  879,  1740,   282,  1469,   150,   626,     3,  1434,  1291,
            3,    62,     1,   806,  4114,  5415,  1989,   191,   806,
           51,   854,    13,    88,    11,    16,  4012,     1,   589,
         1958,  1398,    93,   113,  1014,    24,   270,     1,  2759,
           62,  1014,     1, 14035, 25602,    25,   315,  1064,   115,
         1852,   736,     1, 14035,  1776,     1,   315,  3822,    18,
          383,  3822,   569,     1,  4051,    24,   284,   164,   624,
        14035,   261,     1, 12806,    83,  5440,   294,  1776,     1,
         1011,   213,   278,  1374,  1040, 14943,  3822,   897,    17,
          151,   312,  1264,     1,  1928,   285,  1602, 14420,     1,
         3172,     1,   122,   659,   140, 13082,  1645, 10209,  1615,
          937],
       [  121,   234,   352,  8844,  6752,   630,    20,  6507,    72,
         1400,  6730,  2885, 24074, 10547,  3633,  1645,    25, 11701,
         1335, 19425,    26,   281,  1574,  1143,   288, 1370

In [10]:
# Load the saved Keras model from sentiment_classifier2.h5.
model = load_model("sentiment_classifier2.h5")



In [11]:
# Run the model on the padded reviews and display the raw scores
# (one row per review in the output shown below).
pred = model.predict(unseen_padded)
pred

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


array([[0.9630739 ],
       [0.99672776],
       [0.12598418],
       [0.4207723 ],
       [0.9160166 ],
       [0.01306788]], dtype=float32)

In [12]:
# Build the results frame from df without the "Unnamed: 0" column
# (presumably an index that was written into the CSV); df itself is untouched.
pred_df = df.drop(columns=["Unnamed: 0"])
pred_df

Unnamed: 0,Movie,Review Text,IMDb Rating
0,Ex Machina,Intelligent Movie.\nThis movie is obviously al...,9
1,Ex Machina,Extraordinary and thought-provoking.\n'Ex mach...,10
2,Ex Machina,"Poor story, only reasonable otherwise.\nIf I h...",3
3,Ex Machina,Had Great Potential.\nThis movie is one of the...,1
4,Eternals,Amazing visuals and philosophical concepts!\n\...,10
5,Eternals,Worst MCU film ever\n\nFollowing the events of...,3


In [13]:
# Scale the model scores by 10 and keep one decimal place.
pred_df["Predicted Sentiment"] = (pred * 10).round(1)
pred_df

Unnamed: 0,Movie,Review Text,IMDb Rating,Predicted Sentiment
0,Ex Machina,Intelligent Movie.\nThis movie is obviously al...,9,9.6
1,Ex Machina,Extraordinary and thought-provoking.\n'Ex mach...,10,10.0
2,Ex Machina,"Poor story, only reasonable otherwise.\nIf I h...",3,1.3
3,Ex Machina,Had Great Potential.\nThis movie is one of the...,1,4.2
4,Eternals,Amazing visuals and philosophical concepts!\n\...,10,9.2
5,Eternals,Worst MCU film ever\n\nFollowing the events of...,3,0.1


In [14]:
# Scores of 5 or below are labelled "Negative"; anything higher is "Positive".
pred_label = [
    "Negative" if score <= 5 else "Positive"
    for score in pred_df["Predicted Sentiment"]
]

In [15]:
# Attach the Negative/Positive labels as a new column and show the result.
pred_df["Predicted Review Sentiment"] = pred_label
pred_df

Unnamed: 0,Movie,Review Text,IMDb Rating,Predicted Sentiment,Predicted Review Sentiment
0,Ex Machina,Intelligent Movie.\nThis movie is obviously al...,9,9.6,Positive
1,Ex Machina,Extraordinary and thought-provoking.\n'Ex mach...,10,10.0,Positive
2,Ex Machina,"Poor story, only reasonable otherwise.\nIf I h...",3,1.3,Negative
3,Ex Machina,Had Great Potential.\nThis movie is one of the...,1,4.2,Negative
4,Eternals,Amazing visuals and philosophical concepts!\n\...,10,9.2,Positive
5,Eternals,Worst MCU film ever\n\nFollowing the events of...,3,0.1,Negative
