In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras.models import load_model

In [2]:
# Raw-content URL of the CSV holding the unseen IMDb reviews.
UNSEEN_REVIEWS_URL = "https://raw.githubusercontent.com/HITMANFOURTY7/ImdbSentimentanalysis-/main/IMDb_Unseen_Reviews.csv"

df = pd.read_csv(UNSEEN_REVIEWS_URL)
df.head()

Unnamed: 0.1,Unnamed: 0,Movie,Review Text,IMDb Rating
0,0,Ex Machina,Intelligent Movie.\nThis movie is obviously al...,9
1,1,Ex Machina,Extraordinary and thought-provoking.\n'Ex mach...,10
2,2,Ex Machina,"Poor story, only reasonable otherwise.\nIf I h...",3
3,3,Ex Machina,Had Great Potential.\nThis movie is one of the...,1
4,4,Eternals,Amazing visuals and philosophical concepts!\n\...,10


In [4]:
import re
import nltk
# Make sure NLTK's 'stopwords' corpus is available locally: nltk.data.find
# raises LookupError when it is missing, and only then is it downloaded.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

from nltk.corpus import stopwords

# English stopwords, de-duplicated into a set.
stopwords_list = {*stopwords.words('english')}


# Matches any "<...>" markup tag.
TAG_RE = re.compile(r'<[^>]+>')

def remove_tags(text):
    """Return ``text`` with every ``<...>`` tag removed."""
    return TAG_RE.sub('', text)


class CustomPreprocess():
    """Clean a raw review into lower-case, letters-only, stopword-free text.

    The stopword pattern is compiled once per instance rather than on every
    call to ``preprocess_text``.
    """

    def __init__(self, stop_words=None):
        """Prepare the stopword-removal pattern.

        Args:
            stop_words: Optional iterable of words to strip. When omitted,
                the module-level ``stopwords_list`` is used.
        """
        words = stopwords_list if stop_words is None else stop_words
        # Escape so each word is matched literally; sort so the pattern text
        # does not depend on set iteration order; drop empty entries.
        alternatives = sorted(re.escape(word) for word in set(words) if word)
        # With no stopwords the pattern would be r'\b()\b\s*', which matches
        # at every word boundary and deletes the spaces between words, so the
        # removal step is skipped entirely in that case.
        if alternatives:
            self._stopword_re = re.compile(r'\b(' + r'|'.join(alternatives) + r')\b\s*')
        else:
            self._stopword_re = None

    def preprocess_text(self,sen):
        """Return the cleaned form of the review string ``sen``.

        Steps, in order: lower-case, strip ``<...>`` tags, replace every
        non-letter with a space, drop isolated single letters, collapse
        whitespace runs to one space, remove stopwords (together with the
        whitespace that follows each one).
        """
        sen = sen.lower()

        # Remove html tags
        sentence = remove_tags(sen)

        # Remove punctuations and numbers
        sentence = re.sub('[^a-zA-Z]', ' ', sentence)

        # Single character removal.
        # NOTE(review): neighbouring single letters share their separating
        # space, so only every other one is removed ("x a b y" -> "x b y").
        # Left unchanged so inference matches whatever preprocessing the model
        # was trained with -- TODO confirm against the training notebook.
        sentence = re.sub(r"\s+[a-zA-Z]\s+", ' ', sentence)

        # Remove multiple spaces
        sentence = re.sub(r'\s+', ' ', sentence)

        # Remove Stopwords
        if self._stopword_re is not None:
            sentence = self._stopword_re.sub('', sentence)

        return sentence

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [5]:
# Run every unseen review through the text-cleaning pipeline.
custom = CustomPreprocess()
unseen_reviews = df['Review Text']

unseen_processed = [custom.preprocess_text(raw_review) for raw_review in unseen_reviews]

In [6]:
# Peek at the first three cleaned reviews to sanity-check the preprocessing.
unseen_processed[:3]

['intelligent movie movie obviously allegorical fascinating tale ai mainly manipulation power wanting action spectacular cgi movie aimed people like think rather passively wait entertained themes ai also surveillance excellent points data us collected phone companies search engine companies commercial operating systems makers plot seems simple extremely clever protagonist playing games trying stay one step ahead one another movie perfectly consistent internal logic plays perfectly go expecting much however see people satisfied movie sets brilliantly therefore give least recent movies getting movie succeeds another recent movie ai transcendence think called failed interesting failure third movie ai spanish movie called eva also brilliant eva moving movie philosophical movies perfect different ways ai name movie ava seems nod title spanish movie aside nice stars appeared ex machina eva casting great course several aspects movie unrealistic often absurd allegorical movie acceptable movie 

In [13]:
import io
import json
from tensorflow.keras.preprocessing.text import tokenizer_from_json
import requests

# Fetch the serialized tokenizer (JSON) from the project repository.
url = 'https://raw.githubusercontent.com/HITMANFOURTY7/ImdbSentimentanalysis-/main/tokenizer.json'
# The timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=30)

# Fail loudly: later cells use `loaded_tokenizer` unconditionally, so merely
# printing a message here would surface as a NameError (or silently reuse a
# stale tokenizer from an earlier run) further down the notebook.
if response.status_code != 200:
    raise RuntimeError(f"Failed to download tokenizer file. Status code: {response.status_code}")

# Pass the raw JSON string from the response text directly to tokenizer_from_json
loaded_tokenizer = tokenizer_from_json(response.text)

In [14]:
from tensorflow.keras.preprocessing.text import Tokenizer
# Encode the cleaned reviews with the tokenizer loaded from tokenizer.json.
# A fresh Tokenizer fitted on the unseen reviews is not used here: it would
# assign its own word indices, which presumably would not match the vocabulary
# the model was trained on -- TODO confirm against the training notebook.
# NOTE(review): in the recorded run the second review encodes almost entirely
# to index 1 (likely the out-of-vocabulary id) -- verify that tokenizer.json is
# the tokenizer that was fitted on the training corpus.
unseen_tokenized = loaded_tokenizer.texts_to_sequences(unseen_processed)


In [15]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Fixed sequence length fed to the model.
# NOTE(review): presumably must equal the length used at training -- confirm.
MAX_SEQUENCE_LENGTH = 100

# Sequences shorter than the limit are zero-padded at the end ('post').
unseen_padded = pad_sequences(
    unseen_tokenized,
    maxlen=MAX_SEQUENCE_LENGTH,
    padding='post',
)


In [16]:
# Show the first two padded index sequences as a sanity check.
unseen_padded[:2]

array([[134, 135, 136, 137, 138, 139,  61, 140, 141,  61,  62,   5,  63,
        142, 143, 144, 145,  63, 146, 147,  66, 148, 150,  32, 152,   5,
        155, 157, 158, 159, 160,  69,  70, 162,   5, 164,  62,  69,   5,
         21, 166,  52,  72, 167,  73, 168, 169,   5,  21,  74,   5,  72,
         39,  55, 170,  39,  75,   5,  76,  70, 171, 172, 173,  21, 175,
          5, 176,  60, 177, 178,  74,   5, 179, 180, 181, 182, 183, 184,
         39, 185,  80, 186, 187, 188,   5, 189, 190, 191,  45,   5, 193,
          5, 194,  36,  53, 195, 196, 197, 198, 199],
       [  1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
          1,   1, 196,  52,   1,   1,   1, 315,   1,   1,   1,   1,   1,
          1,   1,   1,   1,   1,   1,   1,   1, 272,   1,   1,   1,   1,
          1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1, 299,
          1, 298,   1,   1,   1,   1,   1,   1,   1,   1, 309,   1,   1,
          1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   

In [18]:
import io
import json
from tensorflow.keras.preprocessing.text import tokenizer_from_json
import requests
from tensorflow.keras.models import load_model # Ensure load_model is imported here

def _download(url, description, timeout=60):
    """Fetch ``url`` and return the ``requests`` response.

    Args:
        url: Address to download.
        description: Short label ("tokenizer", "model") used in the error text.
        timeout: Seconds to wait before giving up on the connection.

    Raises:
        RuntimeError: If the server answers with any status other than 200.
            Raising (rather than printing) stops the notebook here instead of
            letting later cells fail on an undefined or stale variable.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(f"Failed to download {description} file. Status code: {response.status_code}")
    return response


# --- Tokenizer ---
url_tokenizer = 'https://raw.githubusercontent.com/HITMANFOURTY7/ImdbSentimentanalysis-/main/tokenizer.json'
response_tokenizer = _download(url_tokenizer, "tokenizer")
# Pass the raw JSON string from the response text directly to tokenizer_from_json
loaded_tokenizer = tokenizer_from_json(response_tokenizer.text)

# --- Model ---
# This is the github.com ".../raw/..." form of the link (not a
# raw.githubusercontent.com URL); the recorded run downloaded it successfully.
url_model = "https://github.com/HITMANFOURTY7/ImdbSentimentanalysis-/raw/main/sentiment_classifier.h5"

# Local path the downloaded model file is written to before loading.
local_model_path = "sentiment_classifier.h5"

response_model = _download(url_model, "model")

# Write the content to a local file
with open(local_model_path, 'wb') as f:
    f.write(response_model.content)
print(f"Model file downloaded successfully to {local_model_path}")

# NOTE(review): the model file comes straight from the network; only load
# model files from sources you trust.
model = load_model(local_model_path)
print("Model loaded successfully from local file.")

Model file downloaded successfully to sentiment_classifier.h5




Model loaded successfully from local file.


In [19]:
# Score every padded review with the loaded model. In the recorded run the
# result is a float32 array with one row per review and a single column.
# NOTE(review): values fall between 0 and 1 -- presumably the probability of a
# positive review; confirm against the model's output layer.
pred = model.predict(unseen_padded)
pred

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 549ms/step


array([[0.97149366],
       [0.28989622],
       [0.06578502],
       [0.37367192],
       [0.02111815],
       [0.37367988]], dtype=float32)

In [20]:
# Build the results frame from a copy of the input without the
# "Unnamed: 0" column; `df` itself is left untouched.
pred_df = df.drop(columns="Unnamed: 0")
pred_df

Unnamed: 0,Movie,Review Text,IMDb Rating
0,Ex Machina,Intelligent Movie.\nThis movie is obviously al...,9
1,Ex Machina,Extraordinary and thought-provoking.\n'Ex mach...,10
2,Ex Machina,"Poor story, only reasonable otherwise.\nIf I h...",3
3,Ex Machina,Had Great Potential.\nThis movie is one of the...,1
4,Eternals,Amazing visuals and philosophical concepts!\n\...,10
5,Eternals,Worst MCU film ever\n\nFollowing the events of...,3


In [21]:
# Rescale the model output by 10 (so it sits next to the "IMDb Rating" column
# on a comparable scale) and keep one decimal place.
pred_df["Predicted Sentiment"] = (pred * 10).round(1)
pred_df

Unnamed: 0,Movie,Review Text,IMDb Rating,Predicted Sentiment
0,Ex Machina,Intelligent Movie.\nThis movie is obviously al...,9,9.7
1,Ex Machina,Extraordinary and thought-provoking.\n'Ex mach...,10,2.9
2,Ex Machina,"Poor story, only reasonable otherwise.\nIf I h...",3,0.7
3,Ex Machina,Had Great Potential.\nThis movie is one of the...,1,3.7
4,Eternals,Amazing visuals and philosophical concepts!\n\...,10,0.2
5,Eternals,Worst MCU film ever\n\nFollowing the events of...,3,3.7


In [22]:
# One label per review: scores of 5 or below count as "Negative",
# anything above 5 as "Positive".
pred_label = [
    "Negative" if score <= 5 else "Positive"
    for score in pred_df["Predicted Sentiment"]
]

In [23]:
# Attach the Positive/Negative labels from `pred_label` as a new column and
# display the completed results frame.
pred_df["Predicted Review Sentiment"] = pred_label
pred_df

Unnamed: 0,Movie,Review Text,IMDb Rating,Predicted Sentiment,Predicted Review Sentiment
0,Ex Machina,Intelligent Movie.\nThis movie is obviously al...,9,9.7,Positive
1,Ex Machina,Extraordinary and thought-provoking.\n'Ex mach...,10,2.9,Negative
2,Ex Machina,"Poor story, only reasonable otherwise.\nIf I h...",3,0.7,Negative
3,Ex Machina,Had Great Potential.\nThis movie is one of the...,1,3.7,Negative
4,Eternals,Amazing visuals and philosophical concepts!\n\...,10,0.2,Negative
5,Eternals,Worst MCU film ever\n\nFollowing the events of...,3,3.7,Negative
