In [1]:
pip install keras



In [2]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import Callback
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense,Embedding,LSTM

print("All Dependencies Installed !")

All Dependencies Installed !


In [3]:
df = pd.read_csv(r"/content/IMDB Dataset.csv")
df["sentiment"].replace({"positive": 1, "negative": 0}, inplace=True)

x = np.array(df["review"].values)
y = np.array(df["sentiment"].values)

x_filtered = []

for review in x:

    #lowercasing the sentence
    review = review.lower()

    # removing punctuations from sentence
    for i in review:
        punc = '''  !()-[]{};:'"\,<>./?@#$%^&*_~  '''
        if i in punc :
            review = review.replace(i, " ")

    x_filtered.append(review)

print("Data Preparation Stage-1 completed !")

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["sentiment"].replace({"positive": 1, "negative": 0}, inplace=True)
  df["sentiment"].replace({"positive": 1, "negative": 0}, inplace=True)


Data Preparation Stage-1 completed !


In [4]:
# One-Hot Encoding each sentence
vocalbulary_size = 5000
onehot_encoded = [one_hot(review,vocalbulary_size) for review in x_filtered]

# Padding each encoded sentence to have a max_length=500
max_length=500
x_padded = pad_sequences(onehot_encoded,max_length,padding="post")

x_train,x_test,y_train,y_test = train_test_split(x_padded,y,test_size=0.2)

print("Data Preparation Stage-2 completed !")

Data Preparation Stage-2 completed !


In [5]:
model = Sequential()
embeded_vector_size = 35
model.add(Embedding(vocalbulary_size,embeded_vector_size,input_length=max_length))
model.add(LSTM(100))
model.add(Dense(1,activation="sigmoid"))

model.compile(optimizer='adam', loss='binary_crossentropy',metrics=["accuracy"])

print(model.summary())
print("Model Creation Completed !")




None
Model Creation Completed !


In [6]:

# Model converges at 0.87 accuracy with current hyperparameters.
model.fit(x_train,y_train,epochs=50,validation_data=(x_test,y_test))

# model.save("IMDB_sentiment_analysis")

print("Model Training Completed !")

Epoch 1/50
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 21ms/step - accuracy: 0.4995 - loss: 0.6934 - val_accuracy: 0.4977 - val_loss: 0.6907
Epoch 2/50
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 21ms/step - accuracy: 0.5168 - loss: 0.6891 - val_accuracy: 0.5010 - val_loss: 0.6893
Epoch 3/50
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 20ms/step - accuracy: 0.5273 - loss: 0.6809 - val_accuracy: 0.5242 - val_loss: 0.6887
Epoch 4/50
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 20ms/step - accuracy: 0.5359 - loss: 0.6673 - val_accuracy: 0.5244 - val_loss: 0.6923
Epoch 5/50
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 20ms/step - accuracy: 0.5331 - loss: 0.6567 - val_accuracy: 0.5090 - val_loss: 0.6955
Epoch 6/50
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 20ms/step - accuracy: 0.5370 - loss: 0.6485 - val_accuracy: 0.5101 - val_loss: 0.7111
Epoc

In [7]:
trained_model = model
predicted = trained_model.predict(x_test)[2]

sentiment = 1 if predicted > 0.6 else 0

print("PREDICTED : ",sentiment)
print("ACTUAL : ",y_test[2])





[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step
PREDICTED :  1
ACTUAL :  1


In [9]:
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Assume these are predefined
trained_model = model  # Replace with your trained model
vocabulary_size = 5000  # Vocabulary size for one-hot encoding
max_length = 500       # Maximum length for padding

def get_sentiment(sentence: str):
    if not isinstance(sentence, str):
        raise Exception("Input needs to be of type 'str'")

    # Lowercasing and punctuation removal
    sentence = sentence.lower()
    punc = '''!()-[]{};:'"\,<>./?@#$%^&*_~'''
    for char in sentence:
        if char in punc:
            sentence = sentence.replace(char, " ")

    # One-hot encoding the sentence
    onehot_encoded = one_hot(sentence, vocabulary_size)

    # Padding the encoded sentence to match the required input length
    padded_sequence = pad_sequences([onehot_encoded], maxlen=max_length, padding="post")

    # Predict sentiment
    if trained_model is None:
        raise Exception("Trained model is not defined.")
    predicted = trained_model.predict(padded_sequence)[0][0]

    # Determine sentiment based on the prediction threshold
    sentiment = 1 if predicted > 0.6 else 0

    if sentiment == 1:
        print("Positive")
    else:
        print("Negative")

    return sentiment

# Example usage
# Ensure you load your trained model before calling this function
# trained_model = load_model("path_to_your_model.h5")
get_sentiment("That movie was really good!")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
Positive


1

In [10]:
get_sentiment("Terrible plot with no direction. The acting was mediocre at best. A complete waste of time.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Negative


0