In [17]:
#!pip install pandas numpy 
!pip install contractions

  pid, fd = os.forkpty()




In [18]:
import pandas as pd
import re
from nltk.corpus import stopwords
import contractions
import numpy as np
import random
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Bidirectional, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import regularizers
np.random.seed(42)
random.seed(42)
tf.random.set_seed(42)

In [19]:
#Showing loaded data
file_path = '/kaggle/input/imdb-dataset-of-50k-movie-reviews/IMDB Dataset.csv'
imdb_data = pd.read_csv(file_path)
print(imdb_data.head(10))

                                              review sentiment
0  One of the other reviewers has mentioned that ...  positive
1  A wonderful little production. <br /><br />The...  positive
2  I thought this was a wonderful way to spend ti...  positive
3  Basically there's a family where a little boy ...  negative
4  Petter Mattei's "Love in the Time of Money" is...  positive
5  Probably my all-time favorite movie, a story o...  positive
6  I sure would like to see a resurrection of a u...  positive
7  This show was an amazing, fresh & innovative i...  negative
8  Encouraged by the positive comments about this...  negative
9  If you like original gut wrenching laughter yo...  positive


In [20]:
#Data cleaning
# Load stopwords
stop_words = set(stopwords.words('english'))
imdb_data['sentiment'] = imdb_data['sentiment'].map({'positive': 1, 'negative': 0})
imdb_data['sentiment'] = imdb_data['sentiment'].astype(int)

def clean_text(text):
    text = re.sub('<br />', ' ', text)  # Remove HTML tags
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # Keep only letters
    text = text.lower()  # Convert to lowercase
    text = contractions.fix(text)  # Expand contractions like "don't" to "do not"
    # Remove stopwords
    text = ' '.join([word for word in text.split() if word not in stop_words])
    return text

imdb_data['review'] = imdb_data['review'].apply(clean_text)
imdb_data.head(10)

Unnamed: 0,review,sentiment
0,one reviewers mentioned watching oz episode ho...,1
1,wonderful little production filming technique ...,1
2,thought wonderful way spend time hot summer we...,1
3,basically family little boy jake thinks zombie...,0
4,petter matteis love time money visually stunni...,1
5,probably alltime favorite movie story selfless...,1
6,sure would like see resurrection dated seahunt...,1
7,show amazing fresh innovative idea first aired...,0
8,encouraged positive comments film looking forw...,0
9,like original gut wrenching laughter like movi...,1


In [21]:
#Splitting the data
X = imdb_data['review']
y = imdb_data['sentiment']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

print(f"Training set: {len(X_train)} reviews")
print(f"Testing set: {len(X_test)} reviews")
print(f"Training set: {len(y_train)} sentiments")
print(f"Testing set: {len(y_test)} sentiments")

#For finding maxlen
#review_lengths = [len(seq) for seq in X_train_seq]
#plt.hist(review_lengths, bins=100)
#plt.show()

Training set: 42500 reviews
Testing set: 7500 reviews
Training set: 42500 sentiments
Testing set: 7500 sentiments


In [22]:
#Text Vectorization
tokenizer = Tokenizer(num_words=30000)  # Use the top 30,000 words
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

maxlen = 150  # Maximum review length , Check from the graph
X_train_pad = pad_sequences(X_train_seq, maxlen=maxlen)
X_test_pad = pad_sequences(X_test_seq, maxlen=maxlen)

print(f"Padded training data shape: {X_train_pad.shape}")
print(f"Padded testing data shape: {X_test_pad.shape}")

Padded training data shape: (42500, 150)
Padded testing data shape: (7500, 150)


<font size = 5 color = 'red'>Building and training an RNN model using a Bidirectional LSTM layer for sentiment analysis</font>

In [23]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Bidirectional, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
import numpy as np

model = Sequential([
    Embedding(input_dim=10000, output_dim=200, input_length=100),  # Matches training configuration
    Dropout(0.15),
    Bidirectional(LSTM(128, dropout=0.15, recurrent_dropout=0.15, kernel_regularizer=regularizers.l2(0.01))),
    Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    Dropout(0.15),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer=Adam(learning_rate=0.0003), loss='binary_crossentropy', metrics=['accuracy'])
model.build(input_shape=(None, 100))  # None for batch size, 100 for input length

weights_path = "/kaggle/input/sentiment/tensorflow2/default/1/model_4.weights.h5"
try:
    model.load_weights(weights_path)
    print("Model weights loaded successfully.")
except Exception as e:
    print("Error loading weights:", e)


loss, accuracy = model.evaluate(X_test_pad, y_test, verbose=1)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

predictions = model.predict(X_test_pad)
predictions = (predictions > 0.5).astype(int).flatten()

print("Sample Predictions:", predictions[:10])


  saveable.load_own_variables(weights_store.get(inner_path))


Model weights loaded successfully.
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 67ms/step - accuracy: 0.8771 - loss: 0.3444
Test Accuracy: 87.83%
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 66ms/step
Sample Predictions: [0 1 0 1 0 1 1 0 0 0]


In [25]:
# Sample reviews for testing (31 examples)
sample_reviews = [
    "The movie was fantastic! I loved it.",
    "It was a waste of time. Terrible story.",
    "An average film with decent acting but poor direction.",
    "Absolutely brilliant! A must-watch for everyone.",
    "The plot was dull and predictable. Not worth it.",
    "An exhilarating experience! The visuals were stunning.",
    "I fell asleep halfway through. It was boring.",
    "One of the best movies I've ever seen.",
    "The characters lacked depth, and the story was weak.",
    "Loved the chemistry between the leads! So heartwarming.",
    "Terrible acting and horrible script. Would not recommend.",
    "A decent movie to watch with friends.",
    "The ending was unexpected and gave me chills!",
    "I regret wasting my money on this disaster.",
    "It was funny and entertaining throughout.",
    "The special effects were impressive, but the story fell flat.",
    "A masterpiece! Truly a cinematic achievement.",
    "The dialogues were cheesy, but it was fun overall.",
    "An emotional rollercoaster! I couldn't hold back my tears.",
    "Too many plot holes. It felt rushed and incomplete.",
    "The soundtrack was amazing and fit the scenes perfectly.",
    "Mediocre at best. Nothing new or interesting.",
    "A brilliant performance by the lead actor!",
    "A confusing mess with no clear direction.",
    "The comedy was spot on! Had me laughing the entire time.",
    "The pacing was off, and it felt too long.",
    "One of the most inspiring movies I've watched.",
    "Unrealistic and overhyped. Disappointed.",
    "A thought-provoking film that stays with you.",
    "The cinematography was breathtaking, but the plot was lacking.",
    "One of the other reviewers has mentioned that after watching just 1 Oz episode you'll be hooked. They are right, as this is exactly what happened with me. \n\nThe first thing that struck me about Oz was its brutality and unflinching scenes of violence, which set in right from the word GO. Trust me, this is not a show for the faint hearted or timid. This show pulls no punches with regards to drugs, sex or violence. Its is hardcore, in the classic use of the word.\n\nIt is called OZ as that is the nickname given to the Oswald Maximum Security State Penitentiary. It focuses mainly on Emerald City, an experimental section of the prison where all the cells have glass fronts and face inwards, so privacy is not high on the agenda. Em City is home to many...Aryans, Muslims, gangstas, Latinos, Christians, Italians, Irish and more...so scuffles, death stares, dodgy dealings and shady agreements are never far away.\n\nI would say the main appeal of the show is due to the fact that it goes where other shows wouldn't dare. Forget pretty pictures painted for mainstream audiences, forget charm, forget romance...OZ doesn't mess around. The first episode I ever saw struck me as so nasty it was surreal, I couldn't say I was ready for it, but as I watched more, I developed a taste for Oz, and got accustomed to the high levels of graphic violence. Not just violence, but injustice (crooked guards who'll be sold out for a nickel, inmates who'll kill on order and get away with it, well-mannered, middle-class inmates being turned into prison bitches due to their lack of street skills or prison experience). Watching Oz, you may become comfortable with what is uncomfortable viewing...that's if you can get in touch with your darker side."
]

    #Convert the sample reviews to sequences
sample_sequences = tokenizer.texts_to_sequences(sample_reviews)
    #Pad the sequences and make predictions
sample_padded = pad_sequences(sample_sequences, maxlen=100)
sample_predictions = model.predict(sample_padded, verbose=1)

print("Sample Review Predictions:")
for review, prediction in zip(sample_reviews, sample_predictions):
    sentiment = "Positive" if prediction > 0.5 else "Negative"
    print(f"Review: {review}")
    print(f"Predicted Sentiment: {sentiment}")
    print(f"Prediction Score: {prediction[0]:.2f}")
    print("-" * 50)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
Sample Review Predictions:
Review: The movie was fantastic! I loved it.
Predicted Sentiment: Positive
Prediction Score: 0.92
--------------------------------------------------
Review: It was a waste of time. Terrible story.
Predicted Sentiment: Negative
Prediction Score: 0.12
--------------------------------------------------
Review: An average film with decent acting but poor direction.
Predicted Sentiment: Negative
Prediction Score: 0.32
--------------------------------------------------
Review: Absolutely brilliant! A must-watch for everyone.
Predicted Sentiment: Positive
Prediction Score: 0.88
--------------------------------------------------
Review: The plot was dull and predictable. Not worth it.
Predicted Sentiment: Negative
Prediction Score: 0.29
--------------------------------------------------
Review: An exhilarating experience! The visuals were stunning.
Predicted Sentiment: Positive
Prediction Score: 