# IMDB High-Rating Classification from Movie Overviews using LSTM

In [None]:
#IMDB DATASET
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn import metrics
from mlxtend.plotting import plot_confusion_matrix
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")


In [None]:
# Load the IMDB table and derive a binary target from the rating column.
df = pd.read_csv('imdb_top_1000.csv')

# Keep only rows that have both the text feature and the rating. The .copy()
# detaches the filtered frame so the column assignment below writes to an
# independent object and cannot trigger pandas' SettingWithCopyWarning.
df = df.dropna(subset=['Overview', 'IMDB_Rating']).copy()

# Label is 1 when IMDB_Rating >= 8, otherwise 0.
df['Label'] = (df['IMDB_Rating'] >= 8).astype(int)

texts = df['Overview'].astype(str)   # model input: one overview string per row
labels = df['Label'].values          # NumPy array of 0/1 targets

# Last expression of the cell: show how many rows fall into each class.
df['Label'].value_counts()


In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [None]:
# Text-vectorisation settings.
vocab_size = 10000   # passed to Tokenizer as num_words
max_len = 200        # passed to pad_sequences as maxlen

# Build the word index; the '<OOV>' token stands in for out-of-vocabulary words.
# NOTE(review): the tokenizer is fitted on every overview, before the
# train/test split that happens further down - confirm this is intended.
tokenizer = Tokenizer(oov_token='<OOV>', num_words=vocab_size)
tokenizer.fit_on_texts(texts)

# Encode each overview as a list of word ids, then bring all of them to the
# same length so they can be stacked into one 2-D array.
sequences = tokenizer.texts_to_sequences(texts)
padded = pad_sequences(sequences=sequences, maxlen=max_len)


In [None]:
# Hold out 20% of the samples for testing. stratify=labels keeps the 0/1 class
# ratio the same in both partitions, so the small test set is not skewed by an
# unlucky shuffle; random_state makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    padded,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)
# Binary text classifier: token ids -> embeddings -> LSTM -> dense head.
# The deprecated `input_length` argument is not passed to Embedding: Keras 3
# only warns about it, and the sequence length is taken from the input data.
model = tf.keras.Sequential([
    # One trainable 128-dimensional vector per token id (vocab_size ids).
    tf.keras.layers.Embedding(vocab_size, 128),
    # return_sequences=False: only the last LSTM output is passed on.
    tf.keras.layers.LSTM(128, return_sequences=False),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(32, activation='relu'),
    # Single sigmoid unit: a score in (0, 1) for the positive class (label 1).
    tf.keras.layers.Dense(1, activation='sigmoid'),
])


In [None]:
# Adam optimiser with binary cross-entropy, tracking accuracy during training.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Validate on a slice of the training data rather than on (x_test, y_test):
# the test set is scored separately after training and should stay unseen
# until then, so its result remains an honest estimate of generalisation.
history = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=5,
    validation_split=0.1,
)


In [None]:
# Score the trained model on the held-out split.
loss, accuracy = model.evaluate(x_test, y_test)
print("Test Accuracy:", accuracy)

# Collapse the model outputs to one dimension, then turn them into hard
# 0/1 predictions: values strictly above 0.5 become 1, everything else 0.
y_pred = model.predict(x_test).reshape(-1)
y_pred = (y_pred > 0.5).astype(int)

# Text summary of the classification metrics on the test split.
report = metrics.classification_report(y_test, y_pred)
print(report)

In [None]:
# Count true/predicted label pairs; labels=[0, 1] fixes the row/column order.
cm = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred, labels=[0, 1])

# Draw the matrix with mlxtend, labelling class 0 'Negative' and class 1 'Positive'.
plot_confusion_matrix(conf_mat=cm, class_names=['Negative', 'Positive'])
plt.title("Confusion Matrix")
plt.show()
