Binary classification using Deep Neural Networks. Example: classify movie reviews as
positive or negative, based solely on the text content of the reviews,
using the IMDB dataset.

In [3]:
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, Dropout, GlobalAveragePooling1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Read the IMDB reviews CSV into a DataFrame
df = pd.read_csv("IMDB Dataset.csv")

# Turn the sentiment strings into integer class ids for the classifier,
# and keep the raw review text as the model input.
sentiment_encoder = LabelEncoder()
labels = sentiment_encoder.fit_transform(df['sentiment'])
texts = df['review']

# Train-test split
# Split on row positions first so the vocabulary below can be built from the
# training reviews only; fitting the tokenizer on every review would let
# held-out text influence which words are kept (preprocessing leakage).
train_idx, test_idx = train_test_split(
    list(range(len(texts))), test_size=0.2, random_state=42
)

# Tokenize text
# Keep only the most frequent words, ranked on the training reviews alone.
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(texts.iloc[train_idx])

# Encode every review as integer word ids, then pad/truncate to 200 tokens
# so all rows have the same length.
X = tokenizer.texts_to_sequences(texts)
X = pad_sequences(X, maxlen=200)

# Carve the encoded matrix and the labels with the same index split.
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = labels[train_idx], labels[test_idx]

# Build model
# Pipeline: word ids -> 64-d embeddings -> mean over the sequence ->
# small dense head -> one sigmoid probability.
model = Sequential([
    # input_dim is taken from the tokenizer so the embedding table always
    # matches the vocabulary cap used when encoding the reviews.
    # `input_length` is intentionally not passed: it is deprecated in Keras 3
    # and the sequence length is inferred from the data at fit time.
    Embedding(input_dim=tokenizer.num_words, output_dim=64),
    GlobalAveragePooling1D(),  # Pool the sequence to a fixed-size vector
    Dense(64, activation='relu'),
    Dropout(0.5),  # Regularize the dense head
    Dense(1, activation='sigmoid'),  # Binary classification output
])

# Compile and train
# Binary cross-entropy with the Adam optimizer; accuracy is the tracked metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=5,
)

# Evaluate
# evaluate() returns the loss followed by the compiled metrics, so index 1
# of the result is the test accuracy.
accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy[1]:.4f}")

# Predictions
# Score the first few held-out reviews; the count is named once so the
# prediction slice and the printed rows cannot drift apart.
SAMPLE_COUNT = 5
sample_labels = y_test[:SAMPLE_COUNT]

# A sigmoid output above 0.5 is treated as class 1.
predictions = (model.predict(X_test[:SAMPLE_COUNT]) > 0.5).astype(int)

# Print predicted and actual sentiments
# ravel() flattens the one-column prediction array so each comparison is on
# a plain scalar rather than on a one-element array.
for predicted, actual in zip(predictions.ravel(), sample_labels):
    predicted_name = 'Positive' if predicted == 1 else 'Negative'
    actual_name = 'Positive' if actual == 1 else 'Negative'
    print(f"Predicted: {predicted_name} | Actual: {actual_name}")


Epoch 1/5
625/625 ━━━━━━━━━━━━━━━━━━━━ 9s 12ms/step - accuracy: 0.7008 - loss: 0.5638
Epoch 2/5
625/625 ━━━━━━━━━━━━━━━━━━━━ 8s 12ms/step - accuracy: 0.8898 - loss: 0.2755
Epoch 3/5
625/625 ━━━━━━━━━━━━━━━━━━━━ 8s 12ms/step - accuracy: 0.9140 - loss: 0.2293
Epoch 4/5
625/625 ━━━━━━━━━━━━━━━━━━━━ 8s 12ms/step - accuracy: 0.9275 - loss: 0.2027
Epoch 5/5
625/625 ━━━━━━━━━━━━━━━━━━━━ 8s 12ms/step - accuracy: 0.9310 - loss: 0.1888
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8846 - loss: 0.2831
Accuracy: 0.8851
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 171ms/step
Predicted: Negative | Actual: Positive
Predicted: Positive | Actual: Positive
Predicted: Negative | Actual: Negative
Predicted: Positive | Actual: Positive
Predicted: Negative | Actual: Negative
