In [4]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

# Experiment configuration for the IMDB review sentiment task
SEED = 42          # Fixed seed so repeated runs of the notebook are comparable
max_words = 10000  # Top 10,000 words in the vocabulary
maxlen = 100       # Maximum length of each review
batch_size = 64

# Seed the Python, NumPy and TensorFlow random generators in one call so that
# weight initialisation, batch shuffling and dropout masks are repeatable.
tf.keras.utils.set_random_seed(SEED)

# Load the dataset, keeping only the `max_words` most frequent tokens
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=max_words)

# Pad the sequences to ensure equal length input (every review becomes `maxlen` tokens)
train_data = pad_sequences(train_data, maxlen=maxlen)
test_data = pad_sequences(test_data, maxlen=maxlen)

# Convert the 0/1 sentiment labels to one-hot vectors; the models below end in
# a 2-unit softmax trained with categorical cross-entropy.
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)


In [2]:
from tensorflow.keras import layers, models

# CNN text classifier, declared as a single layer stack:
# embedding -> conv -> max-pool -> conv -> global max-pool -> dense head
model_cnn = models.Sequential([
    # Maps each of the `max_words` token ids to a 128-dimensional vector
    layers.Embedding(max_words, 128, input_length=maxlen),
    # First 1D convolution: 128 filters, kernel width 5
    layers.Conv1D(128, 5, activation='relu'),
    # Downsample the sequence axis by a factor of 4
    layers.MaxPooling1D(pool_size=4),
    # Second 1D convolution over the pooled features
    layers.Conv1D(128, 5, activation='relu'),
    # Collapse the remaining sequence axis to one vector per review
    layers.GlobalMaxPooling1D(),
    layers.Dense(128, activation='relu'),
    # Two-way softmax, matching the one-hot encoded labels
    layers.Dense(2, activation='softmax'),
])


In [3]:

# Configure training: Adam optimiser, categorical cross-entropy loss, accuracy metric
model_cnn.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit the CNN for 5 epochs; the returned History object is kept in `cnn_history`.
# NOTE(review): the test split is also passed as validation data here, so the
# per-epoch validation numbers are computed on the same data later reported as
# "test" accuracy — confirm this is intended.
cnn_history = model_cnn.fit(
    train_data,
    train_labels,
    epochs=5,
    batch_size=batch_size,
    validation_data=(test_data, test_labels),
)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [4]:
# Score the trained CNN on the test split. The model was compiled with a single
# metric, so evaluate() yields two values: the loss followed by the accuracy.
cnn_scores = model_cnn.evaluate(test_data, test_labels)
test_loss_cnn, test_accuracy_cnn = cnn_scores
print(f"Test Accuracy (CNN): {test_accuracy_cnn * 100:.2f}%")


Test Accuracy (CNN): 83.36%


In [26]:
# Sanity check on the padded training matrix: (number of reviews, tokens per review)
padded_train_shape = train_data.shape
print(padded_train_shape)

(25000, 100)


In [27]:
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.layers import Dropout, BatchNormalization

# Build the RNN model for text classification: two stacked LSTM layers, each
# followed by dropout and batch normalisation, then a small dense head.
model_rnn = models.Sequential()
# Use the shared `maxlen` constant (not a literal 100) so the declared input
# length always matches the length the reviews were padded to.
model_rnn.add(layers.Embedding(input_dim=max_words, output_dim=256, input_length=maxlen))
# return_sequences=True emits the full sequence so the next LSTM can consume it
model_rnn.add(layers.LSTM(256, return_sequences=True, activation='tanh'))
model_rnn.add(Dropout(0.3))  # Regularise the first LSTM's outputs
model_rnn.add(BatchNormalization())
# Second LSTM returns only its final output (one vector per review)
model_rnn.add(layers.LSTM(128))
model_rnn.add(Dropout(0.3))
model_rnn.add(BatchNormalization())

# Dense head
model_rnn.add(layers.Dense(128, activation='relu'))
model_rnn.add(Dropout(0.4))  # Heavier dropout before the output layer
# Two-way softmax, matching the one-hot encoded labels
model_rnn.add(layers.Dense(2, activation='softmax'))

In [28]:
# Configure RNN training with the same optimiser, loss and metric as the CNN
model_rnn.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


In [30]:
# Fit the RNN for 5 epochs; the returned History object is kept in `rnn_history`.
# NOTE(review): as with the CNN, the test split doubles as validation data —
# confirm this is intended.
rnn_history = model_rnn.fit(
    train_data,
    train_labels,
    epochs=5,
    batch_size=batch_size,
    validation_data=(test_data, test_labels),
)

# Score the trained RNN on the test split: evaluate() yields loss, then accuracy
rnn_scores = model_rnn.evaluate(test_data, test_labels)
test_loss_rnn, test_accuracy_rnn = rnn_scores
print(f"Test Accuracy (RNN): {test_accuracy_rnn * 100:.2f}%")

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Accuracy (RNN): 82.86%


In [5]:
# from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score

# # Assuming you have trained both CNN and RNN models and have predictions and true labels
# # For example, y_true is the true labels, cnn_preds and rnn_preds are predictions from both models.

# def print_performance_metrics(y_true, y_pred, model_name):
#     # Accuracy
#     accuracy = accuracy_score(y_true, y_pred)
#     # Precision
#     precision = precision_score(y_true, y_pred, average='binary')
#     # Recall
#     recall = recall_score(y_true, y_pred, average='binary')
#     # F1 Score
#     f1 = f1_score(y_true, y_pred, average='binary')
#     # Confusion Matrix
#     conf_matrix = confusion_matrix(y_true, y_pred)
#     # ROC-AUC Score (for binary classification)
#     roc_auc = roc_auc_score(y_true, y_pred)

#     # Printing the metrics
#     print(f"{model_name} Performance Metrics:")
#     print(f"Accuracy: {accuracy:.4f}")
#     print(f"Precision: {precision:.4f}")
#     print(f"Recall: {recall:.4f}")
#     print(f"F1 Score: {f1:.4f}")
#     print(f"Confusion Matrix:\n{conf_matrix}")
#     print(f"ROC-AUC Score: {roc_auc:.4f}")
#     print("----------------------------------------------------")


# cnn_preds = model_cnn.predict(test_data)  # Get predictions from CNN model
# rnn_preds = model_rnn.predict(test_data)  # Get predictions from RNN model

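# # test_labels holds the one-hot true labels and *_preds the softmax outputs;
# # compare class indices (argmax), not the raw input sequences in test_data
# print_performance_metrics(test_labels.argmax(axis=1), cnn_preds.argmax(axis=1), "CNN Model")
# print_performance_metrics(test_labels.argmax(axis=1), rnn_preds.argmax(axis=1), "RNN Model")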
