In [None]:
!pip install tensorflow



In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, Dense, Flatten, Dropout, Input
from tensorflow.keras.layers import GlobalMaxPooling1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Import the Tokenizer from tensorflow.keras.preprocessing.text
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import set_random_seed, to_categorical

In [None]:
# 2. Load the dataset (IMDB dataset from Keras)
# Produces X_train / X_test (padded integer sequences) and y_train / y_test
# (one-hot labels) for the cells that follow.

# Parameters
vocab_size = 10000  # Limit the vocabulary to top 10,000 words
max_sequence_length = 300  # Limit the length of input sequences
embedding_dim = 100  # Embedding vector size

# Load the IMDB dataset; word indices are capped at `vocab_size`.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

# One-hot encode the 0/1 sentiment labels to match the 2-unit softmax output.
# `num_classes` is given explicitly so the encoding does not depend on which
# label values happen to be present in the array.
y_train = to_categorical(y_train, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)

# Pad (or truncate) every review to exactly `max_sequence_length` tokens so
# the model receives uniformly sized input.
X_train = pad_sequences(X_train, maxlen=max_sequence_length)
X_test = pad_sequences(X_test, maxlen=max_sequence_length)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# 3. Build the CNN Model
# Architecture: Embedding -> (Conv1D + MaxPooling1D) x 2 -> Flatten ->
# Dense(128) + Dropout -> 2-way softmax.

# Seed the Python, NumPy and backend random generators before any weights are
# created, so that re-running the notebook is as repeatable as the backend
# allows (weight init, dropout masks, batch shuffling).
SEED = 42
set_random_seed(SEED)

model = Sequential()

# Declare the input shape explicitly: one padded sequence of token ids.
# Embedding's `input_length` argument is deprecated in Keras 3, so the
# sequence length is supplied here instead.
model.add(Input(shape=(max_sequence_length,)))

# Add Embedding layer
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim))

# Add Conv1D layer
model.add(Conv1D(filters=128, kernel_size=5, activation='relu'))
model.add(MaxPooling1D(pool_size=2))

# Add another Conv1D layer
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))

# Add Flatten layer
model.add(Flatten())

# Add fully connected Dense layer
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))

# Output layer: two softmax units, matching the one-hot encoded labels
model.add(Dense(2, activation='softmax'))

# Compile the model; categorical cross-entropy pairs with the one-hot targets
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])




In [None]:
# 4. Train the model
# Training hyper-parameters gathered in one place; 20% of the training data is
# held out for validation via `validation_split`.
# NOTE(review): the recorded run shows val_loss rising after epoch 2 while
# training accuracy keeps climbing — consider fewer epochs or early stopping.
fit_options = dict(epochs=5, batch_size=64, validation_split=0.2)
history = model.fit(X_train, y_train, **fit_options)

Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 260ms/step - accuracy: 0.6227 - loss: 0.5925 - val_accuracy: 0.8844 - val_loss: 0.2803
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 267ms/step - accuracy: 0.9273 - loss: 0.1978 - val_accuracy: 0.8962 - val_loss: 0.2582
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 269ms/step - accuracy: 0.9672 - loss: 0.0965 - val_accuracy: 0.8952 - val_loss: 0.3113
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 270ms/step - accuracy: 0.9837 - loss: 0.0489 - val_accuracy: 0.8816 - val_loss: 0.4538
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 289ms/step - accuracy: 0.9940 - loss: 0.0229 - val_accuracy: 0.8858 - val_loss: 0.5889


In [None]:
# 5. Evaluate the model
# evaluate() returns the compiled loss followed by the compiled metric
# (accuracy); unpack both and report accuracy as a percentage.
test_scores = model.evaluate(X_test, y_test)
loss, accuracy = test_scores
print(f'Test Accuracy: {accuracy * 100:.2f}%')


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 40ms/step - accuracy: 0.8710 - loss: 0.6456
Test Accuracy: 87.03%


In [None]:
# 6. Predictions
# Collapse the per-class scores (model output) and the one-hot test labels
# back to integer class indices by taking the arg-max along the class axis.
class_scores = model.predict(X_test)
y_pred = np.argmax(class_scores, axis=1)
y_true = np.argmax(y_test, axis=1)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 37ms/step


In [None]:
# 7. Classification Report
# Per-class precision / recall / F1 computed from the integer class labels.
report_text = classification_report(y_true, y_pred)
print("Classification Report:", report_text, sep="\n")

Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.90      0.87     12500
           1       0.90      0.84      0.87     12500

    accuracy                           0.87     25000
   macro avg       0.87      0.87      0.87     25000
weighted avg       0.87      0.87      0.87     25000



In [None]:
# 8. Accuracy Summary
# Report the last-epoch training and validation accuracy recorded by fit().
print(
    f"Final Training Accuracy: {history.history['accuracy'][-1] * 100:.2f}%",
    f"Final Validation Accuracy: {history.history['val_accuracy'][-1] * 100:.2f}%",
    sep="\n",
)

Final Training Accuracy: 99.30%
Final Validation Accuracy: 88.58%
