In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [2]:
# Load the IMDB dataset
max_features = 10000  # Consider only the top 10,000 most common words in the dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [3]:
# Preprocess the data
max_len = 500  # Maximum length of each review
x_train = pad_sequences(x_train, maxlen=max_len)
x_test = pad_sequences(x_test, maxlen=max_len)

In [4]:
# Define the model architecture
model = keras.Sequential()
model.add(keras.layers.Embedding(max_features, 32, input_length=max_len))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.Conv1D(64, 5, activation='relu'))
model.add(keras.layers.GlobalMaxPooling1D())
model.add(keras.layers.Dense(1, activation='sigmoid'))

In [5]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [6]:
# Train the model
model.fit(x_train, y_train, epochs=5, batch_size=64, validation_data=(x_test, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f0cd6f1dae0>

In [7]:
# Evaluate the model
loss, accuracy = model.evaluate(x_test, y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

Test Loss: 0.317460298538208
Test Accuracy: 0.8763999938964844


In [8]:
# Making Predictions for testing data
np.set_printoptions(suppress=True)
result = model.predict(x_test)



In [9]:
result

array([[0.00477739],
       [0.9829747 ],
       [0.8253482 ],
       ...,
       [0.05884642],
       [0.28146031],
       [0.9926024 ]], dtype=float32)

In [10]:
y_pred = np.zeros(len(result))
for i, score in enumerate(result):
    y_pred[i] = np.round(score)

In [11]:
print(y_pred[10])

1.0


In [12]:
print(y_test[10])

1
