In [46]:
import tensorflow as tf
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

# Step 1: Download the IMDB dataset
# `num_words = 10000` limits the vocabulary to the 10,000 most frequent words
# (see the Keras `imdb.load_data` docs), which matches the default `dimension`
# of `vectorize_sequences` used in Step 2.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words = 10000)

# Step 2: Preprocess the data
def vectorize_sequences(sequences, dimension = 10000):
    """Multi-hot encode integer sequences into a dense float32 matrix.

    Args:
        sequences: Sized iterable of integer index collections, one per sample.
        dimension: Width of each output row; every index in a sequence must
            be a valid position along an axis of this length.

    Returns:
        A float32 array of shape `(len(sequences), dimension)` where entry
        `[i, j]` is 1.0 if index `j` occurs in the i-th sequence, else 0.0.
    """
    n_samples = len(sequences)
    multi_hot = np.zeros(shape = (n_samples, dimension), dtype = np.float32)

    # Fancy indexing switches on every listed column of the row in a single
    # assignment; repeated indices simply set the same cell again.
    for row, word_indices in enumerate(sequences):
        multi_hot[row, word_indices] = 1.0

    return multi_hot

# Turn each split's lists of word indices into a multi-hot float32 matrix.
x_train = vectorize_sequences(sequences = x_train)
x_test = vectorize_sequences(sequences = x_test)

# Labels become float32 column vectors with one row per encoded review.
y_train = np.asarray(y_train, dtype = np.float32).reshape((len(x_train), 1))
y_test = np.asarray(y_test, dtype = np.float32).reshape((len(x_test), 1))

# Step 3: Define the model architecture
# An explicit Input layer declares the 10000-wide multi-hot feature vector.
# Passing `input_shape` to the first Dense layer is deprecated in recent
# Keras releases and triggers a warning, so the shape is declared here instead.
model = tf.keras.Sequential()
model.add(tf.keras.Input(shape = (10000,)))
model.add(tf.keras.layers.Dense(units = 32, activation = 'relu'))
model.add(tf.keras.layers.Dense(units = 32, activation = 'relu'))
# A single sigmoid unit yields one score in (0, 1) per review.
model.add(tf.keras.layers.Dense(units = 1, activation = 'sigmoid'))

# Step 4: Compile the model
# Binary cross-entropy pairs with the sigmoid output and the 0/1 labels.
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

# Step 5: Train the model
# NOTE(review): no validation data is passed to fit(), so overfitting across
# the 10 epochs is not monitored; consider `validation_split` with early stopping.
model.fit(x_train, y_train, epochs = 10, batch_size = 16)

# Step 6: Evaluate the model
# evaluate() returns the loss followed by the metrics given to compile().
loss, accuracy = model.evaluate(x_test, y_test)
print('Loss:', loss)
print('Accuracy:', accuracy)

# Step 7: Generate predictions for the test data
predictions = model.predict(x_test)

# Step 8: Convert the predictions to a binary class
# Scores strictly above 0.5 are labelled 1, everything else 0.
predictions = (predictions > 0.5).astype(int)

# Step 9: Print the classification report
print(classification_report(y_test, predictions))

# Step 10: Print the confusion matrix
print(confusion_matrix(y_test, predictions))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 1.2013814449310303
Accuracy: 0.8602799773216248
              precision    recall  f1-score   support

         0.0       0.85      0.88      0.86     12500
         1.0       0.88      0.84      0.86     12500

    accuracy                           0.86     25000
   macro avg       0.86      0.86      0.86     25000
weighted avg       0.86      0.86      0.86     25000

[[11004  1496]
 [ 1997 10503]]
