In [2]:
# Import necessary libraries
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Flatten
from tensorflow.keras.callbacks import CSVLogger
import matplotlib.pyplot as plt


In [3]:
# Dataset hyperparameters (nothing is loaded in this cell)
vocab_size = 10_000  # keep only the 10,000 most frequent words
maxlen = 200         # number of tokens each review is padded/truncated to


In [5]:
# Dataset hyperparameters
vocab_size = 10_000  # Vocabulary size passed to imdb.load_data as num_words
maxlen = 200         # Length every review is padded/truncated to

# Load the IMDb train/test splits, restricted to the `vocab_size` most common words
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

# Bring both splits to a uniform input length of `maxlen` tokens
x_train, x_test = (
    sequence.pad_sequences(reviews, maxlen=maxlen)
    for reviews in (x_train, x_test)
)

In [7]:
# Build the binary sentiment classifier:
#   Input -> Embedding -> Flatten -> Dense(64, relu) -> Dense(1, sigmoid)
#
# The sequence length is declared with an explicit Input layer instead of the
# Embedding `input_length` argument: that argument is deprecated in Keras 3
# (TensorFlow >= 2.16) and triggers a warning there. An Input layer gives
# Flatten/Dense a fully known shape on both Keras 2 and Keras 3.
model = Sequential([
    # Each sample is a sequence of `maxlen` integer word indices
    tf.keras.Input(shape=(maxlen,)),
    # Map every word index (< vocab_size) to a 32-dimensional vector
    Embedding(input_dim=vocab_size, output_dim=32),
    # Flatten the (maxlen, 32) embedding output into a single vector
    Flatten(),
    # Fully connected layer with ReLU activation
    Dense(64, activation='relu'),
    # Output layer with a single neuron (sigmoid for binary classification)
    Dense(1, activation='sigmoid'),
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [8]:
# Write per-epoch training metrics to a CSV file.
# NOTE(review): append=True adds rows to an existing training_log.csv, so
# re-running this cell accumulates epochs from earlier runs — confirm this is intended.
csv_logger = CSVLogger('training_log.csv', append=True)

# Fit the model, holding out 20% of the training data for validation;
# the returned History object keeps the per-epoch metrics for later cells.
history = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
    callbacks=[csv_logger],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [9]:
# Score the trained model on the held-out test split.
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(
    f"Test Accuracy: {test_accuracy:.4f}",
    f"Test Loss: {test_loss:.4f}",
    sep="\n",
)


Test Accuracy: 0.8628
Test Loss: 0.5750


In [10]:
# Interpret the training history numerically (summary statistics instead of plots)
import numpy as np

# Pull the per-epoch accuracy and loss curves out of the training history
train_accuracy, val_accuracy, train_loss, val_loss = (
    history.history[key]
    for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
)

def summarize_metrics(metric_values, metric_name):
    """Print summary statistics for one per-epoch training metric.

    Prints a header followed by the maximum, minimum, mean and last value of
    ``metric_values``, each formatted to four decimal places.

    Args:
        metric_values: Sequence of numeric values, one per epoch.
        metric_name: Label used in the header and on every printed line.

    Returns:
        None. All results are written to stdout.

    An empty sequence prints the header and a short notice instead of raising.
    """
    print(f"\n--- {metric_name} ---")

    # np.max/np.min raise ValueError on empty input and [-1] raises IndexError,
    # so report the missing data explicitly rather than crashing.
    if len(metric_values) == 0:
        print(f"No {metric_name} values recorded.")
        return

    print(f"Max {metric_name}: {np.max(metric_values):.4f}")
    print(f"Min {metric_name}: {np.min(metric_values):.4f}")
    print(f"Average {metric_name}: {np.mean(metric_values):.4f}")
    print(f"Final Epoch {metric_name}: {metric_values[-1]:.4f}")

# Report each tracked curve in turn: accuracies first, then losses
for metric_name, metric_values in (
    ("Training Accuracy", train_accuracy),
    ("Validation Accuracy", val_accuracy),
    ("Training Loss", train_loss),
    ("Validation Loss", val_loss),
):
    summarize_metrics(metric_values, metric_name)



--- Training Accuracy ---
Max Training Accuracy: 1.0000
Min Training Accuracy: 0.7665
Average Training Accuracy: 0.9717
Final Epoch Training Accuracy: 1.0000

--- Validation Accuracy ---
Max Validation Accuracy: 0.8698
Min Validation Accuracy: 0.8572
Average Validation Accuracy: 0.8665
Final Epoch Validation Accuracy: 0.8686

--- Training Loss ---
Max Training Loss: 0.4501
Min Training Loss: 0.0001
Average Training Loss: 0.0604
Final Epoch Training Loss: 0.0001

--- Validation Loss ---
Max Validation Loss: 0.5591
Min Validation Loss: 0.3110
Average Validation Loss: 0.4680
Final Epoch Validation Loss: 0.5591
