In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.datasets import mnist
from sklearn.metrics import accuracy_score


In [2]:

# Load MNIST dataset
(X_train_full, y_train_full), (X_test, y_test) = mnist.load_data()
X_train_full, X_test = X_train_full / 255.0, X_test / 255.0

# Define the baseline model
def build_model(num_hidden_units=128, include_last_layer=True):
    model = Sequential([Flatten(input_shape=(28, 28))])
    model.add(Dense(num_hidden_units, activation='relu'))
    if include_last_layer:
        model.add(Dense(10, activation='softmax'))
    return model

# Train and evaluate the baseline model
baseline_model = build_model()
baseline_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
baseline_model.fit(X_train_full, y_train_full, epochs=3, batch_size=32, verbose=0)
baseline_accuracy = baseline_model.evaluate(X_test, y_test, verbose=0)[1]

# Ablation 1: Remove the hidden layer
model_no_hidden = build_model(num_hidden_units=0, include_last_layer=True)
model_no_hidden.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_no_hidden.fit(X_train_full, y_train_full, epochs=3, batch_size=32, verbose=0)
accuracy_no_hidden = model_no_hidden.evaluate(X_test, y_test, verbose=0)[1]

# Ablation 2: Remove the output layer
model_no_output = build_model(include_last_layer=False)
model_no_output.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
y_test_no_output = np.argmax(model_no_output.predict(X_test), axis=1)
accuracy_no_output = accuracy_score(y_test, y_test_no_output)

# Compare results
print("Baseline Accuracy:", baseline_accuracy)
print("Accuracy without Hidden Layer:", accuracy_no_hidden)
print("Accuracy without Output Layer:", accuracy_no_output)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


  super().__init__(**kwargs)


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Baseline Accuracy: 0.9735999703407288
Accuracy without Hidden Layer: 0.11349999904632568
Accuracy without Output Layer: 0.0074
