In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import os

# Seed every RNG the training pipeline can draw from with a single call.
# keras.utils.set_random_seed seeds Python's `random`, NumPy and the
# TensorFlow backend together; seeding only TensorFlow and NumPy leaves
# Python's `random` unseeded, which can make weight initialisation differ
# between otherwise identical runs.
keras.utils.set_random_seed(42)

# Load Fashion-MNIST
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Flatten every image to a 784-element float32 row and divide by 255.
x_train, x_test = (
    images.reshape((-1, 784)).astype("float32") / 255.0
    for images in (x_train, x_test)
)

# Train/val split: the first 10% of the training rows become the validation
# set, the remainder is what the models are actually fitted on.
val_size = int(0.1 * len(x_train))
x_val, x_train2 = x_train[:val_size], x_train[val_size:]
y_val, y_train2 = y_train[:val_size], y_train[val_size:]

# One-hot encode the integer labels of all three subsets.
num_classes = 10
y_train_cat, y_val_cat, y_test_cat = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train2, y_val, y_test)
)

# Build MLP
def build_mlp(depth, width):
    """Assemble an uncompiled fully connected classifier.

    Args:
        depth: Number of hidden ``Dense`` layers.
        width: Number of units in each hidden layer.

    Returns:
        A ``keras.Sequential`` model taking 784-element inputs, followed by
        ``depth`` ReLU layers of ``width`` units and a softmax output layer
        with ``num_classes`` units.
    """
    # Build the stack in the same order the layers are applied.
    stack = [layers.Input(shape=(784,))]
    stack.extend(layers.Dense(width, activation="relu") for _ in range(depth))
    stack.append(layers.Dense(num_classes, activation="softmax"))
    return keras.Sequential(stack)

# Train model
def run(depth, width):
    """Train one MLP configuration and score it on the test set.

    The model comes from ``build_mlp(depth, width)``, is compiled with
    ``Adam(1e-3)`` and categorical cross-entropy, and is fitted on the
    module-level training arrays for 20 epochs with batches of 128, using
    the module-level validation arrays for per-epoch validation.

    Args:
        depth: Number of hidden layers, forwarded to ``build_mlp``.
        width: Units per hidden layer, forwarded to ``build_mlp``.

    Returns:
        A tuple ``(history, test_acc, n_params)``: the ``history`` dict
        recorded by ``fit``, the accuracy returned by ``evaluate`` on the
        test set, and the model's ``count_params()`` value.
    """
    net = build_mlp(depth, width)

    adam = keras.optimizers.Adam(1e-3)
    net.compile(optimizer=adam, loss="categorical_crossentropy", metrics=["accuracy"])

    fit_options = dict(
        validation_data=(x_val, y_val_cat),
        epochs=20,
        batch_size=128,
        verbose=0,
    )
    training_log = net.fit(x_train2, y_train_cat, **fit_options)

    # evaluate() returns [loss, accuracy]; only the accuracy is reported.
    _, test_acc = net.evaluate(x_test, y_test_cat, verbose=0)
    n_params = net.count_params()
    return training_log.history, test_acc, n_params

# Experiment grid: every depth is paired with every width.
depths = [1, 2, 3, 4]
widths = [32, 128, 512]
results = []

# Output directory for the JSON dump and the figures.
os.makedirs("results", exist_ok=True)

# Run all experiments, depth-major (all widths for depth 1, then depth 2, ...).
grid = [(d, w) for d in depths for w in widths]
for d, w in grid:
    print(f"Running depth={d}, width={w}")
    hist, acc, params = run(d, w)
    record = dict(
        depth=d,
        width=w,
        test_accuracy=float(acc),  # plain float so json can serialise it
        params=params,
        history=hist,
    )
    results.append(record)

# Save raw results
with open("results/mlp_results.json", "w") as f:
    json.dump(results, f)

# ---- PLOTTING ----

# Plot train/val curves for EVERY model
def plot_curves():
    """Save a loss/accuracy figure for every entry in ``results``.

    For each result a 10x4 figure with two side-by-side panels is drawn:
    training vs. validation loss on the left and training vs. validation
    accuracy on the right. The figure is written to
    ``results/d<depth>_w<width>_curves.png`` and then closed.
    """
    # (training key, validation key, title suffix) for the left and right panel.
    panels = (
        ("loss", "val_loss", " Loss"),
        ("accuracy", "val_accuracy", " Accuracy"),
    )

    for entry in results:
        tag = f"d{entry['depth']}_w{entry['width']}"
        curves = entry["history"]

        fig, axes = plt.subplots(1, 2, figsize=(10, 4))
        for ax, (train_key, val_key, suffix) in zip(axes, panels):
            ax.plot(curves[train_key], label="train")
            ax.plot(curves[val_key], label="val")
            ax.set_title(tag + suffix)
            ax.legend()

        fig.tight_layout()
        fig.savefig(f"results/{tag}_curves.png")
        # Close explicitly so figures do not pile up across the loop.
        plt.close(fig)


plot_curves()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
26421880/26421880 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Running depth=1, width=32
Running depth=1, width=128
Running depth=1, width=512
Running depth=2, width=32
Running depth=2, width=128
Running depth=2, width=512
Running depth=3, width=32
Running depth=3, width=128
Running depth=3, width=512
Ru