# Deep MLP Lab Notebook
**Scope:** MNIST LR-finder + MLP, Fashion-MNIST 100-layer activation comparison, CIFAR-10 20-layer DNN (ELU + He) with and without BatchNorm.  


In [1]:

import sys
# Colab is detected by checking whether the `google.colab` module is already loaded.
IN_COLAB = 'google.colab' in sys.modules
if IN_COLAB:
    print('Running in Colab: installing packages...')
    # fpdf is pinned to 1.7.2; tensorflow is installed without a version pin.
    !pip install -q tensorflow fpdf==1.7.2
else:
    # Outside Colab nothing is installed; the user is only reminded of the requirements.
    print('Not in Colab: make sure tensorflow and fpdf are installed in your environment.')

Running in Colab: installing packages...
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for fpdf (setup.py) ... [?25l[?25hdone


In [18]:
!pip install fpdf

from fpdf import FPDF
import os

class LabReportPDF(FPDF):
    """FPDF subclass whose ``header`` override draws the lab-report banner."""

    def header(self):
        """Draw the bold, centred report title and leave a 3-unit gap below it."""
        banner_text = "Deep Learning Lab Report"
        self.set_font("Arial", "B", 14)
        self.cell(0, 10, banner_text, ln=True, align="C")
        self.ln(3)

def save_full_report(sections, output_path="/content/full_lab_report.pdf"):
    """
    Render titled text sections into one PDF and write it to ``output_path``.

    sections: dict where
        key   = section title (string)
        value = section text (string)
    output_path: destination file for the generated PDF.

    The report uses the built-in "Arial" core font, which can only encode
    Latin-1 text. Characters outside Latin-1 (curly quotes, em dashes, ...)
    are replaced with "?" so that writing the PDF does not fail with
    UnicodeEncodeError.
    """
    def _latin1_safe(value):
        # Core FPDF fonts are Latin-1 only; substitute anything unencodable.
        return str(value).encode("latin-1", "replace").decode("latin-1")

    pdf = LabReportPDF()
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=10)

    for title, text in sections.items():
        # Section title
        pdf.set_font("Arial", "B", 12)
        pdf.ln(5)
        pdf.cell(0, 8, _latin1_safe(title), ln=True)
        pdf.ln(2)

        # Section content
        pdf.set_font("Arial", size=11)
        pdf.multi_cell(0, 6, _latin1_safe(text))
        pdf.ln(3)

    pdf.output(output_path)
    print("Saved:", output_path)




In [2]:
# Imports and output folder
import os, sys, traceback
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, callbacks, initializers, optimizers
from fpdf import FPDF
print('TensorFlow version:', tf.__version__)

# Output folder: /content/... when the google.colab module is loaded, otherwise /mnt/data/...
OUT = Path('/mnt/data/deep_mlp_lab_output') if 'google.colab' not in sys.modules else Path('/content/deep_mlp_lab_output')
# Create the folder together with any missing parents; an existing folder is not an error.
OUT.mkdir(parents=True, exist_ok=True)
print('Output folder:', OUT)

TensorFlow version: 2.19.0
Output folder: /content/deep_mlp_lab_output


In [3]:
# Utilities: save model summary, quick plotting helpers
def save_model_summary(model, filepath):
    """Write the text emitted by ``model.summary()`` to ``filepath`` as UTF-8.

    Every string the summary passes to its ``print_fn`` is written to the file
    followed by a newline.
    """
    with open(filepath, 'w', encoding='utf-8') as out_file:
        def _write_line(line):
            out_file.write(line + '\n')

        model.summary(print_fn=_write_line)

def save_plot(fig, path):
    """Save ``fig`` to ``path`` with a tight bounding box, then close the figure."""
    save_options = {'bbox_inches': 'tight'}
    fig.savefig(path, **save_options)
    plt.close(fig)

## MNIST: LR range test and MLP training

This section performs a learning-rate range test (LR finder) and a short training run of an MLP. The LR finder increases the learning rate exponentially each batch and records loss; plot loss vs LR (log-scale) to choose a good LR.

In [4]:
# Load the MNIST train/test split and prepare it for a dense network.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
# Convert pixels to float32 and divide by 255.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
# Flatten each image into a vector of 28*28 = 784 values.
x_train_flat = x_train.reshape((-1, 28*28))
x_test_flat = x_test.reshape((-1, 28*28))

from tensorflow.keras import backend as K
class LRFinder(tf.keras.callbacks.Callback):
    """Learning-rate range test: sweep the LR exponentially and log the loss.

    The learning rate scheduled for batch ``k`` (0-based) is
    ``min_lr * (max_lr / min_lr) ** (k / steps)``. After each batch, the rate
    that was actually in effect for that batch is appended to ``lrs`` and the
    reported loss to ``losses``, so the two lists line up one-to-one.
    Training is stopped once ``steps`` batches have been processed.

    Args:
        min_lr: learning rate used for the first batch of the scan.
        max_lr: upper end of the sweep, approached as the batch index nears ``steps``.
        steps: number of batches to scan; must be at least 1.

    Raises:
        ValueError: if ``steps`` is smaller than 1 or a rate is not positive.
    """

    def __init__(self, min_lr=1e-6, max_lr=1, steps=100):
        super().__init__()
        if steps < 1:
            raise ValueError("steps must be at least 1")
        if min_lr <= 0 or max_lr <= 0:
            raise ValueError("min_lr and max_lr must be positive")
        self.min_lr = min_lr
        self.max_lr = max_lr
        self.steps = steps
        self.batch_count = 0
        self.lrs = []      # learning rate used for each recorded batch
        self.losses = []   # loss reported for the same batch

    def _lr_for(self, step):
        """Return the scheduled learning rate for 0-based batch index ``step``."""
        return self.min_lr * (self.max_lr / self.min_lr) ** (step / self.steps)

    def on_train_begin(self, logs=None):
        # Start each scan from a clean state and from min_lr, regardless of the
        # learning rate the optimizer was compiled with.
        self.batch_count = 0
        self.lrs = []
        self.losses = []
        self.model.optimizer.learning_rate.assign(self._lr_for(0))

    def on_train_batch_end(self, batch, logs=None):
        # Learning rate that was in effect for the batch that just finished.
        used_lr = self._lr_for(self.batch_count)
        self.batch_count += 1

        # NOTE(review): Keras usually reports a running mean of the loss over
        # the epoch here rather than the raw per-batch value; confirm that a
        # smoothed curve is acceptable for picking the learning rate.
        loss = (logs or {}).get("loss")
        if loss is not None:
            self.lrs.append(used_lr)
            self.losses.append(loss)

        # Stop when finished scanning; otherwise set the rate for the next batch.
        if self.batch_count >= self.steps:
            self.model.stop_training = True
            return
        self.model.optimizer.learning_rate.assign(self._lr_for(self.batch_count))


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [5]:
# Build a simple MLP and run LR finder (short)
def build_mnist_mlp(hidden_units=[512,256,128], dropout=0.2):
    """Build an MLP classifier for flattened 28x28 inputs.

    Each entry of ``hidden_units`` adds a ReLU Dense layer of that width
    followed by a Dropout layer with rate ``dropout``. The network ends in a
    10-unit softmax layer. The default list is only iterated, never modified.
    """
    inputs = keras.Input(shape=(28*28,))
    hidden = inputs
    for width in hidden_units:
        hidden = layers.Dense(width, activation='relu')(hidden)
        hidden = layers.Dropout(dropout)(hidden)
    outputs = layers.Dense(10, activation='softmax')(hidden)
    return keras.Model(inputs, outputs)

# Fresh model for the range test; its summary is written to the output folder.
model = build_mnist_mlp()
save_model_summary(model, OUT / 'mnist_model_summary.txt')

steps = 600  # reduce for faster runs in Colab free tier if needed
lr_finder = LRFinder(min_lr=1e-6, max_lr=1, steps=steps)
# The optimizer starts at 1e-6, the same value passed as min_lr above.
opt = optimizers.SGD(learning_rate=1e-6)
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

batch_size = 128
# Shuffled, batched, repeated stream truncated to exactly `steps` batches.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train_flat, y_train)).shuffle(5000).batch(batch_size).repeat().take(steps)
# One epoch of `steps` batches; the callback adjusts the learning rate and stops the run.
history = model.fit(train_dataset, epochs=1, steps_per_epoch=steps, callbacks=[lr_finder], verbose=1)

# Plot recorded loss against learning rate on a logarithmic x-axis
fig, ax = plt.subplots(figsize=(6,4))
ax.plot(lr_finder.lrs, lr_finder.losses)
ax.set_xscale('log')
ax.set_xlabel('Learning Rate (log scale)')
ax.set_ylabel('Loss')
ax.set_title('LR range test (MNIST)')
save_plot(fig, OUT / 'mnist_lr_finder.png')
print('Saved:', OUT / 'mnist_lr_finder.png')

[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 15ms/step - accuracy: 0.1533 - loss: 2.3021
Saved: /content/deep_mlp_lab_output/mnist_lr_finder.png


In [6]:
# Train MLP on MNIST with chosen LR (example 1e-3) for a few epochs
# NOTE(review): the range test in the previous cell used SGD while this run uses
# Adam, so a rate read off that sweep may not transfer directly; confirm the choice.
model = build_mnist_mlp()
opt = optimizers.Adam(learning_rate=1e-3)
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
save_model_summary(model, OUT / 'mnist_model_summary_after_lr.txt')

# 10% of the training data is held out for validation.
hist = model.fit(x_train_flat, y_train, validation_split=0.1, epochs=12, batch_size=128, verbose=2)
# Loss and accuracy curves are drawn on the same axes.
fig, ax = plt.subplots()
ax.plot(hist.history['loss'], label='train loss')
ax.plot(hist.history['val_loss'], label='val loss')
ax.plot(hist.history['accuracy'], label='train acc')
ax.plot(hist.history['val_accuracy'], label='val acc')
ax.legend()
ax.set_title('MNIST training curves')
save_plot(fig, OUT / 'mnist_training_curves.png')

# Final evaluation on the held-out test split.
test_loss, test_acc = model.evaluate(x_test_flat, y_test, verbose=0)
print('MNIST test_acc:', test_acc)


Epoch 1/12
422/422 - 14s - 34ms/step - accuracy: 0.9036 - loss: 0.3165 - val_accuracy: 0.9693 - val_loss: 0.1028
Epoch 2/12
422/422 - 15s - 36ms/step - accuracy: 0.9617 - loss: 0.1286 - val_accuracy: 0.9718 - val_loss: 0.0954
Epoch 3/12
422/422 - 7s - 17ms/step - accuracy: 0.9715 - loss: 0.0922 - val_accuracy: 0.9797 - val_loss: 0.0677
Epoch 4/12
422/422 - 8s - 18ms/step - accuracy: 0.9778 - loss: 0.0710 - val_accuracy: 0.9798 - val_loss: 0.0668
Epoch 5/12
422/422 - 9s - 22ms/step - accuracy: 0.9806 - loss: 0.0610 - val_accuracy: 0.9807 - val_loss: 0.0645
Epoch 6/12
422/422 - 8s - 18ms/step - accuracy: 0.9842 - loss: 0.0499 - val_accuracy: 0.9830 - val_loss: 0.0645
Epoch 7/12
422/422 - 7s - 17ms/step - accuracy: 0.9873 - loss: 0.0417 - val_accuracy: 0.9797 - val_loss: 0.0712
Epoch 8/12
422/422 - 7s - 17ms/step - accuracy: 0.9862 - loss: 0.0424 - val_accuracy: 0.9823 - val_loss: 0.0648
Epoch 9/12
422/422 - 8s - 18ms/step - accuracy: 0.9882 - loss: 0.0361 - val_accuracy: 0.9845 - val_los

## Fashion-MNIST: 100-layer MLP with different activations

We train short runs (3 epochs) of a 100-layer MLP using `sigmoid`, `relu`, `elu`, and `selu`. Expect vanishing gradients for `sigmoid`; SELU may help when it is paired with `lecun_normal` initialization and standardized inputs (zero mean, unit variance).

In [7]:
# Load Fashion-MNIST; the 100-layer MLP builder is defined below.
(x_train_f, y_train_f), (x_test_f, y_test_f) = keras.datasets.fashion_mnist.load_data()
# Flatten each image to 28*28 = 784 values, convert to float32 and divide by 255.
x_train_f = x_train_f.reshape((-1, 28*28)).astype('float32')/255.0
x_test_f = x_test_f.reshape((-1, 28*28)).astype('float32')/255.0

def build_deep_mlp_100(activation='relu', units=64, use_selu=False):
    """Build a classifier with 100 hidden Dense layers for flattened 28x28 inputs.

    With ``use_selu=True`` every hidden layer uses SELU with ``lecun_normal``
    initialisation and ``activation`` is ignored. Otherwise the hidden layers
    use ``activation`` with ``he_normal``. The output layer is a 10-unit softmax.
    """
    if use_selu:
        hidden_activation, hidden_init = 'selu', 'lecun_normal'
    else:
        hidden_activation, hidden_init = activation, 'he_normal'

    inputs = keras.Input(shape=(28*28,))
    tensor = inputs
    for _ in range(100):
        tensor = layers.Dense(
            units,
            activation=hidden_activation,
            kernel_initializer=hidden_init,
        )(tensor)
    outputs = layers.Dense(10, activation='softmax')(tensor)
    return keras.Model(inputs, outputs)

print('Fashion-MNIST shapes:', x_train_f.shape, y_train_f.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Fashion-MNIST shapes: (60000, 784) (60000,)


In [12]:
# Run short experiments for different activations
activations = ['sigmoid','relu','elu','selu']
results = {}  # activation name -> Keras history dict for that run
for act in activations:
    print('\n--- Activation:', act)
    use_selu = (act == 'selu')
    # For SELU the builder ignores `activation`, so None is passed in that case.
    model_f = build_deep_mlp_100(activation=act if not use_selu else None, units=32, use_selu=use_selu)
    model_f.compile(optimizer=optimizers.SGD(learning_rate=1e-2), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    save_model_summary(model_f, OUT / f'fmnist_summary_{act}.txt')
    try:
        h = model_f.fit(x_train_f, y_train_f, validation_split=0.1, epochs=3, batch_size=256, verbose=2)
        results[act] = h.history
        # NOTE(review): in the recorded output the ELU run reports `loss: nan` for all
        # three epochs and still reaches 'Saved plot'; a NaN loss does not raise, so the
        # except branch below does not catch it.
        fig, ax = plt.subplots()
        ax.plot(h.history['loss'], label='train loss')
        ax.plot(h.history['val_loss'], label='val loss')
        ax.set_title(f'Fashion-MNIST 100-layer ({act})')
        ax.legend()
        save_plot(fig, OUT / f'fmnist_{act}_loss.png')
        print('Saved plot for', act)
    except Exception as e:
        # Best-effort: report the failure and continue with the next activation.
        print('Run failed for', act, e)


--- Activation: sigmoid


Epoch 1/3
211/211 - 10s - 49ms/step - accuracy: 0.0991 - loss: 2.3383 - val_accuracy: 0.1050 - val_loss: 2.3037
Epoch 2/3
211/211 - 3s - 14ms/step - accuracy: 0.0978 - loss: 2.3032 - val_accuracy: 0.0942 - val_loss: 2.3028
Epoch 3/3
211/211 - 4s - 17ms/step - accuracy: 0.0984 - loss: 2.3027 - val_accuracy: 0.0942 - val_loss: 2.3027
Saved plot for sigmoid

--- Activation: relu


Epoch 1/3
211/211 - 10s - 45ms/step - accuracy: 0.1670 - loss: 2.2301 - val_accuracy: 0.1962 - val_loss: 2.0046
Epoch 2/3
211/211 - 3s - 12ms/step - accuracy: 0.2243 - loss: 1.9249 - val_accuracy: 0.1995 - val_loss: 2.1588
Epoch 3/3
211/211 - 4s - 19ms/step - accuracy: 0.2502 - loss: 1.7909 - val_accuracy: 0.2725 - val_loss: 1.5978
Saved plot for relu

--- Activation: elu


Epoch 1/3
211/211 - 12s - 56ms/step - accuracy: 0.0998 - loss: nan - val_accuracy: 0.1050 - val_loss: nan
Epoch 2/3
211/211 - 3s - 15ms/step - accuracy: 0.0994 - loss: nan - val_accuracy: 0.1050 - val_loss: nan
Epoch 3/3
211/211 - 3s - 15ms/step - accuracy: 0.0994 - loss: nan - val_accuracy: 0.1050 - val_loss: nan
Saved plot for elu

--- Activation: selu


Epoch 1/3
211/211 - 10s - 46ms/step - accuracy: 0.1051 - loss: 2.3170 - val_accuracy: 0.0942 - val_loss: 2.3071
Epoch 2/3
211/211 - 3s - 15ms/step - accuracy: 0.1001 - loss: 2.3063 - val_accuracy: 0.1008 - val_loss: 2.3042
Epoch 3/3
211/211 - 4s - 19ms/step - accuracy: 0.1028 - loss: 2.3050 - val_accuracy: 0.1032 - val_loss: 2.3053
Saved plot for selu


## CIFAR-10: 20-layer DNN with ELU + He init

We build a fully-connected DNN with 20 hidden layers of ELU units (He initialization). We'll train with Nadam and EarlyStopping, and compare performance with and without Batch Normalization.

In [13]:
# Load CIFAR-10, an image dataset (CIFAR stands for Canadian Institute For Advanced Research).
(x_train_c, y_train_c), (x_test_c, y_test_c) = keras.datasets.cifar10.load_data()
# Convert pixels to float32 and divide by 255.
x_train_c = x_train_c.astype('float32') / 255.0
x_test_c = x_test_c.astype('float32') / 255.0
# Flatten the label arrays to one dimension.
y_train_c = y_train_c.flatten()
y_test_c = y_test_c.flatten()

def build_cifar_dnn(layers_count=20, units=100, use_batchnorm=False):
    """Build a fully connected classifier for 32x32x3 inputs.

    The input is flattened and passed through ``layers_count`` Dense layers of
    ``units`` ELU units with He-normal initialisation. When ``use_batchnorm``
    is true, each of those layers is followed by BatchNormalization. The output
    layer is a 10-unit softmax.
    """
    inputs = keras.Input(shape=(32,32,3))
    tensor = layers.Flatten()(inputs)
    for _ in range(layers_count):
        # A new initializer object is created for every hidden layer.
        dense = layers.Dense(
            units,
            activation='elu',
            kernel_initializer=initializers.he_normal(),
        )
        tensor = dense(tensor)
        if not use_batchnorm:
            continue
        tensor = layers.BatchNormalization()(tensor)
    outputs = layers.Dense(10, activation='softmax')(tensor)
    return keras.Model(inputs, outputs)

print('CIFAR shapes:', x_train_c.shape, y_train_c.shape)

CIFAR shapes: (50000, 32, 32, 3) (50000,)


In [14]:
# Train CIFAR DNN with Nadam, compare BN on/off
for use_bn in [False, True]:
    print('\n=== BatchNorm:', use_bn)
    # Note: 64 units per layer are used here, not the builder's default of 100.
    model_c = build_cifar_dnn(layers_count=20, units=64, use_batchnorm=use_bn)
    save_model_summary(model_c, OUT / f'cifar_summary_{"bn" if use_bn else "nobn"}.txt')
    model_c.compile(optimizer=optimizers.Nadam(learning_rate=1e-3), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    # Stop after 3 epochs without val_loss improvement and restore the best weights.
    es = callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
    try:
        h = model_c.fit(x_train_c, y_train_c, validation_split=0.1, epochs=8, batch_size=256, callbacks=[es], verbose=2)
        # Loss and accuracy curves share one set of axes.
        fig, ax = plt.subplots(figsize=(6,4))
        ax.plot(h.history['loss'], label='train loss')
        ax.plot(h.history['val_loss'], label='val loss')
        ax.plot(h.history['accuracy'], label='train acc')
        ax.plot(h.history['val_accuracy'], label='val acc')
        ax.legend()
        ax.set_title(f'CIFAR10 DNN (BN={use_bn})')
        save_plot(fig, OUT / f'cifar_dnn_bn_{use_bn}.png')
        test_loss, test_acc = model_c.evaluate(x_test_c, y_test_c, verbose=0)
        print('Test acc (BN={}):'.format(use_bn), test_acc)
    except Exception as e:
        # Best-effort: report the failure and continue with the other configuration.
        print('CIFAR run failed (BN={}):'.format(use_bn), e)


=== BatchNorm: False


Epoch 1/8
176/176 - 16s - 90ms/step - accuracy: 0.2319 - loss: 2.1422 - val_accuracy: 0.2652 - val_loss: 2.0156
Epoch 2/8
176/176 - 3s - 20ms/step - accuracy: 0.3265 - loss: 1.8421 - val_accuracy: 0.3248 - val_loss: 1.8588
Epoch 3/8
176/176 - 5s - 27ms/step - accuracy: 0.3631 - loss: 1.7563 - val_accuracy: 0.3734 - val_loss: 1.7131
Epoch 4/8
176/176 - 3s - 19ms/step - accuracy: 0.3828 - loss: 1.6955 - val_accuracy: 0.4026 - val_loss: 1.6687
Epoch 5/8
176/176 - 3s - 19ms/step - accuracy: 0.4067 - loss: 1.6423 - val_accuracy: 0.3892 - val_loss: 1.6767
Epoch 6/8
176/176 - 4s - 25ms/step - accuracy: 0.4218 - loss: 1.6019 - val_accuracy: 0.4250 - val_loss: 1.5952
Epoch 7/8
176/176 - 3s - 19ms/step - accuracy: 0.4374 - loss: 1.5606 - val_accuracy: 0.4166 - val_loss: 1.6501
Epoch 8/8
176/176 - 3s - 19ms/step - accuracy: 0.4532 - loss: 1.5291 - val_accuracy: 0.4176 - val_loss: 1.6270
Test acc (BN=False): 0.436599999666214

=== BatchNorm: True


Epoch 1/8
176/176 - 29s - 164ms/step - accuracy: 0.3106 - loss: 1.9246 - val_accuracy: 0.3016 - val_loss: 2.0609
Epoch 2/8
176/176 - 5s - 27ms/step - accuracy: 0.4059 - loss: 1.6552 - val_accuracy: 0.3796 - val_loss: 1.7760
Epoch 3/8
176/176 - 6s - 33ms/step - accuracy: 0.4447 - loss: 1.5562 - val_accuracy: 0.3684 - val_loss: 1.8115
Epoch 4/8
176/176 - 5s - 30ms/step - accuracy: 0.4682 - loss: 1.4894 - val_accuracy: 0.4238 - val_loss: 1.6208
Epoch 5/8
176/176 - 5s - 28ms/step - accuracy: 0.4875 - loss: 1.4383 - val_accuracy: 0.3920 - val_loss: 1.7263
Epoch 6/8
176/176 - 6s - 33ms/step - accuracy: 0.5042 - loss: 1.3976 - val_accuracy: 0.4386 - val_loss: 1.6049
Epoch 7/8
176/176 - 13s - 72ms/step - accuracy: 0.5161 - loss: 1.3598 - val_accuracy: 0.4058 - val_loss: 1.6978
Epoch 8/8
176/176 - 5s - 28ms/step - accuracy: 0.5286 - loss: 1.3286 - val_accuracy: 0.4348 - val_loss: 1.5804
Test acc (BN=True): 0.4410000145435333


In [15]:
# Compile a short PDF report with the plots saved in the output folder
pdf = FPDF(orientation='P', unit='mm', format='A4')
pdf.set_auto_page_break(auto=True, margin=10)
pdf.add_page()
# Title line and introductory paragraph on the first page.
pdf.set_font('Arial', size=12)
pdf.cell(0, 6, 'Lab report: Deep MLP experiments (29 Nov 2025 - 9 Dec 2025)', ln=True)
pdf.ln(2)
pdf.set_font('Arial', size=10)
pdf.multi_cell(0, 5, 'This PDF contains plots produced by the automated runs. Re-run longer experiments for final submission.')

def add_image_if_exists(p: Path, w=180):
    """Add a new page showing the file name and the image at ``p``, if the file exists.

    Writes to the module-level ``pdf`` object. A missing file is skipped silently.
    ``w`` is passed to ``pdf.image`` as the image width.
    """
    if p.exists():
        pdf.add_page()
        pdf.set_font('Arial', size=11)
        pdf.cell(0,6, p.name, ln=True)
        pdf.image(str(p), w=w)

# File names follow the patterns used when the plots were saved in the earlier cells.
images = ['mnist_lr_finder.png','mnist_training_curves.png'] + [f'fmnist_{a}_loss.png' for a in ['sigmoid','relu','elu','selu']] + [f'cifar_dnn_bn_{b}.png' for b in [False, True]]
for img in images:
    add_image_if_exists(OUT / img)

# Closing page with a pointer to the text artifacts.
pdf.add_page()
pdf.set_font('Arial', size=10)
pdf.cell(0,6, 'Note: console outputs and model summaries are saved as text files in the output folder.', ln=True)

report_path = OUT / 'lab_report_short.pdf'
pdf.output(str(report_path))
print('Saved PDF report to:', report_path)

Saved PDF report to: /content/deep_mlp_lab_output/lab_report_short.pdf


In [16]:
# Recursively list /content (the Colab working directory) to check the generated files.
!ls -R /content


/content:
deep_mlp_lab_output  sample_data

/content/deep_mlp_lab_output:
cifar_dnn_bn_False.png	 fmnist_summary_relu.txt
cifar_dnn_bn_True.png	 fmnist_summary_selu.txt
cifar_summary_bn.txt	 fmnist_summary_sigmoid.txt
cifar_summary_nobn.txt	 lab_report_short.pdf
fmnist_elu_loss.png	 mnist_lr_finder.png
fmnist_relu_loss.png	 mnist_model_summary_after_lr.txt
fmnist_selu_loss.png	 mnist_model_summary.txt
fmnist_sigmoid_loss.png  mnist_training_curves.png
fmnist_summary_elu.txt

/content/sample_data:
anscombe.json		      mnist_test.csv
california_housing_test.csv   mnist_train_small.csv
california_housing_train.csv  README.md
