# Laboratory 3

## Task
Find the model with the greatest generalization power for the MNIST dataset. Start with the logistic regression model on MNIST from the lecture. To evaluate models, use 3-fold and 5-fold cross-validation. To create new models, try:
- various learning rates;
- increasing model capacity by adding more layers or using layers with more parameters;
- regularizing models with weight regularization (L1, L2, and L1_L2) and with dropout.

In [None]:
import os

# Suppress TensorFlow's native (C++) logging. The variable has to be in the
# environment *before* TensorFlow is imported for the first time; setting it
# afterwards does not silence the messages emitted while the library loads.
# Levels: '0' shows everything, '1' hides INFO, '2' hides INFO and WARNING,
# '3' additionally hides ERROR.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Deliberately imported after the environment variable is set.
import tensorflow as tf  # noqa: E402

print("TensorFlow version:", tf.__version__)

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

In [None]:
# Only the training split is used in this notebook; the second tuple returned
# by load_data() is discarded.
(x_train, y_train), _ = keras.datasets.mnist.load_data()

# Flatten every 28x28 image into a 784-element float32 row and divide by 255.
x_train = x_train.reshape(len(x_train), 784).astype("float32") / 255.0

# One-hot encode the digit labels into 10 columns.
y_train = keras.utils.to_categorical(y_train, num_classes=10)

In [None]:
def create_model(learning_rate=0.01, layers_config=(10,), regularizer=None, dropout_rate=0.0):
    """Build and compile a fully connected softmax classifier.

    Args:
        learning_rate: Learning rate passed to the Adam optimizer.
        layers_config: Sequence of layer widths. Every entry except the last
            becomes a ReLU ``Dense`` layer; the last entry is the width of the
            softmax output layer. The default ``(10,)`` therefore yields a
            single softmax layer with no hidden layers. The default is an
            immutable tuple so it cannot be shared and mutated across calls.
        regularizer: Optional kernel regularizer applied to every ``Dense``
            layer, including the output layer.
        dropout_rate: When greater than 0, a ``Dropout`` layer with this rate
            is added after each hidden layer (never after the output layer).

    Returns:
        A compiled ``keras.Sequential`` model that uses categorical
        cross-entropy as the loss and tracks accuracy.
    """
    model = keras.Sequential()

    # Hidden layers: all configured widths except the final one.
    for units in layers_config[:-1]:
        model.add(layers.Dense(units, activation='relu', kernel_regularizer=regularizer))
        if dropout_rate > 0:
            model.add(layers.Dropout(dropout_rate))

    # Output layer: the final configured width with a softmax activation.
    model.add(layers.Dense(layers_config[-1], activation='softmax', kernel_regularizer=regularizer))

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
def cross_validate_model(model_fn, k=5, epochs=10, batch_size=128):
    """Estimate a model's accuracy with shuffled k-fold cross-validation.

    The folds are drawn from the module-level ``x_train`` / ``y_train``
    arrays. A fresh model is built and trained for every fold.

    Args:
        model_fn: Zero-argument callable returning a new compiled model.
        k: Number of folds.
        epochs: Training epochs per fold.
        batch_size: Mini-batch size used for training.

    Returns:
        A ``(mean, std)`` tuple of the per-fold validation accuracies.
    """
    # Fixed random_state keeps the fold assignment identical between calls,
    # so different models are compared on the same splits.
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    acc_scores = []
    for train_idx, val_idx in kf.split(x_train):
        x_tr, x_val = x_train[train_idx], x_train[val_idx]
        y_tr, y_val = y_train[train_idx], y_train[val_idx]

        # Many models are created in a loop; reset Keras' global state so
        # leftovers from earlier folds do not accumulate in memory.
        keras.backend.clear_session()

        model = model_fn()
        model.fit(x_tr, y_tr, epochs=epochs, batch_size=batch_size, verbose=0)

        # Silence predict() as well, matching the silent fit() above.
        val_pred = np.argmax(model.predict(x_val, verbose=0), axis=1)
        # Labels are stored as rows of class scores; recover the class index.
        val_true = np.argmax(y_val, axis=1)
        acc_scores.append(accuracy_score(val_true, val_pred))
    return np.mean(acc_scores), np.std(acc_scores)

In [None]:
print("== Bazowy model: regresja logistyczna ==")


def baseline_fn():
    """Return a fresh baseline model: a single 10-unit softmax layer."""
    return create_model(layers_config=[10], learning_rate=0.01)


# Score the same baseline with both fold counts.
mean_acc_3, std_acc_3 = cross_validate_model(baseline_fn, k=3)
mean_acc_5, std_acc_5 = cross_validate_model(baseline_fn, k=5)
print(
    f"3-fold: {mean_acc_3:.4f} ± {std_acc_3:.4f}, "
    f"5-fold: {mean_acc_5:.4f} ± {std_acc_5:.4f}"
)


In [None]:
print("\n== Różne learning rate ==")
lrs = [0.001, 0.01, 0.1]
for lr in lrs:
    # Bind the current learning rate as a default argument so the factory
    # keeps this value even if it is called after the loop variable changes
    # (avoids the late-binding closure pitfall).
    model_fn = lambda lr=lr: create_model(layers_config=[10], learning_rate=lr)
    # Only the mean accuracy is reported; the standard deviation is discarded.
    acc, _ = cross_validate_model(model_fn, k=3)
    print(f"Learning rate {lr}: Accuracy = {acc:.4f}")

In [None]:
print("\n== Większe modele (więcej warstw) ==")
# Each entry lists the layer widths; the last value is the softmax output size.
configs = [[128, 10], [256, 128, 10], [512, 256, 10]]
for config in configs:
    # Bind the current architecture as a default argument so the factory
    # keeps this value even if it is called after the loop variable changes
    # (avoids the late-binding closure pitfall).
    model_fn = lambda config=config: create_model(layers_config=config)
    # Only the mean accuracy is reported; the standard deviation is discarded.
    acc, _ = cross_validate_model(model_fn, k=3)
    print(f"Architektura {config}: Accuracy = {acc:.4f}")

In [None]:
print("\n== Regularizacja wag ==")
# Weight penalties to compare, keyed by the label used in the printed report.
regs = {
    'L1': regularizers.l1(0.001),
    'L2': regularizers.l2(0.001),
    'L1_L2': regularizers.l1_l2(l1=0.001, l2=0.001)
}
for name, reg in regs.items():
    # Bind the current regularizer as a default argument so the factory
    # keeps this value even if it is called after the loop variable changes
    # (avoids the late-binding closure pitfall).
    model_fn = lambda reg=reg: create_model(layers_config=[128, 10], regularizer=reg)
    # Only the mean accuracy is reported; the standard deviation is discarded.
    acc, _ = cross_validate_model(model_fn, k=3)
    print(f"Regularizer {name}: Accuracy = {acc:.4f}")

In [None]:
print("\n== Dropout ==")
for rate in [0.2, 0.5]:
    # Bind the current dropout rate as a default argument so the factory
    # keeps this value even if it is called after the loop variable changes
    # (avoids the late-binding closure pitfall).
    model_fn = lambda rate=rate: create_model(layers_config=[128, 10], dropout_rate=rate)
    # Only the mean accuracy is reported; the standard deviation is discarded.
    acc, _ = cross_validate_model(model_fn, k=3)
    print(f"Dropout rate {rate}: Accuracy = {acc:.4f}")