# Introduction to MNIST
Here we build a <b>feedforward neural network</b> made up of <b>fully connected</b> layers (called <b>Dense</b> layers in Keras)<br>
to classify the handwritten digits of the MNIST dataset.


The **MNIST** (Modified National Institute of Standards and Technology) dataset is a classic benchmark in the field of machine learning and computer vision. It consists of:

- **60,000** training images
- **10,000** test images
- All are **grayscale** handwritten digits from **0 to 9**
- Each image is **28x28 pixels**

Why MNIST?

MNIST is widely used for:

- Learning and prototyping image classification models
- Exploring neural network architectures


It's a great starting point for building and training deep learning models using fully connected or convolutional neural networks.

In [32]:
# Usual libraries
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import models
from keras.layers import Dense, Input
# library useful for preprocessing
from tensorflow.keras.utils import to_categorical



# Load and inspect the MNIST dataset
The MNIST dataset ships with **TensorFlow**, so we can load it directly through `tf.keras.datasets`:

In [33]:
# Step 1 - fetch the MNIST train/test split through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Step 2 - turn every 28x28 image into one row of 784 features, cast to
# float32, then divide by 255 so the pixel values are normalized to [0, 1].
x_train = x_train.reshape(len(x_train), -1).astype(np.float32) / 255.0
x_test = x_test.reshape(len(x_test), -1).astype(np.float32) / 255.0


In [None]:
# Peek at the raw training labels as loaded, before any one-hot encoding.
y_train

In [None]:
# One-hot encode both label arrays: each digit becomes a length-10 vector
# with a single 1 at the index of its class.
y_train_oh, y_test_oh = (
    to_categorical(labels, num_classes=10) for labels in (y_train, y_test)
)

# Display the first training label next to its one-hot representation.
y_train[0], y_train_oh[0]

### Data inspection


In [None]:
# Report the array shape of each split after the flattening step.
train_shape, test_shape = x_train.shape, x_test.shape
print("Training set shape:", train_shape)
print("Test set shape:", test_shape)

In [None]:
# Count how many training samples carry each label value.
unique, counts = np.unique(y_train, return_counts=True)

print("Class distribution (train):")
# Walk the (label, count) pairs as plain Python ints and print one line each.
for digit, count in zip(unique.tolist(), counts.tolist()):
    print(f"Digit {digit}: {count} samples")

In [None]:
# Preview the first ten training digits in a 2x5 grid, each titled with its label.
plt.figure(figsize=(8, 4))
for position, (pixels, label) in enumerate(zip(x_train[:10], y_train[:10]), start=1):
    plt.subplot(2, 5, position)
    # The images were flattened earlier; restore the 28x28 layout for display.
    plt.imshow(pixels.reshape(28, 28), cmap="gray")
    plt.title(f"Label: {label}")
    plt.axis("off")
plt.tight_layout()
plt.show()

# Build and train my network
The network is made up only of fully connected layers (<b>Dense</b> layers in Keras).<br>
The TensorFlow/Keras implementation trains quickly and is easy to generalize, for example by adding layers or changing their size.<br>

Try to:
<ul>
<li> Change the hidden layer size and study the effect, e.g. 2, 5, 10 and 25 neurons</li>
<li> Add more layers. For example, add the line <code>Dense(5, activation="relu"),</code> after the first Dense layer</li>
<li> Add Early Stopping</li>
<li> Change the activation function of the hidden layers from sigmoid to relu</li>
</ul>


In [None]:
# Exercise scaffold: a Sequential model whose input is a flattened image
# (784 features per sample).
# NOTE(review): as written the model contains no Dense layers yet, so the
# training cell presumably fails (784 outputs vs. 10-class one-hot targets)
# until the placeholder below is filled in.
model = models.Sequential([
    Input(shape=(784,)),
    # TODO (exercise): build your model here using Dense layers. Pay attention
    # to the output layer: its number of neurons must match the 10-class one-hot
    # labels, and its activation must suit the 'categorical_crossentropy' loss
    # chosen below.
])

# Compile the model: Adam optimizer, categorical cross-entropy loss for the
# one-hot labels, and accuracy tracked as an extra metric.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


In [None]:
# Draw the model architecture (with layer names and shapes) and save it to
# 'model_plot.png'.
# NOTE(review): plot_model presumably requires pydot and Graphviz to be
# installed - confirm in your environment.
keras.utils.plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
# Training hyperparameters (Italian names: epoche = epochs, pazienza = patience).
# NOTE(review): pazienza is not passed to fit() below; presumably it is
# reserved for the Early Stopping exercise - confirm.
batches = 32
epoche = 20
pazienza = 5

# Train on the one-hot labels; validation_split=0.1 sets aside 10% of the
# training data to monitor validation loss/accuracy after each epoch.
storia = model.fit(
    x=x_train,
    y=y_train_oh,
    batch_size=batches,
    epochs=epoche,
    validation_split=0.1,
)

# List the names of the per-epoch curves recorded in the history object.
print("\n",storia.history.keys())

# Plot the training history and test the network

In [None]:
"""################################################### Plots ####################################################"""

loss_train = storia.history["loss"]
loss_val = storia.history["val_loss"]
acc_train = storia.history["accuracy"]
acc_val = storia.history["val_accuracy"]

# Create a figure with 2 subplots (1 row, 2 columns)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Plot Accuracy on the first axis (ax1)
ax1.plot(range(len(acc_train)), acc_train, label="acc_train")
ax1.plot(range(len(acc_val)), acc_val, label="acc_val")
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Accuracy')
ax1.set_title('Accuracy vs Epoch')
ax1.legend()

# Plot Loss on the second axis (ax2)
ax2.plot(range(len(loss_train)), loss_train, label="loss_train")
ax2.plot(range(len(loss_val)), loss_val, label="loss_val")
ax2.set_yscale('log')  # Log scale for loss
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Loss (log scale)')
ax2.set_title('Loss vs Epoch')
ax2.legend()

plt.tight_layout()


#plt.savefig("Loss_Accuracy_Ising2d.png")

# Show the plot
plt.show()



In [None]:
"""################################################### Test ####################################################"""
# Evaluate the model on the test set

test_loss, test_acc = model.evaluate(x_test, y_test_oh)
print(f"Test accuracy: {test_acc:.4f}")

In [None]:
# One output vector per test image; np.argmax over a vector gives the
# predicted digit.
predictions = model.predict(x_test)

# Show the first ten test images with their true and predicted labels.
plt.figure(figsize=(8, 4))
for i in range(10):
    plt.subplot(2, 5, i+1)
    # Images are stored flattened; restore the 28x28 layout for display.
    plt.imshow(x_test[i].reshape(28,28), cmap="gray")
    plt.title(f"Label: {y_test[i]}\n Prediction: {np.argmax(predictions[i])}")
    plt.axis("off")

# The two-line titles need extra vertical room: let matplotlib re-space the
# grid so the second-row titles do not crowd the first-row images (same call
# as in the training-sample preview cell).
plt.tight_layout()
plt.show()