In [None]:
import os
import sys
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Add the parent directory to the system path
sys.path.append(os.path.abspath(".."))


# Import the module from the custom library
from _lib import utils
from _lib import layers
from _lib import metrics
from _lib import callbacks
from _lib import optimizers
from _lib import activations
from _lib import loss_functions
from _lib import FeedForward

### Constants and hyperparameters

In [None]:
# --- Data split (values are fractions in [0, 1], not percentages) ---
# NOTE(review): train_test_split_pct is not referenced in the visible cells — confirm it is still needed
train_test_split_pct = 0.2  # Fraction of samples reserved for testing
train_valid_split = 0.1  # Fraction of the training samples reserved for validation

# --- Optimisation ---
learning_rate = 1e-3  # Step size handed to the optimizer
batch_size = 1_000  # Samples processed per batch
epochs = 50  # Upper bound on the number of training epochs

# --- Reproducibility ---
seed = 1234  # Seed shared by every random number generator in the notebook

In [None]:
# Set the seeds for reproducibility
# NumPy's global generator (legacy API)
np.random.seed(seed)
# TensorFlow keeps a separate global seed that np.random.seed does not affect
# NOTE(review): whether _lib draws from NumPy or TensorFlow randomness is not visible here — seeding both covers either case
tf.random.set_seed(seed)

### Data loading

In [None]:
# Fetch Fashion-MNIST, which is delivered already divided into training and testing sets
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data() # type: ignore

# Append a trailing channel axis to the images
X_train = X_train[..., np.newaxis] # type: ignore
X_test = X_test[..., np.newaxis] # type: ignore

# Count the distinct labels present in the training targets
num_classes = np.unique(y_train).size # type: ignore

# Hold out a share of the training data for validation
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train,
    y_train,
    test_size=train_valid_split,
    random_state=seed,
) # type: ignore

# Report the number of classes and the shape of every split
print("Number of classes:", num_classes)
for heading, images, targets in (
    ('Training set:', X_train, y_train),
    ('Validation set:', X_valid, y_valid),
    ('Testing set:', X_test, y_test),
):
    print(heading, images.shape, targets.shape)

### Data preprocessing

In [None]:
# Normalization
def normalize(X: np.ndarray, max_value: float = 255.0) -> np.ndarray:
    """
    Rescale the input data by dividing it by a fixed maximum value.
    
    The divisor is a constant (255.0 by default) and is NOT computed from the data,
    so every split is scaled in exactly the same way.
    
    Parameters:
    - X (np.ndarray): The input data to normalize
    - max_value (float): The value the data is divided by. Default is 255.0
    
    Returns:
    - np.ndarray: The normalized input data
    
    Raises:
    - ValueError: If max_value is not strictly positive
    """
    
    # A zero or negative divisor would silently produce inf/nan or flipped values
    if max_value <= 0:
        raise ValueError(f"max_value must be strictly positive, got {max_value}")
    
    # Normalize the input data
    return X / max_value

# Rescale the three splits with the same function so they share one scale
# Caveat: the names are rebound in place, so running this cell twice rescales already-normalized data
X_train, X_valid, X_test = (normalize(split) for split in (X_train, X_valid, X_test))

In [None]:
# Convert the target values to one-hot encoded vectors
y_train_encoded = utils.one_hot_encoding(y=y_train, n_classes=num_classes)
y_valid_encoded = utils.one_hot_encoding(y=y_valid, n_classes=num_classes)

# Print one sample next to its own encoding
# Both values come from the first TRAINING sample so the label and the vector can be compared
print("Sample target value:", y_train[0])
print("One-hot encoded value:", y_train_encoded[0])

### Data visualization

In [None]:
def plot_samples(samples: list[np.ndarray], labels: list[np.ndarray]) -> None:
    """
    Plot the samples side by side in a single-row grid.
    
    Each sample is drawn with a gray colormap, titled with its label, and shown without axes.
    
    Parameters:
    - samples (list[np.ndarray]): The samples to plot
    - labels (list[np.ndarray]): The labels of the samples, aligned with `samples` by index
    """
    
    # Nothing to draw: a grid with zero columns is rejected by plt.subplots
    if len(samples) == 0:
        return
    
    # squeeze=False always returns a 2D array of axes, so a single sample
    # no longer yields a bare (non-iterable) Axes object
    _, axes = plt.subplots(1, len(samples), figsize=(20, 5), squeeze=False)
    
    # Plot the samples along the only row of the grid
    for i, ax in enumerate(axes[0]):
        ax.imshow(samples[i], cmap='gray')
        ax.set_title(f'Label: {labels[i]}')
        ax.axis('off')
        
# Show the first 10 training images together with their labels
first_ten = slice(0, 10)
plot_samples(list(X_train[first_ten]), y_train[first_ten])

In [None]:
# Turn the image arrays of every split into TensorFlow tensors
X_train, X_valid, X_test = map(tf.convert_to_tensor, (X_train, X_valid, X_test))

# Do the same for the targets: one-hot vectors for training/validation, raw labels for testing
y_train_encoded, y_valid_encoded, y_test = map(
    tf.convert_to_tensor,
    (y_train_encoded, y_valid_encoded, y_test),
)

### Building the model

In [None]:

# Assemble the network: two conv/pool/dropout stages followed by a dense classification head
model = FeedForward([
    # First convolutional stage
    layers.Conv2D(
        num_filters=32,
        kernel_size=(3, 3),
        activation=activations.ReLU(),
        padding='same',
    ),
    layers.MaxPool2D(size=(2, 2), stride=(2, 2)),
    layers.Dropout(rate=0.2),

    # Second convolutional stage
    layers.Conv2D(
        num_filters=64,
        kernel_size=(3, 3),
        activation=activations.ReLU(),
        padding='same',
    ),
    layers.MaxPool2D(size=(2, 2), stride=(2, 2)),
    layers.Dropout(rate=0.2),

    # Classification head: one output unit per class
    layers.Flatten(),
    layers.BatchNormalization(momentum=0.9),
    layers.Dense(num_units=128, activation=activations.ReLU()),
    layers.Dense(num_units=num_classes, activation=activations.Softmax()),
])

# Optimizer configured with the learning rate from the hyperparameters cell
optimizer = optimizers.Adam(learning_rate=learning_rate)

# Loss function used during training
loss_fn = loss_functions.CrossEntropy()

In [None]:
# Run one forward pass on a first batch so the model's weights get initialized
# The step is optional; it is kept because it lets the model learn its input size up front
# NOTE(review): the last argument of get_batch is presumably the batch index — confirm against utils
first_batch = utils.get_batch(X_train, batch_size, 0)
model(first_batch);

In [None]:
# Display the model summary
# This runs after the forward pass in the previous cell, which initialized the model
model.summary()

### Training the model

In [None]:
# Halt training once the validation loss has gone 5 epochs without improving
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=5)

# Fit on the one-hot encoded targets, tracking accuracy and validating on the held-out split
history = model.fit(
    X_train=X_train,
    y_train=y_train_encoded,
    optimizer=optimizer,
    loss_fn=loss_fn,
    X_valid=X_valid,
    y_valid=y_valid_encoded,
    batch_size=batch_size,
    epochs=epochs,
    metrics=[metrics.accuracy],
    callbacks=[early_stopping],
)

In [None]:
# Plot the training and validation loss
# Reads the "loss" and "val_loss" entries of the history returned by model.fit;
# the remaining arguments are the figure title and the x/y axis labels
utils.plot_history(history["loss"], history["val_loss"], "Training and Validation Loss", "Epoch", "Loss")

### Model evaluation

In [None]:
# Forward the whole test set through the model to get one score vector per sample
class_scores = model(X_test)

# Keep the index of the highest score along the class axis as the predicted label
predictions = tf.argmax(class_scores, axis=1)

In [None]:
# Score the predicted class indices against the test labels and report the result
# NOTE(review): tf.argmax yields int64 while the Fashion-MNIST labels are presumably uint8 —
# confirm that the metrics helpers accept mixed integer dtypes
accuracy = metrics.accuracy(predictions, y_test)
print(f"Accuracy: {accuracy:.2f}")

# Build the confusion matrix over all classes and display it
confusion_matrix = metrics.confusion_matrix(num_classes, predictions, y_test)
utils.plot_confusion_matrix(confusion_matrix)