# Practice on Keras Functional API

In [1]:
# imports
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping
# for types hints
from typing import Tuple, Callable, List
from tensorflow import Tensor

2023-08-16 01:34:18.309046: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# check for gpus
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
# list their names
tf.config.experimental.list_physical_devices('GPU')

Num GPUs Available:  1


2023-08-16 01:34:24.167306: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:03:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-16 01:34:24.190791: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:03:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-16 01:34:24.191032: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:03:00.0/numa_node
Your kernel may have been built without NUMA support.


[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
# mnist dataset
def load_and_preprocess_mnist() -> Tuple[Tuple[Tensor, Tensor], Tuple[Tensor, Tensor]]:
    """
    Load the MNIST dataset, preprocess images, and perform one-hot encoding of labels.

    :return: Tuple of training data (x_train, y_train) and testing data (x_test, y_test).
    """
    # Load MNIST dataset
    (x_train, y_train), (x_test, y_test) =  tf.keras.datasets.mnist.load_data()

    # Reshape and normalize images
    x_train = x_train.reshape(60000, 784).astype('float32') / 255
    x_test = x_test.reshape(10000, 784).astype('float32') / 255

    # One-hot encoding of labels
    y_train = tf.keras.utils.to_categorical(y_train, 10)
    y_test = tf.keras.utils.to_categorical(y_test, 10)

    return (x_train, y_train), (x_test, y_test)

In [4]:

(x_train, y_train), (x_test, y_test) = load_and_preprocess_mnist()


In [5]:
# building the model
def build_model() -> tf.keras.Model:
    """
    Build a simple MLP model for MNIST classification.
    :return: A tf.keras Model with inputs and outputs defined.
    """
    inputs = tf.keras.Input(shape=(784,), name='input')  # input layer
    x = tf.keras.layers.Dense(64, activation='relu', name='hidden1')(inputs)  # hidden layer
    x = tf.keras.layers.Dense(64, activation='relu', name='hidden2')(x)  # hidden layer
    outputs = tf.keras.layers.Dense(10, activation='softmax', name='output')(x)  # output layer
    
    return tf.keras.Model(inputs=inputs, outputs=outputs)

model_ce = build_model()
model_fl = build_model()
model_rl = build_model()

2023-08-16 01:34:32.179234: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:03:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-16 01:34:32.179521: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:03:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-16 01:34:32.179724: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:03:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-16 01:34:33.454333: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:03:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-16 01:34:33.454829: I tensorflow/compile

In [6]:
optimizer = tf.keras.optimizers.SGD()
ce_loss = tf.keras.losses.CategoricalCrossentropy()
metrics = [tf.keras.metrics.CategoricalAccuracy()]
# compiling the model
model_ce.compile(optimizer=optimizer, loss=ce_loss, metrics=metrics)
# training the model
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True)
model_ce.fit(x_train, y_train, batch_size=64, epochs=100, validation_split=0.2, callbacks=[early_stopping_callback])
# evaluating the model
test_scores = model_ce.evaluate(x_test, y_test, verbose=2)
print('Test loss:', test_scores[0])
print('Test accuracy:', test_scores[1])

# saving the model
path = './weights/mnist_ce_model.keras'
model_ce.save(path)

Epoch 1/100


2023-08-16 01:34:46.185768: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2023-08-16 01:34:46.189082: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fa4ea086020 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-08-16 01:34:46.189115: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3060, Compute Capability 8.6
2023-08-16 01:34:46.387316: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600
2023-08-16 01:34:46.396035: W tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc:543] Can't find libdevice directory ${CUDA_DIR}/nvvm/libdevice. This may result in compilation or runtime failures, if the program we try to run uses routines from libdevice.
Searched for CUDA in the following directories:
  ./cuda_sdk_lib
  /usr/lo

Epoch 2/100
Epoch 3/100
Epoch 4/100

KeyboardInterrupt: 

In [8]:
focal_loss = tf.keras.losses.CategoricalFocalCrossentropy()
# compiling the model
optimizer = tf.keras.optimizers.SGD()
model_fl.compile(optimizer=optimizer, loss=focal_loss, metrics=metrics)
# training the model
model_fl.fit(x_train, y_train, batch_size=64, epochs=100, validation_split=0.2, callbacks=[early_stopping_callback])
# evaluating the model
test_scores = model_fl.evaluate(x_test, y_test, verbose=2)
print('Test loss:', test_scores[0])
print('Test accuracy:', test_scores[1])
# saving the model
path = './weights/mnist_focal_model.keras'
model_fl.save(path)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [7]:
# defining rational loss function
# RL(p_t) = 1/p_t * -p * log(p_t)
    # TODO: plot the function
def rational_loss(y_true, y_pred):
    """
    Rational Loss for multi-class classification, tf.keras style.
    RL(p_t) = - 1/p_t * log(p_t), where p_t is the probability associated with the true class.

    :param y_true: Ground truth labels, shape of [batch_size, num_classes].
    :param y_pred: Predicted class probabilities, shape of [batch_size, num_classes].
    :return: A scalar representing the mean rational loss over the batch.
    NOTE: written assuming GPU support to make use of fast Tensor operations.
    """
    # Create a Categorical Cross-Entropy loss instance
    cce = tf.keras.losses.CategoricalCrossentropy(
        reduction=tf.keras.losses.Reduction.NONE # Keep unreduced loss tensor
    )
    # clip the predicted probabilities to avoid log(0)
    _y_pred = tf.clip_by_value(y_pred, 1e-7, 1.0)
    cross_entropy = cce(y_true, _y_pred) # batch_sizex1
    # find the probability associated with the true class
    _y_true = tf.argmax(y_true, axis=1)
    # get the predicted probability of the true class
    _y_pred = tf.gather(_y_pred, _y_true, axis=1)
    # rational loss by dividing the cross entropy by the predicted probability of the true class
    _rational_loss = cross_entropy / _y_pred # Rational loss
    
    return K.mean(_rational_loss)



In [9]:
# test rational loss
y_true = np.array([[0, 1, 0], [0, 0, 1]])
y_pred = np.array([[0.05, 0.95, 0], [0.1, 0.8, 0.1]])
print(rational_loss(y_true, y_pred))



tf.Tensor(5756468.518475293, shape=(), dtype=float64)


In [None]:
# def generate_data() -> Tuple[np.ndarray, np.ndarray]:
#     """
#     Generates synthetic y_true and y_pred data.
#     :return: y_true and y_pred arrays.
#     """
#     num_samples = 100
#     num_classes = 3
#     y_true = np.eye(num_classes)[np.random.choice(num_classes, num_samples)]
#     y_pred = np.random.rand(num_samples, num_classes)
#     y_pred /= y_pred.sum(axis=1, keepdims=True)
#     return y_true, y_pred
# 
# def plot_rational_loss() -> None:
#     """
#     Plots the rational loss for the generated data.
#     """
#     y_true, y_pred = generate_data()
#     rational_loss_fixed = rational_loss()
#     losses: List[float] = [rational_loss_fixed(y_t.reshape(1, -1), y_p.reshape(1, -1)).numpy() for y_t, y_p in zip(y_true, y_pred)]
#     plt.plot(losses)
#     plt.title("Rational Loss for Multi-Class Classification")
#     plt.xlabel("Sample")
#     plt.ylabel("Loss")
#     plt.show()
# 
# plot_rational_loss()

In [10]:
# compiling the model
model_rl.compile(optimizer= tf.keras.optimizers.SGD(), loss=rational_loss, metrics=metrics)
# training the model
history = model_rl.fit(x_train, y_train, batch_size=64, epochs=100, validation_split=0.2, callbacks=[early_stopping_callback])
# evaluating the model
test_scores = model_rl.evaluate(x_test, y_test, verbose=2)
print('Test loss:', test_scores[0])
print('Test accuracy:', test_scores[1])
# saving the model
path = './weights/mnist_rational_model.keras'
model_rl.save(path)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
313/313 - 1s - loss: 11.6380 - categorical_accuracy: 0.9718 - 724ms/epoch - 2ms/step
Test loss: 11.638002395629883
Test accuracy: 0.9718000292778015


In [11]:
# rebuilding the models
model_ce = build_model()
model_fl = build_model()
model_rl = build_model()


In [12]:
# imbalance 
def create_imbalanced_data(x, y, imbalance_rate=0.5):
    """
    Create an imbalanced dataset based on a given probability distribution.
    The probability for class d is given by: P(d) = 0.5^d / 2*(1 - 0.5^10)

    :param x: Features, shape of [total_samples, feature_dim].
    :param y: One-hot encoded labels, shape of [total_samples, num_classes].
    :param imbalance_rate: Base rate for the exponential decay of class frequency (default 0.5).
    :return: Tuple of imbalanced features and labels, shapes of [selected_samples, feature_dim] and [selected_samples, num_classes].
    """
    total_samples = len(y)
    a = imbalance_rate
    normalization_factor = 2 * (1 - a**10)

    indices_by_class = [np.where(y[:, d] == 1)[0] for d in range(10)]
    selected_indices = []

    for d in range(10):
        probability_d = (a**d) / normalization_factor
        frequency = int(total_samples * probability_d)
        np.random.shuffle(indices_by_class[d]) # Shuffle to ensure random selection
        selected_indices.extend(indices_by_class[d][:frequency])

    return x[selected_indices], y[selected_indices]


x_train_imbalanced, y_train_imbalanced = create_imbalanced_data(x_train, y_train)


In [13]:
# compiling the model
model_ce.compile(optimizer=tf.keras.optimizers.SGD(), loss=tf.keras.losses.CategoricalCrossentropy(), metrics=metrics)
model_fl.compile(optimizer=tf.keras.optimizers.SGD(), loss=tf.keras.losses.CategoricalFocalCrossentropy(), metrics=metrics)
model_rl.compile(optimizer=tf.keras.optimizers.SGD(), loss=rational_loss, metrics=metrics)

In [15]:
print("Training on imbalanced data:")
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True)
print("Cross Entropy:")
model_ce.fit(x_train_imbalanced, y_train_imbalanced, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping_callback])
print("Focal Loss:")
model_fl.fit(x_train_imbalanced, y_train_imbalanced, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping_callback])
print("Rational Loss:")
model_rl.fit(x_train_imbalanced, y_train_imbalanced, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping_callback])

Training on imbalanced data:
Cross Entropy:
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Focal Loss:
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Rational Loss:
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100


<keras.src.callbacks.History at 0x7fa5681637c0>

In [17]:
# Balanced data
print("test on balanced data:")
print(model_ce.evaluate(x_test, y_test))
print(model_fl.evaluate(x_test, y_test))
print(model_rl.evaluate(x_test, y_test))

# Imbalanced data
print("test on imbalanced data:")
x_test_imbalanced, y_test_imbalanced = create_imbalanced_data(x_test, y_test)
print(model_ce.evaluate(x_test_imbalanced, y_test_imbalanced))
print(model_fl.evaluate(x_test_imbalanced, y_test_imbalanced))
print(model_rl.evaluate(x_test_imbalanced, y_test_imbalanced))

test on balanced data:
[5.327250957489014, 0.4049000144004822]
[0.8436211943626404, 0.39590001106262207]
[32818.21484375, 0.4074999988079071]
test on imbalanced data:
[1.2605335712432861, 0.83990877866745]
[0.20391705632209778, 0.8244013786315918]
[3080.645263671875, 0.843557596206665]


In [18]:
def accuracy_by_bins(model, x, y):
    """
    Calculate and print the accuracy of the given model for specific bins of classes.
    The bins are defined as: 0-1, 2-7, 8-9.

    :param model: Trained tf.keras model to evaluate.
    :param x: Input features, shape of [num_samples, feature_dim].
    :param y: One-hot encoded labels, shape of [num_samples, num_classes].
    """
    predictions = model.predict(x).argmax(axis=-1)
    true_labels = y.argmax(axis=-1)
    bins = [(0, 1), (2, 7), (8, 9)]
    for bin_start, bin_end in bins:
        mask = (true_labels >= bin_start) & (true_labels <= bin_end) 
        bin_accuracy = np.mean(predictions[mask] == true_labels[mask])
        print(f"Accuracy for bin {bin_start}-{bin_end}: {bin_accuracy}")

print("Accuracy by bins for balanced data:")
print("Cross Entropy:")
accuracy_by_bins(model_ce, x_test, y_test)
print("Focal Loss:")
accuracy_by_bins(model_fl, x_test, y_test)
print("Rational Loss:")
accuracy_by_bins(model_rl, x_test, y_test)

Accuracy by bins for balanced data:
Cross Entropy:
Accuracy for bin 0-1: 0.9919621749408983
Accuracy for bin 2-7: 0.33056590986106404
Accuracy for bin 8-9: 0.0
Focal Loss:
Accuracy for bin 0-1: 0.9843971631205674
Accuracy for bin 2-7: 0.3180277871907828
Accuracy for bin 8-9: 0.0
Rational Loss:
Accuracy for bin 0-1: 0.9914893617021276
Accuracy for bin 2-7: 0.33514063029481533
Accuracy for bin 8-9: 0.0
