# Practice on Keras Functional API

In [1]:
# imports
import keras
from keras import layers
from keras import backend as K
import numpy as np

In [2]:
# mnist dataset
# loading training MNIST dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
# reshaping and normalizing the images
x_train = x_train.reshape(60000, 784).astype('float32')/255  
x_test = x_test.reshape(10000, 784).astype('float32')/255
# one-hot encoding of labels
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)


In [3]:
# building the model
def build_model():
    inputs = keras.Input(shape=(784,), name='input')  # input layer
    x = layers.Dense(64, activation='relu', name='hidden1')(inputs)  # hidden layer
    x = layers.Dense(64, activation='relu', name='hidden2')(x)  # hidden layer
    outputs = layers.Dense(10, activation='softmax', name='output')(x)  # output layer
    return keras.Model(inputs=inputs, outputs=outputs)

model_ce = build_model()
model_fl = build_model()
model_rl = build_model()

model_rl.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (InputLayer)          [(None, 784)]             0         
                                                                 
 hidden1 (Dense)             (None, 64)                50240     
                                                                 
 hidden2 (Dense)             (None, 64)                4160      
                                                                 
 output (Dense)              (None, 10)                650       
                                                                 
Total params: 55050 (215.04 KB)
Trainable params: 55050 (215.04 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [4]:
optimizer = keras.optimizers.SGD()
ce_loss = keras.losses.CategoricalCrossentropy()
metrics = [keras.metrics.CategoricalAccuracy()]

In [5]:
# compiling the model
model_ce.compile(optimizer=optimizer, loss=ce_loss, metrics=metrics)

In [6]:
# training the model
model_ce.fit(x_train, y_train, batch_size=64, epochs=6, validation_split=0.2)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.src.callbacks.History at 0x1f9addd9370>

In [7]:
# evaluating the model
test_scores = model_ce.evaluate(x_test, y_test, verbose=2)
print('Test loss:', test_scores[0])
print('Test accuracy:', test_scores[1])


313/313 - 0s - loss: 0.2423 - categorical_accuracy: 0.9313 - 296ms/epoch - 944us/step
Test loss: 0.24232876300811768
Test accuracy: 0.9312999844551086


In [8]:
# saving the model
path = './weights/mnist_ce_model.keras'
model_ce.save(path)

In [9]:
focal_loss = keras.losses.CategoricalFocalCrossentropy()

In [10]:
# compiling the model
optimizer = keras.optimizers.SGD()
model_fl.compile(optimizer=optimizer, loss=focal_loss, metrics=metrics)

In [11]:
# training the model
model_fl.fit(x_train, y_train, batch_size=64, epochs=6, validation_split=0.2)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.src.callbacks.History at 0x1f9af151730>

In [12]:
# evaluating the model
test_scores = model_fl.evaluate(x_test, y_test, verbose=2)
print('Test loss:', test_scores[0])
print('Test accuracy:', test_scores[1])

313/313 - 0s - loss: 0.0503 - categorical_accuracy: 0.8933 - 254ms/epoch - 812us/step
Test loss: 0.05033818632364273
Test accuracy: 0.8932999968528748


In [13]:
# saving the model
path = './weights/mnist_focal_model.keras'
model_fl.save(path)

In [27]:
# defining rational loss function
# RL(p_t) = - 1/p_t * log(p_t)
def rational_loss():
    """
    Rational Loss for multi-class classification, Keras style.
    RL(p_t) = - 1/p_t * log(p_t), where p_t is the probability associated with the true class.

    :return: A callable rational_loss_fixed(y_true, y_pred) to be used as a Keras loss function.
    """
    def rational_loss_fixed(y_true, y_pred):
        """
        The actual loss computation.
        :param y_true: Ground truth labels, shape of [batch_size, num_classes].
        :param y_pred: Predicted class probabilities, shape of [batch_size, num_classes].
        :return: A scalar representing the mean rational loss over the batch.
        """
        # Clip the prediction value to prevent NaN's and Inf's
        y_pred = K.clip(y_pred, K.epsilon(), 1. - K.epsilon())
        # Calculate Cross Entropy
        cross_entropy = -y_true * K.log(y_pred)
        # Compute the Rational Loss
        loss = (-1 / y_pred) * cross_entropy
        # Average the loss over the batch
        return K.mean(K.sum(loss, axis=-1))

    return rational_loss_fixed


In [15]:
# compiling the model
rational_loss = rational_loss()
optimizer = keras.optimizers.SGD()
model_rl.compile(optimizer=optimizer, loss=rational_loss, metrics=metrics)

In [16]:
# training the model
history = model_rl.fit(x_train, y_train, batch_size=64, epochs=6, validation_split=0.2)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


In [17]:
# evaluating the model
test_scores = model_rl.evaluate(x_test, y_test, verbose=2)
print('Test loss:', test_scores[0])
print('Test accuracy:', test_scores[1])

313/313 - 0s - loss: -1.4490e+08 - categorical_accuracy: 0.1010 - 316ms/epoch - 1ms/step
Test loss: -144901712.0
Test accuracy: 0.10100000351667404


In [18]:
# saving the model
path = './weights/mnist_rational_model.keras'
model_rl.save(path)

In [19]:
# # defining focal  loss function
# # FL(p_t) = -alpha_t * (1-p_t)^gamma * log(p_t)
# def focal_loss(alpha=0.25, gamma=2.0):
#     """
#     Focal Loss, Keras styles
#     :param alpha: Weighting factor for the positive class, typically in the range [0, 1].
#     :param gamma: Focusing parameter to down-weight well-classified examples, typically in the range [0, 5].
#     :return: A callable focal_loss_fixed(y_true, y_pred) to be used as a Keras loss function.
#     """
#     def focal_loss_fixed(y_true, y_pred):
#         """
#         The actual loss computation.
#         :param y_true: Ground truth labels, shape of [batch_size, num_classes].
#         :param y_pred: Predicted class probabilities, shape of [batch_size, num_classes].
#         :return: 
#         """
#         # Clip the prediction value to prevent NaN's and Inf's
#         y_pred = K.clip(y_pred, K.epsilon(), 1. - K.epsilon())
#         # Calculate Cross Entropy
#         cross_entropy = -y_true * K.log(y_pred)
#         # Calculate Focal Loss
#         loss = alpha * K.pow(1 - y_pred, gamma) * cross_entropy
#         # Compute mean loss in batch
#         return K.mean(K.sum(loss, axis=-1))
#     return focal_loss_fixed


In [28]:
# building the model
def build_model():
    inputs = keras.Input(shape=(784,), name='input')  # input layer
    x = layers.Dense(64, activation='relu', name='hidden1')(inputs)  # hidden layer
    x = layers.Dense(64, activation='relu', name='hidden2')(x)  # hidden layer
    outputs = layers.Dense(10, activation='softmax', name='output')(x)  # output layer
    return keras.Model(inputs=inputs, outputs=outputs)

model_ce = build_model()
model_fl = build_model()
model_rl = build_model()


In [29]:
# imbalance 
def create_imbalanced_data(x, y, imbalance_rate=0.5):
    indices_by_class = [np.where(y[:, d] == 1)[0] for d in range(10)]
    selected_indices = []
    
    for d in range(10):
        frequency = int(len(indices_by_class[d]) * (imbalance_rate ** d))
        selected_indices.extend(indices_by_class[d][:frequency])
    
    return x[selected_indices], y[selected_indices]

x_train_imbalanced, y_train_imbalanced = create_imbalanced_data(x_train, y_train)


In [30]:
model_ce.compile(optimizer=keras.optimizers.SGD(), loss=keras.losses.CategoricalCrossentropy(), metrics=[keras.metrics.CategoricalAccuracy()])
model_fl.compile(optimizer=keras.optimizers.SGD(), loss=keras.losses.CategoricalFocalCrossentropy(), metrics=[keras.metrics.CategoricalAccuracy()])
model_rl.compile(optimizer=keras.optimizers.SGD(), loss=rational_loss(), metrics=[keras.metrics.CategoricalAccuracy()])


In [32]:
print("Training on imbalanced data:")
print("Cross Entropy:")
model_ce.fit(x_train_imbalanced, y_train_imbalanced, epochs=10, batch_size=32)
print("Focal Loss:")
model_fl.fit(x_train_imbalanced, y_train_imbalanced, epochs=10, batch_size=32)
print("Rational Loss:")
model_rl.fit(x_train_imbalanced, y_train_imbalanced, epochs=10, batch_size=32)

Training on imbalanced data:
Cross Entropy:
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Focal Loss:
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Rational Loss:
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1f9b40b59d0>

In [33]:
# Balanced data
print(model_ce.evaluate(x_test, y_test))
print(model_fl.evaluate(x_test, y_test))
print(model_rl.evaluate(x_test, y_test))

# Imbalanced data
x_test_imbalanced, y_test_imbalanced = create_imbalanced_data(x_test, y_test)
print(model_ce.evaluate(x_test_imbalanced, y_test_imbalanced))
print(model_fl.evaluate(x_test_imbalanced, y_test_imbalanced))
print(model_rl.evaluate(x_test_imbalanced, y_test_imbalanced))

[1.35547935962677, 0.6879000067710876]
[0.346059113740921, 0.5706999897956848]
[-144901712.0, 0.10100000351667404]
[0.10905645787715912, 0.9706601500511169]
[0.022728214040398598, 0.9550122022628784]
[-151250064.0, 0.06161369010806084]


In [34]:
def accuracy_by_bins(model, x, y):
    predictions = model.predict(x).argmax(axis=-1)
    true_labels = y.argmax(axis=-1)
    bins = [(0, 1), (2, 7), (8, 9)]
    for bin_start, bin_end in bins:
        mask = (true_labels >= bin_start) & (true_labels <= bin_end)
        bin_accuracy = np.mean(predictions[mask] == true_labels[mask])
        print(f"Accuracy for bin {bin_start}-{bin_end}: {bin_accuracy}")

print("Accuracy by bins for balanced data:")
print("Cross Entropy:")
accuracy_by_bins(model_ce, x_test, y_test)
print("Focal Loss:")
accuracy_by_bins(model_fl, x_test, y_test)
print("Rational Loss:")
accuracy_by_bins(model_rl, x_test, y_test)

Accuracy by bins for balanced data:
Cross Entropy:
Accuracy for bin 0-1: 0.9947990543735225
Accuracy for bin 2-7: 0.8053202304303626
Accuracy for bin 8-9: 0.011094301563287948
Focal Loss:
Accuracy for bin 0-1: 0.9947990543735225
Accuracy for bin 2-7: 0.6104710267705863
Accuracy for bin 8-9: 0.0
Rational Loss:
Accuracy for bin 0-1: 0.0
Accuracy for bin 2-7: 0.17112843104032532
Accuracy for bin 8-9: 0.0
