In [20]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
from sklearn.model_selection import train_test_split

In [21]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [22]:
X_train = X_train.reshape(-1, 784) / 255.0  # Reshape and normalize
X_val = X_val.reshape(-1, 784) / 255.0

In [27]:
y_train = y_train.astype('float32')
y_val = y_val.astype('float32')
y_test = y_test.astype('float32')

In [23]:
def bi_tempered_logistic_loss(t1, t2, label_smoothing=0.0):
    def loss(y_true, y_pred):
        one = tf.constant(1.0, dtype=y_pred.dtype)
        two = tf.constant(2.0, dtype=y_pred.dtype)

        temp1 = tf.pow(one + tf.pow(t1 * (y_true - y_pred), two), -one / t1)
        temp2 = tf.pow(one + tf.pow(t2 * (y_true - y_pred), two), -one / t2)

        loss_values = temp1 * (t1 - y_true * y_pred) - temp2 * (t2 - y_true * y_pred)
        loss_values *= (one - label_smoothing)

        return tf.reduce_sum(loss_values)

    return loss


In [24]:
# Create a simple neural network model
model = models.Sequential([
    layers.Input(shape=(784,)),
    layers.Dense(128, activation='relu'),
    layers.Dense(10, activation='softmax')
])

In [25]:
model.compile(optimizer='adam', loss=bi_tempered_logistic_loss(t1=0.7, t2=1.3, label_smoothing=0.1), metrics=['accuracy'])

In [28]:
model.compile(optimizer='adam', loss=bi_tempered_logistic_loss(t1=0.7, t2=1.3, label_smoothing=0.1), metrics=['accuracy'])

In [29]:
# Evaluate the model
test_loss, test_acc = model.evaluate(X_test.reshape(-1, 784) / 255.0, y_test)
print(f"Test accuracy: {test_acc:.4f}")

Test accuracy: 0.0691
