[Reference](https://medium.com/h7w/kullback-leibler-divergence-with-keras-227ef84f2a1b)

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import backend as K

# Script-wide settings: image geometry, label count and training knobs
img_width = 32
img_height = 32
no_classes = 10      # width of the one-hot targets and of the softmax layer

batch_size = 250
no_epochs = 25
validation_split = 0.2   # forwarded to model.fit(validation_split=...)
verbosity = 1            # forwarded to model.fit(verbose=...)

# Fetch the CIFAR10 train/test splits as (inputs, targets) pairs
(input_train, target_train), (input_test, target_test) = (
    cifar10.load_data()
)

# Choose the per-sample layout that matches the backend's image data format,
# then reshape both input arrays to (num_samples, *input_shape)
if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 3)

input_train = input_train.reshape((input_train.shape[0],) + input_shape)
input_test = input_test.reshape((input_test.shape[0],) + input_shape)

# Cast the image arrays to float32 and divide by 255
input_train, input_test = (
    images.astype('float32') / 255
    for images in (input_train, input_test)
)

# Turn the class-index vectors into binary class matrices with
# `no_classes` columns
target_train, target_test = (
    to_categorical(labels, no_classes)
    for labels in (target_train, target_test)
)

# Build the model: two Conv2D -> MaxPooling2D -> Dropout stages, then a
# flattened dense head ending in a `no_classes`-way softmax.
#
# The input shape is declared with an explicit Input object as the first
# entry instead of an `input_shape=` argument on the first Conv2D; recent
# Keras versions emit a UserWarning for the latter in Sequential models.
model = Sequential([
    tf.keras.Input(shape=input_shape),
    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.5),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.5),
    Flatten(),
    Dense(256, activation='relu'),
    Dense(no_classes, activation='softmax'),
])

# Configure training: Adam optimizer with default settings, KL divergence
# between the target and predicted distributions as the loss, and accuracy
# reported as the metric
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.KLDivergence(),
    metrics=['accuracy'],
)

# Fit on the training split; `validation_split` is handed to Keras so it
# reserves that fraction of the training data for validation
model.fit(
    x=input_train,
    y=target_train,
    validation_split=validation_split,
    epochs=no_epochs,
    batch_size=batch_size,
    verbose=verbosity,
)

# Score the trained model on the test split without progress output;
# `score` holds the loss at index 0 and the accuracy metric at index 1
score = model.evaluate(
    x=input_test,
    y=target_test,
    verbose=0,
)
print(f'Test loss: {score[0]:.4f} / Test accuracy: {score[1]:.4f}')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/25
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 340ms/step - accuracy: 0.2338 - loss: 2.0634 - val_accuracy: 0.4431 - val_loss: 1.5970
Epoch 2/25
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 322ms/step - accuracy: 0.4457 - loss: 1.5359 - val_accuracy: 0.5257 - val_loss: 1.3871
Epoch 3/25
[1m 72/160[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m24s[0m 283ms/step - accuracy: 0.5055 - loss: 1.3720