In [None]:
from sklearn.datasets import load_digits 
import numpy as np
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
import warnings
import matplotlib.pyplot as plt
from keras.models import Model
from keras.layers import Input, Dense
# Suppress every Python warning for the remainder of the kernel session.
# NOTE(review): presumably meant to keep library deprecation noise out of the
# cell outputs -- be aware that this also hides warnings that may matter.
warnings.filterwarnings('ignore')

In [None]:
# Load the scikit-learn handwritten-digits data set.
digits = load_digits()

# Features: `digits.data` is used as-is (no reshape is applied here; the
# models below consume 64 features per sample). Scale by the global maximum
# so that the largest input value becomes 1.
X = digits.data / digits.data.max()

# Targets: one-hot encode the class ids, because the models end in a softmax
# layer and are trained with categorical cross-entropy.
y = to_categorical(digits.target)

In [None]:
# Fully connected classifier built with the Sequential API:
# 64 input features -> Dense(64, relu) -> Dense(32, relu) -> Dense(10, softmax).
model = Sequential([
    Dense(64, activation='relu', input_shape=(64,)),
    Dense(32, activation='relu'),
    # One softmax unit per class; there are 10 classes in the digits data set.
    Dense(10, activation='softmax'),
])


In [None]:
# Configure training: categorical cross-entropy pairs with the one-hot
# targets and the softmax output; accuracy is tracked for the plots below.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [None]:
# Train the Sequential model for 10 epochs in mini-batches of 32 samples.
# validation_split=0.2 makes Keras hold out 20% of the samples for validation;
# the per-epoch metrics are kept in `history.history`.
history = model.fit(
    X,
    y,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

In [None]:
# Training vs. validation loss per epoch for the Sequential model.
# `val_loss` is measured on the validation_split held out by fit(), not on a
# separate test set, so the curve is labelled "Validation" rather than "Test".
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(history.history['loss'], label='Train')
ax.plot(history.history['val_loss'], label='Validation')
ax.set_title('Model loss')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(loc='upper left')
plt.show()



In [None]:
# Training vs. validation accuracy per epoch for the Sequential model.
# `val_accuracy` is measured on the validation_split held out by fit(), not on
# a separate test set, so the curve is labelled "Validation" rather than "Test".
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(history.history['accuracy'], label='Train')
ax.plot(history.history['val_accuracy'], label='Validation')
ax.set_title('Model accuracy')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
ax.legend(loc='upper left')
# Render the figure explicitly so the cell does not echo the Legend repr
# (keeps this cell consistent with the loss plot of the same model).
plt.show()


In [None]:
# Same 64 -> 64 -> 32 -> 10 network, this time wired with the functional API:
# each layer is called on the tensor produced by the previous one.
inputs = Input(shape=(64,))
dense_layer1 = Dense(64, activation='relu')(inputs)
dense_layer2 = Dense(32, activation='relu')(dense_layer1)
outputs = Dense(10, activation='softmax')(dense_layer2)

# Wrap the tensor graph in a Model and configure training with the same
# optimizer, loss and metric as the Sequential model.
model_f = Model(inputs=inputs, outputs=outputs)
model_f.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer overview of the network.
model_f.summary()

In [None]:
# Train the functional-API model with the same settings as the Sequential one:
# 10 epochs, batches of 32, 20% of the samples held out for validation.
history_f = model_f.fit(X, y, batch_size=32, epochs=10, validation_split=0.2)

In [None]:
# Training vs. validation loss per epoch for the functional-API model.
# `val_loss` is measured on the validation_split held out by fit(), not on a
# separate test set, so the curve is labelled "Validation" rather than "Test".
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(history_f.history['loss'], label='Train')
ax.plot(history_f.history['val_loss'], label='Validation')
ax.set_title('Model loss')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(loc='upper left')
# Render the figure explicitly so the cell does not echo the Legend repr.
plt.show()


In [None]:
# Training vs. validation accuracy per epoch for the functional-API model.
# `val_accuracy` is measured on the validation_split held out by fit(), not on
# a separate test set, so the curve is labelled "Validation" rather than "Test".
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(history_f.history['accuracy'], label='Train')
ax.plot(history_f.history['val_accuracy'], label='Validation')
ax.set_title('Model accuracy')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
ax.legend(loc='upper left')
# Render the figure explicitly so the cell does not echo the Legend repr.
plt.show()


$$L = -\sum_{i} y_i \log(\hat{y}_i)$$
Here, $y$ (`y_true`) is the vector of true target values, which is one-hot encoded: in a three-class example it would be [1,0,0] for cat, [0,1,0] for dog, and [0,0,1] for bird. $\hat{y}$ (`y_pred`) is the vector of predicted probabilities output by the model (for example, [0.7, 0.2, 0.1]).