# Numerically stable softmax implementation

In [7]:
import numpy as np
from numpy.typing import NDArray

from typing import Annotated
import matplotlib.pyplot as plt
import logging

import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Sequential
from tensorflow.keras.losses import SparseCategoricalCrossentropy

from sklearn.datasets import make_blobs


print(f'Tensorflow version : {tf.__version__}')

# Query the physical GPU list once; it drives both the report line and the
# memory-growth configuration below.
gpus = tf.config.list_physical_devices('GPU')
print(f'Using GPU: {"yes" if gpus else "no"}.')

# prevent tensorflow from using all the GPU memory
if gpus:
  try:
    # Memory growth has to be configured identically on every visible GPU,
    # so enable it for each device rather than only the first one.
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
  except RuntimeError as e:
    # Per the TensorFlow GPU guide this is raised when the GPUs were already
    # initialized (e.g. the cell is re-run on a live kernel); report and continue.
    print(e)

# Keep the notebook output readable: only errors from the TensorFlow logger,
# and no AutoGraph conversion chatter.
logging.getLogger('tensorflow').setLevel(logging.ERROR)
tf.autograph.set_verbosity(0)

Tensorflow version : 2.18.0
Using GPU: yes.


## Softmax implementation

In [None]:
def softmax_(z, axis=None):
  """Compute the softmax of ``z`` in a numerically stable way.

  The maximum of ``z`` is subtracted before exponentiating. This leaves the
  result mathematically unchanged (the common factor ``exp(-max)`` cancels
  in the ratio) but keeps every exponent <= 0, so ``np.exp`` cannot overflow
  to ``inf`` and at least one term equals ``exp(0) == 1``, so the denominator
  cannot underflow to zero.

  Args:
    z: array-like of logits (scalar, list, or ndarray).
    axis: axis along which to normalise. ``None`` (the default) normalises
      over all elements, matching the original single-vector behaviour;
      use ``axis=-1`` (or ``1``) for one softmax per row of a 2-D batch.

  Returns:
    Array with the same shape as ``z`` whose entries along ``axis`` are
    non-negative and sum to 1.
  """
  z = np.asarray(z)
  if z.size == 0:
    # np.max raises on empty input; return an empty float array instead,
    # as the un-shifted formula did.
    return np.exp(z)

  # keepdims=True keeps the reduced axis so the result broadcasts against z.
  shifted = z - np.max(z, axis=axis, keepdims=True)
  ez = np.exp(shifted)
  return ez / np.sum(ez, axis=axis, keepdims=True)

In [13]:
# Synthetic training set: 2-D points drawn around four fixed cluster centers,
# one center per class.
centers = [
  [-5, 2],
  [-2, -2],
  [1, 2],
  [5, -2],
]
n_classes = 4
m = 100    # total number of samples requested from make_blobs
std = 1.0  # standard deviation passed as cluster_std for every blob

# random_state is fixed so the same points are generated on every run.
X_train, y_train = make_blobs(
  n_samples=m,
  centers=centers,
  cluster_std=std,
  random_state=30,
)

In [14]:
# Fully connected classifier assembled layer by layer: two ReLU hidden layers
# followed by a 4-unit linear output layer, so the network emits raw logits.
model = Sequential()
model.add(Dense(25, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(4, activation='linear'))

# from_logits=True tells the loss to expect the un-normalised outputs of the
# linear layer above rather than probabilities.
model.compile(
  optimizer=Adam(learning_rate=0.001),
  loss=SparseCategoricalCrossentropy(from_logits=True),
)

In [15]:
# Train for 10 epochs; the trailing semicolon hides the returned History repr.
model.fit(x=X_train, y=y_train, epochs=10);

Epoch 1/10


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 135ms/step - loss: 1.1790
Epoch 2/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 1.1077 
Epoch 3/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 1.0494 
Epoch 4/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.9813 
Epoch 5/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.9395 
Epoch 6/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.8981 
Epoch 7/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.8506 
Epoch 8/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.7952 
Epoch 9/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.7625 
Epoch 10/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.7134 
