**Multiclass Classification using Softmax**

In [13]:
import numpy as np 
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.datasets import make_blobs


In [14]:
# Build a synthetic 2-D training set with make_blobs: one cluster per centre.
centers = [
    [-5, 2],
    [-2, -2],
    [1, 2],
    [5, -2],
]
x_train, y_train = make_blobs(
    n_samples=2000,
    centers=centers,
    cluster_std=1,
    random_state=30,  # fixed seed so the same points are drawn on every run
)

In [15]:
# Seed Python, NumPy and TensorFlow before building the network so that
# weight initialisation and batch shuffling -- and therefore the training
# log below -- are reproducible after Restart Kernel -> Run All.
tf.keras.utils.set_random_seed(1234)

# Baseline network: the output layer applies softmax itself, so the model
# emits class probabilities and the loss is computed from those probabilities.
model = Sequential([
    Dense(25,activation='relu'),
    Dense(15,activation='relu'),
    Dense(4,activation='softmax')   # four output units, one per entry in `centers`
])
model.compile(
    # Sparse variant: targets are integer class ids, not one-hot vectors.
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(0.001),  # 0.001 is the learning rate
    )

model.fit(x_train,y_train,epochs=10)

Epoch 1/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 1.1940  
Epoch 2/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.4867
Epoch 3/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.2727
Epoch 4/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.1568
Epoch 5/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.1023  
Epoch 6/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0775
Epoch 7/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0646
Epoch 8/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0563
Epoch 9/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0506
Epoch 10/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0464


<keras.src.callbacks.history.History at 0x26ade4b1950>

In [16]:
# Forward pass over the training points; the softmax output layer means each
# row of p1 is already a probability distribution over the four classes.
p1 = model.predict(x=x_train)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [17]:
# Peek at the first two prediction rows, then the overall value range.
first_rows = p1[:2]
print(first_rows)
print(p1.min(), p1.max())

[[2.6655039e-03 2.0241598e-03 9.6710533e-01 2.8204979e-02]
 [9.9223059e-01 4.4951993e-03 3.2399064e-03 3.4327015e-05]]
5.2998803e-12 0.9999999


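**The approach above is correct, but the more numerically stable way to implement softmax is shown below: the final layer uses a linear activation (it outputs raw logits) and the loss is created with `from_logits=True`, so the softmax and the cross-entropy are computed together.**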

In [19]:
# Seed Python, NumPy and TensorFlow before building the network so that
# weight initialisation and batch shuffling -- and therefore the training
# log below -- are reproducible after Restart Kernel -> Run All.
tf.keras.utils.set_random_seed(1234)

# Preferred formulation: the output layer is linear, so the model emits raw
# logits; the softmax is folded into the loss via from_logits=True.
preffered_model = Sequential([
    Dense(25,activation='relu'),
    Dense(15,activation='relu'),
    Dense(4,activation='linear')   # four logits, one per entry in `centers`
])

preffered_model.compile(
    # from_logits=True tells the loss that predictions are un-normalised
    # scores rather than probabilities.
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer = tf.keras.optimizers.Adam(0.001),  # 0.001 is the learning rate
)

preffered_model.fit(x_train,y_train,epochs=10)

Epoch 1/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 1.0424  
Epoch 2/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.4774
Epoch 3/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.2180
Epoch 4/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.1155
Epoch 5/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0769
Epoch 6/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0600
Epoch 7/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0511
Epoch 8/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0452
Epoch 9/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0406
Epoch 10/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0370


<keras.src.callbacks.history.History at 0x26ade4b1450>

In [20]:
# Forward pass over the training points; the output layer is linear, so each
# row of p2 holds raw logits rather than probabilities.
p2 = preffered_model.predict(x=x_train)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [22]:
# Peek at the first two rows of logits, then the overall value range.
first_logits = p2[:2]
print(first_logits)
print(p2.min(), p2.max())

[[-3.1439283 -2.93258    3.0885155 -0.7924393]
 [ 3.7578244 -1.6191897 -4.7432337 -3.5065913]]
-11.425682 8.890912


The model now outputs raw logits rather than probabilities. To obtain probabilities, pass the outputs through a softmax.


In [24]:
# Turn each row of logits into probabilities; axis=-1 (the default) applies
# the softmax across the class dimension of every sample.
softmax_tensor = tf.nn.softmax(p2, axis=-1)
p2_prob = softmax_tensor.numpy()

print(p2_prob[:2])
print(p2_prob.min(), p2_prob.max())

[[1.9166839e-03 2.3677617e-03 9.7558802e-01 2.0127473e-02]
 [9.9450547e-01 4.5962068e-03 2.0213629e-04 6.9616392e-04]]
1.1673042e-08 0.99999976


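To select the most likely category, the softmax is not required: softmax preserves the ordering of its inputs, so the largest logit always corresponds to the largest probability. One can find the index of the largest output using `np.argmax()`.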

In [23]:
# Show the first five logit rows next to the class with the largest logit.
for logits in p2[:5]:
    predicted_class = np.argmax(logits)
    print(f"{logits}, probable category: {predicted_class}")

[-3.1439283 -2.93258    3.0885155 -0.7924393], probable category: 2
[ 3.7578244 -1.6191897 -4.7432337 -3.5065913], probable category: 0
[ 2.4551783  -0.80495524 -3.7412927  -3.008836  ], probable category: 0
[-3.1424901  3.3365047 -1.3896923 -2.9190047], probable category: 1
[-0.93381345 -3.5995638   4.3585854  -3.1257517 ], probable category: 2
