# 1. Build Softmax Classification

In [11]:
import tensorflow as tf
import numpy as np

In [19]:
# Seed TensorFlow's global RNG so the random initial values of W and b below
# are the same on every run; re-running this cell also resets the parameters
# to that same starting point.
tf.random.set_seed(777)

# Training features: 8 samples x 4 features
x_train = [[1, 2, 1, 1],
           [2, 1, 3, 2],
           [3, 1, 3, 4],
           [4, 1, 5, 5],
           [1, 7, 5, 5],
           [1, 2, 5, 6],
           [1, 6, 6, 6],
           [1, 7, 7, 7]]

# One-hot encoded labels: 8 samples x 3 classes
y_train = [[0, 0, 1],
           [0, 0, 1],
           [0, 0, 1],
           [0, 1, 0],
           [0, 1, 0],
           [0, 1, 0],
           [1, 0, 0],
           [1, 0, 0]]

# Convert to float32 NumPy arrays
x_data = np.array(x_train, dtype=np.float32)
y_data = np.array(y_train, dtype=np.float32)

# Derive the dimensions from the data rather than hard-coding them, so the
# parameter shapes keep matching if rows/columns are added above.
nb_features = x_data.shape[1]   # 4 input features per sample
nb_classes = y_data.shape[1]    # 3 classes = width of the one-hot labels

# A single batch that contains the whole training set
dataset = tf.data.Dataset.from_tensor_slices((x_data, y_data)).batch(len(x_data))

# Trainable parameters: weight matrix (features x classes) and per-class bias
W = tf.Variable(tf.random.normal([nb_features, nb_classes]), name='weight')
b = tf.Variable(tf.random.normal([nb_classes]), name='bias')

# 2. Build Softmax function

In [20]:
# Hypothesis
def softmax_func(features):
    """Return the softmax of the linear model ``features @ W + b``.

    Args:
        features: input that ``tf.matmul`` can multiply with the module-level
            weight ``W``.

    Returns:
        The result of ``tf.nn.softmax`` applied to the logits.
    """
    # Linear scores first, then normalise them with softmax
    logits = tf.matmul(features, W) + b
    return tf.nn.softmax(logits)

# Cost function (Cross-Entropy)
def cross_entropy(features, labels):
    """Mean cross-entropy between ``softmax_func(features)`` and ``labels``.

    Args:
        features: inputs forwarded unchanged to ``softmax_func``.
        labels: target values multiplied element-wise with the negative log
            of the prediction (one-hot rows in this notebook); must be
            broadcastable against the output of ``softmax_func(features)``.

    Returns:
        The per-row sum over axis 1 of ``labels * -log(hypothesis)``,
        averaged over all rows.
    """
    hypothesis = softmax_func(features)
    # Use the `labels` argument (not the global training labels) so the cost
    # is computed against the targets that belong to this batch.
    per_example = tf.reduce_sum(labels * (-tf.math.log(hypothesis)), axis=1)
    return tf.reduce_mean(per_example)

# Gradient descent
def grad(hypothesis, features, labels):
    """Compute the gradients of the cross-entropy cost w.r.t. ``W`` and ``b``.

    Args:
        hypothesis: not read by this function; the prediction is recomputed
            inside ``cross_entropy`` while the tape is recording.
        features: inputs passed on to ``cross_entropy``.
        labels: targets passed on to ``cross_entropy``.

    Returns:
        The result of ``tape.gradient`` for the sources ``[W, b]``, in that
        order.
    """
    trainable = [W, b]
    # The cost has to be evaluated inside the tape context so it is recorded
    with tf.GradientTape() as tape:
        loss = cross_entropy(features, labels)
    gradients = tape.gradient(loss, trainable)
    return gradients

# 3. Train

In [26]:
# Plain stochastic gradient descent on W and b
optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)

EPOCHS = 3000
for step in range(EPOCHS + 1):
    # `dataset` is batched with the full sample count, so every epoch
    # performs exactly one parameter update.
    for features, labels in dataset:
        # grad() recomputes the prediction under its own tape; the value
        # passed here only satisfies its signature.
        hypothesis = softmax_func(features)
        grads = grad(hypothesis, features, labels)
        optimizer.apply_gradients(zip(grads, [W, b]))

    # Report the cost (evaluated after this epoch's update) every 300 epochs
    if not step % 300:
        print("step: {}, cost: {}".format(step, cross_entropy(features, labels)))

step: 0, cost: 0.10299590229988098
step: 300, cost: 0.09613937884569168
step: 600, cost: 0.090118408203125
step: 900, cost: 0.08479099720716476
step: 1200, cost: 0.08004526793956757
step: 1500, cost: 0.07579214870929718
step: 1800, cost: 0.07195950299501419
step: 2100, cost: 0.06848882138729095
step: 2400, cost: 0.06533147394657135
step: 2700, cost: 0.06244741380214691
step: 3000, cost: 0.05980305373668671


# 4. One-hot Encoding: Test

##### Test

In [37]:
# One sample with 4 feature values, run through the trained model
test_data = [[1, 11, 7, 9]]
test_x = np.asarray(test_data, dtype=np.float32)
test = softmax_func(test_x)

# Softmax output for the sample
print("test:\n", test)

# Index of the largest value in each row, i.e. the predicted class
print("One-hot index:\n", tf.argmax(test, axis=1))

test:
 tf.Tensor([[4.0315525e-05 9.9995947e-01 2.2392535e-07]], shape=(1, 3), dtype=float32)
One-hot index:
 tf.Tensor([1], shape=(1,), dtype=int64)


In [38]:
# Run the model on the full training inputs and show the softmax output
# (one row per sample in x_data); `pass_softmax_x` is reused further below.
pass_softmax_x = softmax_func(x_data)
print("softmax_func(x_data):\n", pass_softmax_x)

softmax_func(x_data):
 tf.Tensor(
[[1.6182792e-09 3.7362202e-05 9.9996269e-01]
 [6.4792075e-05 2.9270148e-02 9.7066510e-01]
 [6.6916205e-13 6.2354673e-02 9.3764532e-01]
 [2.0618432e-10 9.4404304e-01 5.5956893e-02]
 [1.0743184e-01 8.8830066e-01 4.2675058e-03]
 [5.7056814e-02 9.4294125e-01 1.9359466e-06]
 [8.7466675e-01 1.2533224e-01 9.1694784e-07]
 [9.8458242e-01 1.5417588e-02 1.7384544e-09]], shape=(8, 3), dtype=float32)


In [39]:
# Show the one-hot encoded training labels (y_data) for comparison with the
# softmax output printed above
print(y_data)

[[0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]]


In [41]:
# Compare the class index with the highest predicted probability ("Predicted")
# against the class index marked in the one-hot labels ("Real")
print("Predicted:\n", tf.argmax(pass_softmax_x, axis=1))
print("Real:\n", tf.argmax(y_data, axis=1))

Predicted:
 tf.Tensor([2 2 2 1 1 1 0 0], shape=(8,), dtype=int64)
Real:
 tf.Tensor([2 2 2 1 1 1 0 0], shape=(8,), dtype=int64)
