In [9]:
import tensorflow as tf
import numpy as np

# Load the MNIST dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Normalize pixel values to [0,1]
x_train, x_test = x_train / 255.0, x_test / 255.0

# Flatten images: (28, 28) → (784,)
x_train_flat = x_train.reshape(x_train.shape[0], -1)
x_test_flat = x_test.reshape(x_test.shape[0], -1)

# Add bias term: Append a column of ones (Now shape: (N, 785))
x_train_bias = np.hstack([x_train_flat[:2000], np.ones((2000, 1))])
x_test_bias = np.hstack([x_test_flat[:500], np.ones((500, 1))])

# Select corresponding labels & reshape to column vectors
y_train_sampled = y_train[:2000].reshape(-1, 1)
y_test_sampled = y_test[:500].reshape(-1, 1)

# Print final dataset shapes
print(f"Training set: {x_train_bias.shape}, Labels: {y_train_sampled.shape}")
print(f"Test set: {x_test_bias.shape}, Labels: {y_test_sampled.shape}")

Training set: (2000, 785), Labels: (2000, 1)
Test set: (500, 785), Labels: (500, 1)


In [16]:
W = np.random.randn(785, 10) * 0.001
num_classes = W.shape[1]

In [17]:
def softmax_linear_unvectorized(X, y, W, reg):
  num_train = X.shape[0]
  num_classes = W.shape[1]
  loss = 0.0
  dW = np.zeros_like(W)

  for i in range(num_train):
    scores = np.dot(X[i], W)
    scores -= np.max(scores)
    exp_scores = np.exp(scores)
    softmax_probs = exp_scores / np.sum(exp_scores)
    loss += -np.log(softmax_probs[y[i]])

    for j in range(num_classes):
      if j == y[i]:
        dW[:, j] += (softmax_probs[j] - 1) * X[i]
      else:
        dW[:, j] += softmax_probs[j] * X[i]

  loss /= num_train
  dW /= num_train

  loss += reg * np.sum(W * W)
  dW += 2 * reg * W

  return loss, dW

In [20]:
def softmax_train_unvectorized(X, y, W, reg, alpha=0.01, iters=100):
  num_train = X.shape[0]
  for i in range(iters):
    loss, dW = softmax_linear_unvectorized(X, y, W, reg)
    W -= alpha * dW

    if i % 10 == 0:
      print(f"Epoch {i}: Loss = {float(loss):.4f}")
  return W

In [21]:
W = softmax_train_unvectorized(x_train_bias, y_train_sampled, W, reg=0.0001, alpha=0.01, iters=100)

  print(f"Epoch {i}: Loss = {float(loss):.4f}")


Epoch 0: Loss = 2.2917
Epoch 10: Loss = 2.1863
Epoch 20: Loss = 2.0887
Epoch 30: Loss = 1.9983
Epoch 40: Loss = 1.9144
Epoch 50: Loss = 1.8366
Epoch 60: Loss = 1.7645
Epoch 70: Loss = 1.6977
Epoch 80: Loss = 1.6359
Epoch 90: Loss = 1.5786


In [23]:
y_pred = np.dot(x_test_bias, W)
y_pred = np.argmax(y_pred, axis=1)
accuracy = np.mean(y_pred == y_test_sampled)

In [24]:
accuracy

np.float64(0.102612)