In [8]:
!pip install tensorflow



In [9]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

In [25]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
# Load the MNIST train/test splits (images plus class labels).
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# One-hot encode the labels. The class count is given explicitly so the train
# and test label matrices are guaranteed to have the same width (and to match
# the 10 output columns of the model) instead of relying on to_categorical
# inferring it from the largest label present in each split.
y_train = to_categorical(y_train, num_classes=10).astype('float32')
y_test = to_categorical(y_test, num_classes=10).astype('float32')

In [11]:
# Sanity check: shape of the raw training images returned by mnist.load_data().
x_train.shape

(60000, 28, 28)

In [12]:
# Sanity check: shape of the training labels after one-hot encoding.
y_train.shape

(60000, 10)

In [13]:
# Inspect the first one-hot encoded training label.
y_train[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.])

In [24]:
# Model dimensions: each 28x28 image is flattened to 784 inputs, and there is
# one output per digit class.
NUM_FEATURES = 784
NUM_CLASSES = 10

# Trainable parameters of the linear model, both initialised to zero.
W = tf.Variable(
    tf.zeros(shape=(NUM_FEATURES, NUM_CLASSES), dtype=tf.float32),
    name='weights',
)
b = tf.Variable(
    tf.zeros(shape=(NUM_CLASSES,), dtype=tf.float32),
    name='biases',
)

def softmax_regression(x_input):
    """Return softmax class probabilities for a batch of flattened inputs.

    Computes ``softmax(x_input @ W + b)`` using the module-level variables
    ``W`` and ``b``.

    Args:
        x_input: 2-D array or tensor whose second dimension matches the first
            dimension of ``W``. Any numeric dtype is accepted; it is cast to
            float32 before the matrix product.

    Returns:
        A float32 tensor with one row per input and one column per class,
        each row summing to 1.
    """
    # tf.matmul requires both operands to share a dtype and W is float32, so
    # cast here instead of relying on every caller to pre-convert its input.
    x_input = tf.cast(x_input, tf.float32)
    logits = tf.matmul(x_input, W) + b
    return tf.nn.softmax(logits)

def cross_entropy_loss(y_pred, y_true):
    """Cross-entropy between predicted probabilities and target labels.

    The result is the *sum* over every element of the batch (it is not
    averaged), so its magnitude grows with the batch size.

    Args:
        y_pred: Predicted probabilities.
        y_true: Target labels with the same shape as ``y_pred``; cast to
            float32 before use.

    Returns:
        A scalar tensor: ``-sum(y_true * log(clip(y_pred, 1e-10, 1.0)))``.
    """
    targets = tf.cast(y_true, tf.float32)
    # Bound the probabilities away from zero so the logarithm stays finite.
    safe_probs = tf.clip_by_value(y_pred, 1e-10, 1.0)
    log_likelihood = tf.reduce_sum(targets * tf.math.log(safe_probs))
    return -log_likelihood

# Plain SGD optimizer; train_step uses it to apply the gradients to W and b.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

@tf.function
def train_step(x_batch, y_batch):
    """Run one optimisation step on a single mini-batch.

    Performs the forward pass, differentiates the loss with respect to ``W``
    and ``b``, and applies the gradients with the module-level ``optimizer``.

    Args:
        x_batch: Batch of flattened input images.
        y_batch: Matching batch of one-hot labels.

    Returns:
        A ``(loss, y_pred)`` tuple: the scalar loss for the batch and the
        predictions computed *before* the parameters were updated.
    """
    trainable = [W, b]

    # Record the forward pass so the tape can differentiate through it.
    with tf.GradientTape() as tape:
        y_pred = softmax_regression(x_batch)
        loss = cross_entropy_loss(y_pred, y_batch)

    grads = tape.gradient(loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))
    return loss, y_pred

In [26]:
def _flatten_and_scale(images):
    """Reshape images to rows of 784 values and divide by 255 as float32."""
    flat = images.reshape(-1, 784)
    return flat.astype('float32') / 255.0


# Preprocess both splits the same way so they match the model's input size.
x_train_flat = _flatten_and_scale(x_train)
x_test_flat = _flatten_and_scale(x_test)

print(f"x_train_flat shape: {x_train_flat.shape}")
print(f"x_test_flat shape: {x_test_flat.shape}")

x_train_flat shape: (60000, 784)
x_test_flat shape: (10000, 784)


### Training Loop

In [27]:
batch_size = 100
epochs = 10
# Fixed seed so the shuffled batch order (and therefore the reported metrics)
# is reproducible across runs. The data is still reshuffled on every epoch.
shuffle_seed = 42

train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train_flat, y_train))
    .shuffle(len(x_train_flat), seed=shuffle_seed)
    .batch(batch_size)
)
# The test split is only batched; its order does not matter for evaluation.
test_dataset = tf.data.Dataset.from_tensor_slices((x_test_flat, y_test)).batch(batch_size)

for epoch in range(epochs):
    # Fresh metric objects each epoch so the printed values cover that epoch only.
    epoch_loss_avg = tf.keras.metrics.Mean()
    epoch_accuracy = tf.keras.metrics.CategoricalAccuracy()

    for x_batch, y_batch in train_dataset:
        loss, y_pred = train_step(x_batch, y_batch)
        # train_step returns the loss summed over the batch, so the reported
        # "Loss" is the average batch total rather than a per-sample loss.
        epoch_loss_avg.update_state(loss)
        # y_pred was computed before the weight update performed in train_step.
        epoch_accuracy.update_state(y_batch, y_pred)

    print(f"Epoch {epoch+1:03d}: Loss: {epoch_loss_avg.result():.3f}, Accuracy: {epoch_accuracy.result():.3f}")

Epoch 001: Loss: 40.852, Accuracy: 0.882
Epoch 002: Loss: 31.502, Accuracy: 0.909
Epoch 003: Loss: 30.073, Accuracy: 0.915
Epoch 004: Loss: 29.402, Accuracy: 0.917
Epoch 005: Loss: 28.920, Accuracy: 0.918
Epoch 006: Loss: 28.631, Accuracy: 0.919
Epoch 007: Loss: 28.313, Accuracy: 0.920
Epoch 008: Loss: 28.126, Accuracy: 0.920
Epoch 009: Loss: 27.884, Accuracy: 0.921
Epoch 010: Loss: 27.736, Accuracy: 0.922


### Evaluation

In [28]:
# Accumulates classification accuracy across all test batches.
test_accuracy = tf.keras.metrics.CategoricalAccuracy()

# Forward pass only: softmax_regression is called directly (not train_step),
# so W and b are not modified while evaluating.
for x_batch_test, y_batch_test in test_dataset:
    y_pred_test = softmax_regression(x_batch_test)
    test_accuracy.update_state(y_batch_test, y_pred_test)

print(f"Test Accuracy: {test_accuracy.result()}")

Test Accuracy: 0.9121999740600586
