In [48]:
from sklearn.datasets import load_digits
import pandas as pd
import numpy as np

In [49]:
# Load the digits dataset a single time and reuse the returned Bunch for the
# feature matrix, the labels, and the feature names (previously the dataset
# was loaded twice).
data = load_digits()
y = data.target

# Prepend a column of ones so the bias term is learned as an ordinary weight.
X = np.c_[np.ones(data.data.shape[0]), data.data]
n_samples, n_features = X.shape
n_classes = len(np.unique(y))

# Tabular view of the same design matrix, with the label as the last column.
cols = ["bias"] + list(data.feature_names)
df = pd.DataFrame(X, columns=cols)
df["target"] = y

print(n_samples, n_features, n_classes)

1797 65 10


In [50]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [51]:
def softmax(z):
    """Compute the softmax of each row of a 2-D array of logits.

    Parameters
    ----------
    z : array-like of shape (n_rows, n_cols)
        Logits. The input is never modified.

    Returns
    -------
    numpy.ndarray of shape (n_rows, n_cols)
        Non-negative values in which every row sums to 1.
    """
    z = np.asarray(z)
    # Subtract the per-row maximum before exponentiating so np.exp cannot
    # overflow. Building a new array (instead of `z -= ...`) leaves the
    # caller's logits untouched.
    shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

def one_hot(y, num_classes):
    """Encode integer class labels as one-hot rows.

    Parameters
    ----------
    y : numpy.ndarray of integers, shape (n,)
        Class index for each sample.
    num_classes : int
        Number of columns in the encoded matrix.

    Returns
    -------
    numpy.ndarray of shape (n, num_classes)
        Float matrix of zeros with a single 1 per row, at column ``y[i]``.
    """
    n_rows = y.shape[0]
    encoded = np.zeros((n_rows, num_classes))
    row_ids = np.arange(n_rows)
    # Pair each row index with its label to set exactly one entry per row.
    encoded[row_ids, y] = 1
    return encoded

In [52]:
def train_softmax(X, y, theta, lr = 0.01, epochs = 100, batch_size = 32, rng = None):
    """Fit softmax-regression weights with mini-batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray of shape (n, n_features)
        Design matrix.
    y : numpy.ndarray of integers, shape (n,)
        Class index for each row of ``X``.
    theta : array-like of shape (n_features, n_classes)
        Initial weights. The number of classes is taken from its second
        dimension. The array passed in is not modified.
    lr : float
        Step size applied to each mini-batch gradient.
    epochs : int
        Number of full passes over the data.
    batch_size : int
        Number of rows per mini-batch (the last batch may be smaller).
    rng : None, int, or numpy.random.Generator, optional
        Source of the per-epoch shuffles. ``None`` (default) uses NumPy's
        global random state, so ``np.random.seed`` still controls it; any
        other value is passed to ``numpy.random.default_rng``.

    Returns
    -------
    numpy.ndarray of shape (n_features, n_classes)
        The trained weights, as a float array.
    """
    # Train on a float copy: the caller's initial weights stay untouched and
    # an integer-typed initialiser no longer fails on the in-place update.
    theta = np.array(theta, dtype=float)

    # Derive the class count from the weights rather than a notebook global.
    y_onehot = one_hot(y, theta.shape[1])
    n = X.shape[0]

    # Both the np.random module and a Generator expose .permutation().
    shuffler = np.random if rng is None else np.random.default_rng(rng)

    for _ in range(epochs):
        # Reshuffle the rows (and their labels, in lockstep) every epoch.
        idx = shuffler.permutation(n)
        X, y_onehot = X[idx], y_onehot[idx]

        for i in range(0, n, batch_size):
            X_batch = X[i: i + batch_size]
            y_batch = y_onehot[i: i + batch_size]

            logits = X_batch @ theta
            probs = softmax(logits)
            # Cross-entropy gradient averaged over the rows of this batch.
            grad = X_batch.T @ (probs - y_batch) / X_batch.shape[0]

            theta -= lr * grad
    return theta



In [64]:
# train_softmax shuffles the training rows with NumPy's global RNG; seed it
# so the learned weights (and the accuracy reported below) are reproducible
# on every Restart & Run All.
np.random.seed(42)

theta = np.zeros((n_features, n_classes))
theta = train_softmax(X_train, y_train, theta)

# Predicted class = column with the highest probability for each test row.
probs = softmax(X_test @ theta)
y_pred = np.argmax(probs, axis=1)

In [65]:
from sklearn.metrics import accuracy_score

# Fraction of held-out samples whose predicted label matches the true label.
test_accuracy = accuracy_score(y_test, y_pred)
print(test_accuracy)

0.9722222222222222
