In [None]:
# multinomial Logistic regression for multiple features
import numpy as np
import pandas as pd

df = pd.read_csv("datasets/breast-cancer-wisconsin.csv")

x = X = df.drop(columns=["id", "diagnosis", "Unnamed: 32"], errors="ignore").to_numpy(
    dtype=float
)
y_true = df["diagnosis"].map({"M": 1, "B": 0}).to_numpy()
y = np.eye(len(np.unique(y_true)))[y_true]

def train_test_split_numpy(X, y, test_size=0.2, seed=42):
    np.random.seed(seed)

    indices = np.arange(len(X))
    np.random.shuffle(indices)

    test_count = int(len(X) * test_size)

    test_idx = indices[:test_count]
    train_idx = indices[test_count:]

    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]



def softmax(x, axis=None):
    x = np.asarray(x)
    if axis is None:
        axis = x.ndim - 1
    x_max = np.max(x, axis=axis, keepdims=True)
    e_x = np.exp(x - x_max)
    return e_x / np.sum(e_x, axis=axis, keepdims=True)

learning_rate = 0.001


def train_model(x, y, learning_rate, epochs=5000):
    x = (x - np.mean(x, axis=0)) / np.std(x, axis=0)
    unique_classes = y.shape[1]
    n_dataset_col = x.shape[1]
    w_vector = np.zeros((unique_classes, n_dataset_col))
    bias = np.zeros((unique_classes, 1))

    for _ in range(epochs):
        z = x @ w_vector.T + bias.T

        probabilities = softmax(z)

        loss = -np.mean(np.sum(y * np.log(probabilities + 1e-9), axis=1))

        if loss < 1e-6:
            print("stopping the learning")
            break

        p_y = probabilities - y
        wieght_gradient = p_y.T @ x
        bias_gradient = np.sum(p_y, axis=0, keepdims=True).T

        w_vector = w_vector - (learning_rate * wieght_gradient)
        bias = bias - (learning_rate * bias_gradient)
    return w_vector, bias



def predict(x, trained_weights, trained_bias):
    x = (x - np.mean(x, axis=0)) / np.std(x, axis=0)
    z = x @ trained_weights.T + trained_bias.T
    probabilities = softmax(z)
    predictions = np.argmax(probabilities, axis=1)
    return predictions


X_train, X_test, y_train, y_test = train_test_split_numpy(x, y)

weights, bias = train_model(X_train, y_train, learning_rate=0.01)
predictions = predict(X_test, weights, bias)

y_test = np.argmax(y_test, axis=1) 
test_acc = np.mean(y_test == predictions)
print(f"Test Accuracy: {test_acc * 100:.2f}%")






Test Accuracy: 92.92%


Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,...,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890,
1,842517,M,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,...,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902,
2,84300903,M,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,...,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,...,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300,
4,84358402,M,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,...,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,926424,M,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,...,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115,
565,926682,M,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,...,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637,
566,926954,M,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,...,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820,
567,927241,M,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,...,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400,


In [326]:
# Ordinal Logistic regression for multiple features
import numpy as np

X_dataset = np.array([[1, 5], [3, 3], [4, 1]])

Y_dataset = np.array([[1], [2], [3]])


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def train_model(X, Y, epochs=500000):
    num_classes = len(np.unique(Y))
    num_samples, num_features = X.shape
    y_hot_encoded = np.eye(num_classes)[Y.flatten() - 1]
    num_thresholds = y_hot_encoded.shape[1] - 1
    w_vector = np.zeros((num_features))
    bias = 0
    learning_rate = 0.01
    thresholds = np.cumsum(
        np.exp(np.zeros(num_thresholds))
    )  # Ensures thresholds are increasing
    prev_loss = float('inf')
    tolerance = 1e-6  # minimum change in loss

    for i in range(epochs):
        # Compute Latent Score (z)
        z = X @ w_vector + bias

        # Broadcoast thresholds (2,) against z (3,)
        cum_probs = sigmoid(thresholds.reshape(1, -1) - z.reshape(-1, 1))

        # Add a column of 1.0 for P(Y <= max_category)
        cum_probs = np.hstack((cum_probs, np.ones((num_samples, 1))))

        # Individual Probabilities: P(Y=j) = P(Y<=j) - P(Y<=j-1)
        probs = np.diff(cum_probs, prepend=0, axis=1)

        # compute loss
        loss = -np.sum(y_hot_encoded * np.log(probs + 1e-15))

        if abs(prev_loss - loss) < tolerance:
            print(
                f"Stopping at iteration {i + 1}, loss change < {tolerance} : {abs(prev_loss - loss)}"
            )
            break
        prev_loss = loss

        #  1{Y<=J} indicator function (1 if true, 0 if False); J is threshold
        indicator = (Y <= np.arange(1, num_classes)).astype(int)
        # ( P(Y<=J) - 1{Y<=J} )
        error = np.delete(cum_probs, -1, axis=1) - indicator
        # bias gradient sum of ( P(Y<=J) - 1{Y<=J} )
        dz = -np.sum(error, axis=1)

        # compute weight gradient sum of ( P(Y<=J) - 1{Y<=J} ) * X
        dw = X.T @ dz
        db = np.sum(dz)
        d_thresholds = np.sum(error, axis=0)

        # # # update the weight , bias & thresholds
        w_vector = w_vector - learning_rate * dw
        bias = bias - learning_rate * db
        thresholds = np.sort(thresholds - learning_rate * d_thresholds)
    return w_vector, bias, thresholds


def predict(new_datatset, w, b, thresholds):
    # Compute Latent Score (z)
    z = new_datatset @ w + b

    # Broadcoast thresholds (2,) against z (3,)
    cum_probs = sigmoid(thresholds.reshape(1, -1) - z.reshape(-1, 1))

    # Add a column of 1.0 for P(Y <= max_category)
    cum_probs = np.hstack((cum_probs, np.ones((new_datatset.shape[0], 1))))

    # Individual Probabilities: P(Y=j) = P(Y<=j) - P(Y<=j-1)
    probs = np.diff(cum_probs, prepend=0, axis=1)
    predictions = np.argmax(probs, axis=1) + 1
    return f"Predictions: {predictions}"

w_vector, bias, thresholds = train_model(X_dataset, Y_dataset)
X_test = np.array([[2, 4]])
predict(X_dataset, w_vector, bias, thresholds)


Stopping at iteration 17062, loss change < 1e-06 : 9.999003555104247e-07


'Predictions: [1 2 3]'