In [5]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

def load_iris_local(file_path="iris.csv", test_size=0.2, random_state=42):
    """
    Returns  X_train, X_dev, y_train, y_dev
    Shapes:   (n_features, m_samples)
    """
    df = pd.read_csv(file_path)
    y  = df["species"].factorize()[0]     # map species strings → 0,1,2
    X  = df.iloc[:, :4].values            # first 4 columns

    X = StandardScaler().fit_transform(X)

    X_train, X_dev, y_train, y_dev = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )
    return X_train.T, X_dev.T, y_train, y_dev

X_train, X_dev, y_train, y_dev = load_iris_local()
print("train shape:", X_train.shape, "| dev shape:", X_dev.shape)


train shape: (4, 120) | dev shape: (4, 30)


In [6]:

def relu(Z):
    return np.maximum(0, Z)

def relu_deriv(Z):
    return (Z > 0).astype(float)

def softmax(Z):
    exp_Z = np.exp(Z - Z.max(axis=0, keepdims=True))  # numerical-stability trick
    return exp_Z / exp_Z.sum(axis=0, keepdims=True)

def one_hot(y, num_classes=3):
    m  = y.size
    oh = np.zeros((num_classes, m))
    oh[y, np.arange(m)] = 1
    return oh

def accuracy(preds, y):
    return np.mean(preds == y) * 100


In [7]:
def init_params(n_x, n_h, n_y, seed=42):
    rng = np.random.default_rng(seed)
    W1 = rng.uniform(-0.5, 0.5, size=(n_h, n_x))
    b1 = np.zeros((n_h, 1))
    W2 = rng.uniform(-0.5, 0.5, size=(n_y, n_h))
    b2 = np.zeros((n_y, 1))
    return {"W1": W1, "b1": b1, "W2": W2, "b2": b2}


In [8]:
def forward_prop(params, X):
    W1, b1, W2, b2 = params["W1"], params["b1"], params["W2"], params["b2"]
    Z1 = W1 @ X + b1
    A1 = relu(Z1)
    Z2 = W2 @ A1 + b2
    A2 = softmax(Z2)
    cache = {"Z1": Z1, "A1": A1, "Z2": Z2, "A2": A2}
    return A2, cache

def backward_prop(params, cache, X, Y):
    m   = X.shape[1]
    W2  = params["W2"]
    A1, A2 = cache["A1"], cache["A2"]

    dZ2 = A2 - Y                       # (n_y, m)
    dW2 = (1/m) * dZ2 @ A1.T
    db2 = (1/m) * dZ2.sum(axis=1, keepdims=True)

    dZ1 = W2.T @ dZ2 * relu_deriv(cache["Z1"])
    dW1 = (1/m) * dZ1 @ X.T
    db1 = (1/m) * dZ1.sum(axis=1, keepdims=True)

    grads = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}
    return grads


In [9]:
def update_params(params, grads, lr):
    params["W1"] -= lr * grads["dW1"]
    params["b1"] -= lr * grads["db1"]
    params["W2"] -= lr * grads["dW2"]
    params["b2"] -= lr * grads["db2"]
    return params

def train_nn(X_train, y_train, X_dev, y_dev,
             n_h=10, iterations=1000, lr=0.05, print_every=100):
    
    n_x, n_y = X_train.shape[0], len(np.unique(y_train))
    params   = init_params(n_x, n_h, n_y)
    Y_train  = one_hot(y_train, n_y)
    Y_dev    = one_hot(y_dev,   n_y)

    for i in range(1, iterations + 1):
        A2, cache = forward_prop(params, X_train)
        grads     = backward_prop(params, cache, X_train, Y_train)
        params    = update_params(params, grads, lr)

        if i % print_every == 0 or i == 1:
            train_pred = np.argmax(A2, axis=0)
            dev_pred   = np.argmax(forward_prop(params, X_dev)[0], axis=0)
            print(f"Iter {i:4d} | "
                  f"train acc: {accuracy(train_pred, y_train):5.2f}% | "
                  f"dev acc: {accuracy(dev_pred, y_dev):5.2f}%")

    return params


In [10]:
params = train_nn(X_train, y_train, X_dev, y_dev,
                  n_h=10, iterations=1000, lr=0.05)

# Test on one dev sample
idx        = 0
sample     = X_dev[:, idx:idx+1]               # keep 2-D
true_label = y_dev[idx]
pred_label = np.argmax(forward_prop(params, sample)[0])

print(f"\nSample {idx}: model ⇒ {pred_label}, true ⇒ {true_label}")


Iter    1 | train acc: 15.00% | dev acc: 16.67%
Iter  100 | train acc: 89.17% | dev acc: 80.00%
Iter  200 | train acc: 93.33% | dev acc: 90.00%
Iter  300 | train acc: 95.00% | dev acc: 90.00%
Iter  400 | train acc: 95.83% | dev acc: 96.67%
Iter  500 | train acc: 96.67% | dev acc: 96.67%
Iter  600 | train acc: 96.67% | dev acc: 96.67%
Iter  700 | train acc: 97.50% | dev acc: 96.67%
Iter  800 | train acc: 97.50% | dev acc: 96.67%
Iter  900 | train acc: 97.50% | dev acc: 96.67%
Iter 1000 | train acc: 97.50% | dev acc: 96.67%

Sample 0: model ⇒ 0, true ⇒ 0
