In [23]:
import numpy as np

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [2]:
data = load_iris(as_frame=True)
df = data.frame
df = df[df.iloc[:, -1] != 2]

X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

In [10]:
print(f"Example of some features {X.shape}\n{X[:8]}")
print(X.shape, "\n")
print(f"Labels {y.shape}\n {y}")

Example of some features (100, 4)
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]]
(100, 4) 

Labels (100,)
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=666)

print(f"Size of bundle:\nTrain: X -> {X_train.shape} y -> {y_train.shape}\nTest:  X -> {X_test.shape} y -> {y_test.shape}")

Size of bundle:
Train: X -> (80, 4) y -> (80,)
Test:  X -> (20, 4) y -> (20,)


In [72]:
NUM_FEATURES  = X_train.shape[1]
WEIGHT_VECTOR_SIZE = num_features
WEIGHT_VECTOR = np.zeros(WEIGHT_VECTOR_SIZE)
EPOCHS = 10
LEARNING_RATE = 0.05
bias = 0

In [73]:
# Sigmoid activation function
def sigmoid_activation(z):
    return (1 / (1 + np.exp(-z)))    

# Linear calculation:
# Algo -> z = X . W + b
def linear_calc(X, W, b):
    return sigmoid_activation(np.dot(X, W) + b)

# Classify based on given probability
def probability_norm(prob):
    if (prob >= .5):
        return 1
    else:
        return 0

# Binary cross entropy loss function
def loss(y, y_hat):
    binary_cross_entropy = -np.mean((y * np.log(y_hat)) + (1 - y) * (np.log(1 - y_hat)))
    return binary_cross_entropy
    
# Compute gradient using:
#     dLoss/dW = (1/m) X^T (y_hat - y)
#     dLoss/db = (1/m) SUM (y_hat - y)
def compute_gradient(X, y, y_hat, delta, NUM_SAMPLES):
    dW = (1 / NUM_SAMPLES) * np.dot(X.T, delta)
    db = (1 / NUM_SAMPLES) * np.sum(delta)
    return dW, db

# Update parameters using:
#     W = W - η*(dLoss/dW)
#     b = b - η*(dLoss/db)
# where η is the learning rate
def grad_descent(learning_rate, W, dW, b, db):
    W -= learning_rate * dW
    b -= learning_rate * db
    return W, b

In [123]:
# Train function
# Forward pass -> run linear calc + sigmoid
# Compute gradients w.r.t. W and b
# Update W and b
# Calculate loss using binary-cross-entropy
def train(X, y, W, b, num_samples, learning_rate, epochs):
    for epoch in range(epochs):
        y_hat = linear_calc(X, W, b)
        delta = y_hat - y
        dW, db = compute_gradient(X, y, y_hat, delta, num_samples)
        W, b = W, b
        W, b = grad_descent(learning_rate, W, dW, b, db)
        bce = loss(y, y_hat)
        print(f"Epoch {epoch}, Loss {bce}")
    return W, b

# Test function:
# Foward pass -> run linear calc again
# Predict     -> Sigmoid function + >=0.5 is 1 otherwise 0
# Accuracy    -> where y_hat == y
def test(X, y, W, b):
    y_hat = linear_calc(X, W, b)
    print(f"Raw predictions:\n{y_hat}\n")
    prediction = np.zeros(len(y_hat))
    
    for i in range(len(y_hat)):
        prediction[i] = probability_norm(y_hat[i])
    
    print(f"Real labels:\n{y}\n")
    print(f"Predicted labels:\n{prediction.astype(int)}\n")
    
    accuracy = np.mean(prediction == y) * 100
    print(f"Accuracy: {accuracy}")

In [124]:
num_samples = X_train.shape[0]
W, b = train(X_train, y_train, WEIGHT_VECTOR, bias, num_samples, LEARNING_RATE, EPOCHS)

Epoch 0, Loss 0.08006641523600662
Epoch 1, Loss 0.07963348992674682
Epoch 2, Loss 0.07920537276462819
Epoch 3, Loss 0.07878195912194548
Epoch 4, Loss 0.07836315476257882
Epoch 5, Loss 0.07794887307432223
Epoch 6, Loss 0.0775390331565949
Epoch 7, Loss 0.07713355849350853
Epoch 8, Loss 0.07673237602830621
Epoch 9, Loss 0.07633541551360337


In [125]:
test(X_test, y_test, W, b)

Raw predictions:
[0.96079601 0.95692111 0.06379348 0.05566304 0.10665098 0.08205874
 0.06585298 0.98500068 0.95899577 0.96249864 0.05462559 0.03644135
 0.10459533 0.0585129  0.9576279  0.93585425 0.86331649 0.13373765
 0.0651067  0.10110097]

Real labels:
[1 1 0 0 0 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0]

Predicted labels:
[1 1 0 0 0 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0]

Accuracy: 100.0
