In [11]:
import numpy as np

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [12]:
data = load_iris(as_frame=True)
df = data.frame
df = df[df.iloc[:, -1] != 2]

X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

In [13]:
print(f"Example of some features {X.shape}\n{X[:8]}")
print(X.shape, "\n")
print(f"Labels {y.shape}\n {y}")

Example of some features (100, 4)
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]]
(100, 4) 

Labels (100,)
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=666)

print(f"Size of bundle:\nTrain: X -> {X_train.shape} y -> {y_train.shape}\nTest:  X -> {X_test.shape} y -> {y_test.shape}")

Size of bundle:
Train: X -> (80, 4) y -> (80,)
Test:  X -> (20, 4) y -> (20,)


In [15]:
NUM_FEATURES  = X_train.shape[1]
WEIGHT_VECTOR_SIZE = NUM_FEATURES
WEIGHT_VECTOR = np.zeros(WEIGHT_VECTOR_SIZE)
EPOCHS = 10
LEARNING_RATE = 0.05
bias = 0

In [16]:
# Sigmoid activation function
def sigmoid_activation(z):
    return (1 / (1 + np.exp(-z)))    

# Linear calculation:
# Algo -> z = X . W + b
def linear_calc(X, W, b):
    return sigmoid_activation(np.dot(X, W) + b)

# Classify based on given probability
def probability_norm(prob):
    if (prob >= .5):
        return 1
    else:
        return 0

# Binary cross entropy loss function
def loss(y, y_hat):
    binary_cross_entropy = -np.mean((y * np.log(y_hat)) + (1 - y) * (np.log(1 - y_hat)))
    return binary_cross_entropy
    
# Compute gradient using:
#     dLoss/dW = (1/m) X^T (y_hat - y)
#     dLoss/db = (1/m) SUM (y_hat - y)
def compute_gradient(X, y, y_hat, delta, NUM_SAMPLES):
    dW = (1 / NUM_SAMPLES) * np.dot(X.T, delta)
    db = (1 / NUM_SAMPLES) * np.sum(delta)
    return dW, db

# Update parameters using:
#     W = W - η*(dLoss/dW)
#     b = b - η*(dLoss/db)
# where η is the learning rate
def grad_descent(learning_rate, W, dW, b, db):
    W -= learning_rate * dW
    b -= learning_rate * db
    return W, b

In [29]:
# Train function
# Forward pass -> run linear calc + sigmoid
# Compute gradients w.r.t. W and b
# Update W and b
# Calculate loss using binary-cross-entropy
def train(X, y, W, b, num_samples, learning_rate, epochs):
    for epoch in range(epochs):
        y_hat = linear_calc(X, W, b)
        delta = y_hat - y
        dW, db = compute_gradient(X, y, y_hat, delta, num_samples)
        W, b = W, b
        W, b = grad_descent(learning_rate, W, dW, b, db)
        bce = loss(y, y_hat)
        if (epoch % 2 == 0):
            print(f"Epoch {epoch}, Loss {bce}")
    return W, b

# Test function:
# Foward pass -> run linear calc again
# Predict     -> Sigmoid function + >=0.5 is 1 otherwise 0
# Accuracy    -> where y_hat == y
def test(X, y, W, b):
    y_hat = linear_calc(X, W, b)
    print(f"Raw predictions:\n{y_hat}\n")
    prediction = np.zeros(len(y_hat))
    
    for i in range(len(y_hat)):
        prediction[i] = probability_norm(y_hat[i])
    
    print(f"Real labels:\n{y}\n")
    print(f"Predicted labels:\n{prediction.astype(int)}\n")
    
    accuracy = np.mean(prediction == y) * 100
    print(f"Accuracy: {accuracy}")

In [30]:
num_samples = X_train.shape[0]
W, b = train(X_train, y_train, WEIGHT_VECTOR, bias, num_samples, LEARNING_RATE, EPOCHS)

Epoch 0, Loss 0.016302014996397152
Epoch 2, Loss 0.016269718988608267
Epoch 4, Loss 0.016237558997578422
Epoch 6, Loss 0.01620553386978019
Epoch 8, Loss 0.016173642501807004


In [31]:
test(X_test, y_test, W, b)

Raw predictions:
[0.9950419  0.99423452 0.00963671 0.00753428 0.02456861 0.01525324
 0.01017588 0.99905875 0.99458876 0.99545853 0.00723438 0.00354505
 0.02366478 0.00825707 0.99430807 0.98839256 0.95354915 0.03728897
 0.00984678 0.02222751]

Real labels:
[1 1 0 0 0 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0]

Predicted labels:
[1 1 0 0 0 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0]

Accuracy: 100.0


In [32]:
# Obvious that its overfitting. Should probably use a different dataset.
learning_rates = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05]
epochs = [5, 10, 15, 20, 25, 30, 35]
for _, i in enumerate(learning_rates):
    for _, j in enumerate(epochs):
        print(f"Learning rate: {i}, Epochs: {j}\n----------------------------------")
        W, b = train(X_train, y_train, WEIGHT_VECTOR, bias, num_samples, i, j)
        test(X_test, y_test, W, b)
        print(f"\n\n")

Learning rate: 0.0001, Epochs: 5
----------------------------------
Epoch 0, Loss 0.016143320938796453
Epoch 2, Loss 0.01614325756292061
Epoch 4, Loss 0.016143194187574126
Raw predictions:
[0.99504616 0.99423946 0.00964471 0.00754053 0.02458874 0.01526584
 0.01018431 0.99905956 0.99459341 0.99546244 0.00724039 0.003548
 0.02368419 0.00826393 0.99431296 0.98840244 0.95358711 0.03731917
 0.00985495 0.02224576]

Real labels:
[1 1 0 0 0 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0]

Predicted labels:
[1 1 0 0 0 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0]

Accuracy: 100.0



Learning rate: 0.0001, Epochs: 10
----------------------------------
Epoch 0, Loss 0.016143163937430233
Epoch 2, Loss 0.016143100562852678
Epoch 4, Loss 0.016143037188804457
Epoch 6, Loss 0.016142973815285573
Epoch 8, Loss 0.016142910442295988
Raw predictions:
[0.99504628 0.99423961 0.00964445 0.00754033 0.02458822 0.01526548
 0.01018405 0.9990596  0.99459354 0.99546256 0.00724018 0.00354789
 0.02368368 0.0082637  0.9943131  0.98840269 0.95358773 