In [20]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [21]:
# Load the banknote-authentication CSV straight into a NumPy array.
# NOTE(review): the split below reports 1028 training examples at
# test_size=0.25, which implies about 1371 rows in total. The public UCI
# version of this dataset is documented with 1372 rows and no header line,
# so read_csv may be consuming the first sample as column names. Confirm
# against the file and pass header=None if that is the case.
data = np.array(pd.read_csv('data_banknote_authentication.csv'))

In [22]:
# The last column is the label; every other column is a feature.
X = data[:, :-1]
y = data[:, -1].reshape(-1, 1)

# Hold out 25% as a dev set, then transpose every piece so that columns index
# examples (features x examples), which is the layout the model code expects.
X_train, X_dev, y_train, y_dev = (
    part.T
    for part in train_test_split(X, y, test_size=0.25, random_state=23)
)
print(f'Input Shape: {X_train.shape}\nOutput Shape: {y_train.shape}')

Input Shape: (4, 1028)
Output Shape: (1, 1028)


In [23]:
# X_train is laid out as (features, examples), so its shape unpacks directly
# into the number of features n and the number of examples m.
n, m = X_train.shape
print(f"Number of features: {n}\nNumber of examples: {m}")

Number of features: 4
Number of examples: 1028


In [24]:
# Sigmoid helper function
def sigmoid(z):
    """Numerically stable logistic function ``1 / (1 + exp(-z))``.

    Parameters
    ----------
    z : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        Element-wise sigmoid of ``z`` with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) is never larger than 1, so it cannot overflow. The naive
    # exp(-z) overflows to inf (with a RuntimeWarning) for large negative z.
    decay = np.exp(-np.abs(z))
    # For z >= 0 use 1 / (1 + e^-z); for z < 0 use the algebraically equal
    # e^z / (1 + e^z). Both are expressed through the same safe `decay`.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # np.where yields a 0-d array for scalar input; unwrap it to a NumPy scalar.
    return result[()] if result.ndim == 0 else result

In [25]:
# Initialize training parameters W and b
def initialize_params(n_features=None, seed=None):
    """Create the initial weights and bias for logistic regression.

    Parameters
    ----------
    n_features : int, optional
        Number of input features. When omitted, the notebook-level global
        ``n`` is used, which is the original behaviour.
    seed : int, optional
        Seed for a dedicated random generator so the initial weights are
        reproducible. When omitted, the weights are drawn from NumPy's
        global random state, exactly as before.

    Returns
    -------
    W : numpy.ndarray of shape (1, n_features)
        Weights sampled from a standard normal distribution.
    b : int
        Bias, initialised to 0.
    """
    if n_features is None:
        # Backward-compatible fallback to the notebook-level feature count.
        n_features = n
    if seed is None:
        W = np.random.randn(1, n_features)
    else:
        W = np.random.default_rng(seed).standard_normal((1, n_features))
    b = 0
    return W, b

In [26]:
# Forward pass
def forward_prop(X, W, b):
    """Return the predicted probabilities ``sigmoid(W @ X + b)``.

    In this notebook ``X`` is laid out as (features, examples) and ``W`` as
    (1, features), so the result has one probability per example column.
    """
    return sigmoid(W @ X + b)

In [27]:
# Compute the cost (negative log likelihood)
def compute_cost(y_hat, y):
    """Return the total binary cross-entropy between ``y_hat`` and ``y``.

    The result is the sum over all examples (it is not divided by the number
    of examples). A small constant is added inside each logarithm so that a
    prediction of exactly 0 or 1 does not produce ``log(0)``.
    """
    tiny = 1e-8
    positive_term = y * np.log(y_hat + tiny)
    negative_term = (1 - y) * np.log(1 - y_hat + tiny)
    return np.sum(-positive_term - negative_term)

In [28]:
# Backprop pass
def backprop(y, y_hat, X):
    """Gradients of the example-averaged cross-entropy w.r.t. ``W`` and ``b``.

    Parameters
    ----------
    y : numpy.ndarray of shape (1, examples)
        True labels.
    y_hat : numpy.ndarray of shape (1, examples)
        Predicted probabilities from the forward pass.
    X : numpy.ndarray of shape (features, examples)
        Inputs that produced ``y_hat``.

    Returns
    -------
    dJ__dw : numpy.ndarray of shape (1, features)
        Gradient with respect to the weights.
    dJ__db : numpy scalar
        Gradient with respect to the bias.
    """
    # Chain rule: dJ/dy_hat * dy_hat/dz
    #   = (-y / y_hat + (1 - y) / (1 - y_hat)) * y_hat * (1 - y_hat)
    #   = y_hat - y.
    # The simplified form avoids the 0/0 -> nan that the expanded product
    # yields once the sigmoid saturates to exactly 0 or 1.
    dJ__dz = y_hat - y

    # Average over the examples actually passed in, not a notebook-level
    # global, so the function is also correct for batches of other sizes.
    num_examples = X.shape[1]

    dJ__dw = dJ__dz @ X.T / num_examples
    dJ__db = np.sum(dJ__dz) / num_examples

    return dJ__dw, dJ__db

In [29]:
# Update parameters
def update_params(W, b, dJdW, dJdb, alpha):
    """Take one gradient-descent step and return the new ``(W, b)``.

    Each parameter moves against its gradient, scaled by the learning rate
    ``alpha``. The inputs are not modified in place.
    """
    return W - alpha * dJdW, b - alpha * dJdb

In [30]:
# Train the logistic regression classifier from pieces above
def logistic_regression(X, y, num_iterations=1000, print_stage=100, alpha=0.01):
    """Train a logistic regression classifier with batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray
        Training inputs, passed unchanged to ``forward_prop`` and ``backprop``.
    y : numpy.ndarray
        Training labels.
    num_iterations : int
        Highest iteration index. The loop runs for ``i = 0..num_iterations``
        inclusive, i.e. ``num_iterations + 1`` gradient steps.
    print_stage : int
        Print the cost every ``print_stage`` iterations. A falsy value
        (``0`` or ``None``) disables progress output.
    alpha : float
        Learning rate.

    Returns
    -------
    W, b
        The parameters after the final update.
    """
    W, b = initialize_params()

    for i in range(num_iterations + 1):
        y_hat = forward_prop(X, W, b)

        # The cost is only used for logging, so it is evaluated only on
        # reporting iterations. Checking print_stage first avoids a
        # modulo-by-zero when logging is switched off.
        if print_stage and i % print_stage == 0:
            J = compute_cost(y_hat, y)
            print(f'Iter {i} cost: {J}')

        dJdW, dJdb = backprop(y, y_hat, X)
        W, b = update_params(W, b, dJdW, dJdb, alpha)

    return W, b

In [39]:
# Fit on the training split; print_stage keeps its default of 100.
W, b = logistic_regression(
    X_train,
    y_train,
    num_iterations=10000,
    alpha=0.001,
)

Iter 0 cost: 862.4545904654178
Iter 100 cost: 516.6193471955128
Iter 200 cost: 371.80030839714067
Iter 300 cost: 317.6968003771568
Iter 400 cost: 295.63771592293574
Iter 500 cost: 283.20688281264256
Iter 600 cost: 273.9446893030025
Iter 700 cost: 265.9874289370192
Iter 800 cost: 258.7372425448183
Iter 900 cost: 251.97449927460195
Iter 1000 cost: 245.6038815793248
Iter 1100 cost: 239.57485365916244
Iter 1200 cost: 233.854708925655
Iter 1300 cost: 228.4187938821
Iter 1400 cost: 223.24671399891577
Iter 1500 cost: 218.32073607918022
Iter 1600 cost: 213.6250383685114
Iter 1700 cost: 209.14530701552502
Iter 1800 cost: 204.8684851271897
Iter 1900 cost: 200.7825963226932
Iter 2000 cost: 196.87660955425198
Iter 2100 cost: 193.14032994616468
Iter 2200 cost: 189.56430790972553
Iter 2300 cost: 186.13976210516353
Iter 2400 cost: 182.85851340320826
Iter 2500 cost: 179.71292783338663
Iter 2600 cost: 176.6958669984606
Iter 2700 cost: 173.80064475660984
Iter 2800 cost: 171.02098920182505
Iter 2900 cost

In [40]:
def compute_accuracy(y, y_preds):
    """Return the fraction of predictions within 0.5 of their true label.

    A prediction counts as correct when ``|y_preds - y| < 0.5``; a prediction
    of exactly 0.5 is therefore never counted as correct. The denominator is
    the number of columns of ``y_preds``.
    """
    is_hit = np.abs(y_preds - y) < 0.5
    return np.sum(is_hit) / y_preds.shape[1]

In [41]:
# Score the held-out dev split with the trained parameters.
y_dev_preds = forward_prop(X=X_dev, W=W, b=b)
accuracy = compute_accuracy(y=y_dev, y_preds=y_dev_preds)

print(f'Dev set accuracy: {accuracy}')

Dev set accuracy: 0.9795918367346939
