In [22]:
import numpy as np

ONE SAMPLE

In [23]:
X = np.array([
    [1.0, 0.5],
    [2.0, 1.0],
    [3.0, 1.5],
    [2.0, 2.0],
    [1.0, 2.5],
    [2.0, 3.0],
    [1.0, 3.5],
    [2.0, 4.0],
])
Y = np.array([0, 0, 1, 0, 0, 1, 1, 1])

In [24]:
X.shape, Y.shape

((8, 2), (8,))

In [25]:
# Creating bias with [1, X]]
n_samples = X.shape[0]
X = np.hstack([np.ones((n_samples, 1)), X])
X

array([[1. , 1. , 0.5],
       [1. , 2. , 1. ],
       [1. , 3. , 1.5],
       [1. , 2. , 2. ],
       [1. , 1. , 2.5],
       [1. , 2. , 3. ],
       [1. , 1. , 3.5],
       [1. , 2. , 4. ]])

In [26]:
X.T

array([[1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ],
       [1. , 2. , 3. , 2. , 1. , 2. , 1. , 2. ],
       [0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. ]])

In [27]:
n_features = X.shape[1]
theta = np.random.rand(n_features)
theta.shape

(3,)

In [28]:
# Compute logistic function
def logistic_fn(x):
    return 1 / (1 + np.exp(-x))

# prediction -> y_hat shape = (8,)
def predict(x, theta):
    z = np.dot(x, theta)
    y_hat = logistic_fn(z)
    return z, y_hat

# Compute loss function
def compute_loss(y, y_hat):
    loss = (-1 * y) * np.log(y_hat) - (1 - y) * np.log(1 - y_hat)
    return loss

# Compute gradient
def compute_gradient(x, y, y_hat):
    # gradient = np.dot(x.T, (y_hat - y))
    gradient = np.dot((y_hat - y).T, x) 
    return gradient

# Update weight
def update_weight(theta, gradient, LEARNING_RATE=0.1):
    new_theta = theta - (gradient * LEARNING_RATE)
    return new_theta 

In [31]:
# Parameter
n_epochs = 10
LEARNING_RATE = 0.001

losses = []
for epoch in range(n_epochs):
    epoch_loss = []
    for i in range(n_samples):
        # Get a sample
        x = X[i, :]
        y = Y[i]

        # prediction
        z, y_hat = predict(x, theta)

        # Compute loss
        loss = compute_loss(y, y_hat)
        epoch_loss.append(loss)

        # Compute gradient
        gradient = compute_gradient(x, y, y_hat)

        # update weight
        theta = update_weight(theta, gradient, LEARNING_RATE)
    losses.append(sum(epoch_loss) / len(epoch_loss))


theta, losses

(array([0.19055788, 0.41884781, 0.58275507]),
 [np.float64(1.036593246541216),
  np.float64(1.0296399526589533),
  np.float64(1.022752422594704),
  np.float64(1.0159312552422604),
  np.float64(1.0091770343958446),
  np.float64(1.00249032794435),
  np.float64(0.9958716870689467),
  np.float64(0.9893216454459773),
  np.float64(0.9828407184571166),
  np.float64(0.9764294024088148)])

In [32]:
X_test = np.array([1.0, 2.0, 0.25])
predict(X_test, theta)

(np.float64(1.1739422613615733), np.float64(0.763856858621262))

In [33]:
X_test = np.array([1.0, 1.0, 4.5])
predict(X_test, theta)

(np.float64(3.23180349857381), np.float64(0.9620137137129092))