In [187]:
import numpy as np
import copy
import math

In [188]:
X_train = np.array([[0.5, 1.5], [1,1], [1.5, 0.5], [3, 0.5], [2, 2], [1, 2.5]])  
y_train = np.array([0, 0, 0, 1, 1, 1])                                           
w_tmp = np.array([1.,1.])
b_tmp = -3

## Defining the function

In [189]:
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

In [190]:
def f_wb(w, b, x):
    if x.ndim == 1:
        return sigmoid(np.dot(w, x) + b)
    elif x.ndim > 1:
        return (sigmoid(np.sum(w * x, axis = 1) + b)).reshape(-1, 1)
    
    return w * x + b

In [191]:
f_wb(w_tmp, b_tmp, X_train)

array([[0.26894142],
       [0.26894142],
       [0.26894142],
       [0.62245933],
       [0.73105858],
       [0.62245933]])

## Loss function (BinaryCrossentropy)

Note: We have to reshape our inputs to ensure that concatenation happens as intended

In [192]:
def BinaryCrossentropy(y_pred, y_true):
    m = len(y_true)

    # Reshaping
    y_pred = y_pred.reshape(-1, 1)
    y_true = y_true.reshape(-1, 1)

    return np.sum(-y_true * np.log(y_pred) - (1. - y_true) * np.log(1. - y_pred)) / m

Checking the implementation of `BinaryCrossentropy` function

In [193]:
BinaryCrossentropy(f_wb(w_tmp, b_tmp, X_train), y_train)

0.36686678640551745

## Gradient for logistic regression

In [194]:
def Gradient(w_in = None, b_in = None, function = f_wb, x = None, y = None):
    w = w_in
    b = b_in
    m = len(y)
    f = function
    y = y.reshape(-1, 1)

    df_dw = np.sum((f(w, b, x) - y) * x, axis = 0) / m
    df_db = np.sum(f(w, b, x) - y) / m

    return df_dw, df_db

Checking the implementation of `Gradient` function

In [195]:
X_tmp = np.array([[0.5, 1.5], [1,1], [1.5, 0.5], [3, 0.5], [2, 2], [1, 2.5]])
y_tmp = np.array([0, 0, 0, 1, 1, 1])
w_tmp = np.array([2.,3.])
b_tmp = 1.
df_dw, df_db = Gradient(w_tmp, b_tmp, f_wb, X_tmp, y_tmp)
print(df_dw, df_db)

[0.49833339 0.49883943] 0.49861806546328574


## Gradient descent for logistic regression

In [196]:
def GradentDescentLogistic(w_in = None, b_in = None, function = f_wb, learning_rate = 0.001, x = None, y = None, iterations = 1000, verbose = 0):
    w = copy.deepcopy(w_in)
    b = b_in
    f = function
    a = learning_rate
    loss = []

    for i in range(iterations):
        df_dw, df_dw = Gradient(w, b, f, x, y)
        w = w - a * df_dw
        b = b - a * df_db
        loss.append(BinaryCrossentropy(f(w, b, x), y))
        
        if verbose == 1:
            if i % math.floor(iterations / 10) == 0:
                print(f"Iteration {i}    Loss: " + str(BinaryCrossentropy(f(w, b, x), y)))

    return w, b, loss

In [197]:
w_tmp = np.zeros_like(X_train[-1])
b_tmp = 0.
a = 0.1
iterations = 10000

w_trained, b_trained, loss = GradentDescentLogistic(w_tmp, b_tmp, f_wb, a, X_train, y_train, iterations, verbose = 1)

Iteration 0    Loss: 0.6934579233405945
Iteration 1000    Loss: 0.18572250379741753
Iteration 2000    Loss: 0.18584824191206326
Iteration 3000    Loss: 0.18584834379336146
Iteration 4000    Loss: 0.18584834387548918
Iteration 5000    Loss: 0.18584834387558494
Iteration 6000    Loss: nan
Iteration 7000    Loss: nan
Iteration 8000    Loss: nan
Iteration 9000    Loss: nan


  return np.sum(-y_true * np.log(y_pred) - (1. - y_true) * np.log(1. - y_pred)) / m
  return np.sum(-y_true * np.log(y_pred) - (1. - y_true) * np.log(1. - y_pred)) / m
