## Logistic Gradient Descent



$$\begin{align*}
&\text{repeat until convergence:} \; \lbrace \\
&  \; \; \;w_j = w_j -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial w_j} \tag{1}  \; & \text{for j := 0..n-1} \\ 
&  \; \; \;  \; \;b = b -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial b} \\
&\rbrace
\end{align*}$$

Where each iteration performs simultaneous updates on $w_j$ for all $j$, where
$$\begin{align*}
\frac{\partial J(\mathbf{w},b)}{\partial w_j}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})x_{j}^{(i)} \tag{2} \\
\frac{\partial J(\mathbf{w},b)}{\partial b}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}) \tag{3} 
\end{align*}$$

* m is the number of training examples in the data set      
* $f_{\mathbf{w},b}(x^{(i)})$ is the model's prediction, while $y^{(i)}$ is the target
* For a logistic regression model  
    $z = \mathbf{w} \cdot \mathbf{x} + b$  
    $f_{\mathbf{w},b}(x) = g(z)$  
    where $g(z)$ is the sigmoid function:  
    $g(z) = \frac{1}{1+e^{-z}}$   
    


In [39]:
import numpy as np
import math
import copy

In [2]:
def sigmoid(z):
    '''
    Compute the sigmoid of z
    Args:
        z(ndarray): A scaler, numpy of any size
    Returns:
        g(ndarray): sigmoid(z), with the same shape of z
    '''

    g=1/(1+ np.exp(-z))

    return g

In [48]:
def compute_gradient_logistic(X, y, w, b):
    m,n=X.shape
    dj_dw=np.zeros((n,))
    dj_db=0
    for i in range(m):
        z_i=np.dot(X[i],w)+b
        z=sigmoid(z_i)
        error=z-y[i]
        print(error)
        for j in range(n):
            dj_dw[j]+=error*X[i][j]
        dj_db+=error
    
    return dj_dw/m,dj_db/m


In [49]:
def gradient_descent(X,y,w,b,lr,epochs=100):
    J_history = []
    w = copy.deepcopy(w)  #avoid modifying global w within function
    b = b
    for i in range(epochs):
        dj_db, dj_dw = compute_gradient_logistic(X, y, w, b)
        
        # Update Parameters using w, b, alpha and gradient
        w = w - lr * dj_dw               
        b = b - lr * dj_db 
        # Save cost J at each iteration
        if i<100000:      # prevent resource exhaustion 
            J_history.append( compute_gradient_logistic(X, y, w, b) )
        if i% math.ceil(epochs / 10) == 0:
            print(f"Iteration {i:4d}: Cost {J_history[-1]}   ")
    return w, b, J_history   

In [50]:
X_tmp = np.array([[0.5, 1.5], [1,1], [1.5, 0.5], [3, 0.5], [2, 2], [1, 2.5]])
y_tmp = np.array([0, 0, 0, 1, 1, 1])
w_tmp = np.array([2.,3.])
b_tmp = 1.
dj_db_tmp, dj_dw_tmp = compute_gradient_logistic(X_tmp, y_tmp, w_tmp, b_tmp)
print(f"dj_db: {dj_db_tmp}" )
print(f"dj_dw: {dj_dw_tmp.tolist()}" )

X_train = np.array([[0.5, 1.5], [1,1], [1.5, 0.5], [3, 0.5], [2, 2], [1, 2.5]])
y_train = np.array([0, 0, 0, 1, 1, 1])
w_tmp  = np.zeros_like(X_train[0])
b_tmp  = 0.
alph = 0.1
iters = 10000

w_out, b_out, _ = gradient_descent(X_train, y_train, w_tmp, b_tmp, alph, iters) 
print(f"\nupdated parameters: w:{w_out}, b:{b_out}")

0.998498817743263
0.9975273768433653
0.995929862284104
-0.00020342697805519894
-1.6701421847953313e-05
-2.7535691114688454e-05
dj_db: [0.49833339 0.49883943]
dj_dw: 0.49861806546328574
0.5
0.5
0.5
-0.5
-0.5
-0.5
[0.50624967 0.50416657]


ValueError: setting an array element with a sequence.