### Sigmoid Function

In [1]:
# NumPy provides the function exp(), which computes e^z element-wise
import numpy as np
def sigmoid(z):
    """
    Compute the logistic sigmoid g(z) = 1 / (1 + e^(-z)), element-wise.

    The result is evaluated in a form that never calls exp() on a large
    positive number, so very negative inputs do not trigger overflow.

    Args:
      z (real scalar or array_like): input value(s).

    Returns:
      g (numpy float64 scalar or ndarray): sigmoid of z. A scalar input
        yields a numpy scalar; an array input yields an array of the same shape.
    """
    z = np.asarray(z, dtype=float)

    # exp(-|z|) always lies in (0, 1], so it cannot overflow the way
    # exp(-z) does when z is a large negative number.
    e = np.exp(-np.abs(z))

    # Two algebraically equal forms of the sigmoid:
    #   z >= 0:  1 / (1 + e^(-z))
    #   z <  0:  e^z / (1 + e^z)
    # Both branches are safe to evaluate everywhere because 0 < e <= 1.
    g = np.where(z >= 0, 1 / (1 + e), e / (1 + e))

    # np.where returns a 0-d array for scalar input; indexing with () unwraps
    # it to a numpy scalar and leaves higher-dimensional arrays as arrays.
    return g[()]

In [6]:
# Let's see how the function behaves on a range of integer inputs.
np.set_printoptions(precision=3)  # print arrays with three decimal places

z_temp = np.arange(-5, 5)
y = sigmoid(z_temp)

# Pair every input with its sigmoid value: one (z, y) row per input.
output = np.column_stack((z_temp, y))
print('input(z)  output(y)')
print(output)

input(z)  output(y)
[[-5.     0.007]
 [-4.     0.018]
 [-3.     0.047]
 [-2.     0.119]
 [-1.     0.269]
 [ 0.     0.5  ]
 [ 1.     0.731]
 [ 2.     0.881]
 [ 3.     0.953]
 [ 4.     0.982]]


### Cost function for logistic regression

In [8]:
# Training set: six examples with two features each, plus their 0/1 labels.
X_train = np.array([
    [0.5, 1.5],
    [1.0, 1.0],
    [1.5, 0.5],
    [3.0, 0.5],
    [2.0, 2.0],
    [1.0, 2.5],
])
y_train = np.array([0, 0, 0, 1, 1, 1])

In [9]:
def compute_cost_logistic(X,y,w,b):
    """
    Compute the logistic-regression cost averaged over all training examples.

    For each example i, with z_i = X[i].w + b and f_i = sigmoid(z_i), the loss
    is -y_i*log(f_i) - (1 - y_i)*log(1 - f_i); the cost is the mean of the
    losses.

    Args:
      X (ndarray): training examples, one example per row
      y (ndarray): target value for each example (0 or 1)
      w (ndarray): model weights, one per feature
      b (scalar) : model bias

    Returns:
      cost (scalar): mean loss over the examples in X

    Raises:
      ZeroDivisionError: if X contains no examples.
    """
    m = X.shape[0]
    if m == 0:
        raise ZeroDivisionError("cannot average the cost over zero training examples")

    z = np.dot(X, w) + b

    # -y*log(sigmoid(z)) - (1-y)*log(1-sigmoid(z)) simplifies algebraically to
    # log(1 + e^z) - y*z. Evaluating it in this form avoids log(0) when the
    # sigmoid saturates to exactly 0 or 1, which would otherwise turn the
    # cost into inf or nan. logaddexp(0, z) computes log(e^0 + e^z) stably.
    losses = np.logaddexp(0, z) - y * z

    cost = np.sum(losses) / m
    return cost

In [10]:
# Evaluate the cost on the training set for one hand-picked parameter choice.
w_temp = np.array([1, 1])
b = -3
print(compute_cost_logistic(X=X_train, y=y_train, w=w_temp, b=b))

0.36686678640551745


### Gradient descent for logistic regression

In [11]:
def compute_gradient_logistic(X,y,w,b):
    """
    Compute the gradient of the logistic-regression cost w.r.t. w and b.

    Args:
      X (ndarray (m,n)): m training examples with n features each
      y (ndarray (m,)) : target value for each example
      w (ndarray (n,)) : model weights
      b (scalar)       : model bias

    Returns:
      dj_dw (ndarray (n,)): gradient of the cost with respect to w
      dj_db (scalar)      : gradient of the cost with respect to b

    Raises:
      ZeroDivisionError: if X contains no examples.
    """
    m, _ = X.shape  # unpacking also enforces that X is 2-D
    if m == 0:
        raise ZeroDivisionError("cannot average the gradient over zero training examples")

    # Prediction error sigmoid(x_i.w + b) - y_i for every example at once.
    err = sigmoid(np.dot(X, w) + b) - y

    # dj_dw[j] = (1/m) * sum_i err[i] * X[i, j]; the dot product performs the
    # sum over examples for all features in one call.
    dj_dw = np.dot(err, X) / m
    dj_db = np.sum(err) / m
    return dj_dw,dj_db

In [14]:
# Spot-check the gradient at one fixed choice of parameters.
X_tmp = np.array([
    [0.5, 1.5],
    [1.0, 1.0],
    [1.5, 0.5],
    [3.0, 0.5],
    [2.0, 2.0],
    [1.0, 2.5],
])
y_tmp = np.array([0, 0, 0, 1, 1, 1])
w_tmp = np.array([2.0, 3.0])
b_tmp = 1.0

dj_dw_tmp, dj_db_tmp = compute_gradient_logistic(X=X_tmp, y=y_tmp, w=w_tmp, b=b_tmp)
print(f"dj_db: {dj_db_tmp}" )
print(f"dj_dw: {dj_dw_tmp.tolist()}")

dj_db: 0.49861806546328574
dj_dw: [0.498333393278696, 0.49883942983996693]


In [15]:
import copy
def gradient_descent_logistic(X,y,w_in,b_in,alpha,num_iters):
    """
    Run batch gradient descent for logistic regression.

    Args:
      X         : training examples, passed through to compute_gradient_logistic
      y         : target values, passed through to compute_gradient_logistic
      w_in      : initial weights (copied, so the caller's object is not modified)
      b_in      : initial bias
      alpha     : learning rate (step size)
      num_iters : number of gradient steps to take

    Returns:
      w : weights after num_iters updates
      b : bias after num_iters updates
    """
    weights = copy.deepcopy(w_in)
    bias = b_in

    for _ in range(num_iters):
        grad_w, grad_b = compute_gradient_logistic(X, y, weights, bias)
        # Step both parameters against their gradients, using the same
        # (pre-update) gradient values for each.
        weights, bias = weights - alpha * grad_w, bias - alpha * grad_b

    return weights, bias

In [18]:
# Train on the data set, starting from all-zero parameters.
alph = 0.1      # learning rate
iters = 10000   # number of gradient-descent steps
w_tmp = np.zeros_like(X_train[0])
b_tmp = 0.0

w_out, b_out = gradient_descent_logistic(
    X=X_train, y=y_train, w_in=w_tmp, b_in=b_tmp, alpha=alph, num_iters=iters
)
print(f"\nupdated parameters: w:{w_out}, b:{b_out}")


updated parameters: w:[5.281 5.078], b:-14.222409982019837


array([-0.402, -0.079,  0.458,  0.033,  0.192, -0.184])