In [1]:
import numpy as np
import matplotlib.pyplot as plt

# Scaling the features of the training sets

> ### Note
> Scaling the features helps optimize the Gradient descent algorithm by making the features more consistent
> and shortens the iterations requried.
> We have used the Z-Score normalization method here

In [2]:
def fscale(x):
    mu = np.mean(x, axis=0) # axis 0 will make the resultant array have shape n,
    sigma = np.std(x, axis=0)
    x_scaled = (x - mu)/sigma
    return x_scaled
    

# Compute the sigmoid function

In [3]:
def sigmoid(w,x,b):
    z = np.dot(w,x)+b
    g_z = 1/(1+np.exp(-z))
    return g_z

# Classic Logistic Regression

**Compute the gradient of the Logistic function using Gradient Descent**

In [4]:
def gradient(w,b,x,y):
    m,n = x.shape
    dw = np.zeros((n,))
    db = 0.
    for i in range(m):
        gz = sigmoid(w,x[i],b)
        error = gz-y[i]
        for j in range(n):
            dw[j] += error*x[i][j]
        db += error
    return dw/m,db/m

## Classic Logistic Loss function

In [5]:
def logistic_cost(w,x,y,b):
    m,n = x.shape
    j_wb = 0
    for i in range(m):
        gz = sigmoid(w,x[i],b)
        cost = -y[i]*np.log(gz)-((1-y[i])*(np.log(1-gz)))
        j_wb += cost
    j_wb = j_wb/m
    return j_wb

## Calculate Logistic Regression

In [6]:
def logistic_regression(itr,x,y):
    alpha = 1.0e-2
    m,n = x.shape
    w = np.zeros((n,))
    b = 0.
    dw = np.zeros((n,))
    db = 0.
    for i in range(itr):
        dw,db = gradient(w,b,x,y)
        w = w - alpha*dw
        b = b - alpha*db
        if int(i % 100) == 0:
            print(f"w: {w}, b: {b}")
            cost = logistic_cost(w,x,y,b)
            print("cost: ",cost)
    return w,b

In [7]:
#Scaling the features first.
np.random.seed(1)
X_tmp = np.random.rand(5,6)
y_tmp = np.array([0,1,0,1,0])
x_scaled = fscale(X_tmp)
w,b = logistic_regression(1000,x_scaled,y_tmp)
w,b

w: [-0.0029545  -0.00471772  0.00442478  0.00315503  0.00249769  0.00162223], b: -0.001
cost:  0.6861386003513423
w: [-0.18774614 -0.34886977  0.31892753  0.19488185  0.1588771   0.12397961], b: -0.08920357867993883
cost:  0.33623936808471955
w: [-0.26881559 -0.56219425  0.50390217  0.27116815  0.22467647  0.19997175], b: -0.15801306362222645
cost:  0.22773889451779214
w: [-0.31489877 -0.72033197  0.63616358  0.30928749  0.25901874  0.25413939], b: -0.2130019082357987
cost:  0.17421421832255485
w: [-0.34557102 -0.84747527  0.73985797  0.33105225  0.27990855  0.29608184], b: -0.25798258919566847
cost:  0.14156747979749057
w: [-0.36814177 -0.95428165  0.82534733  0.34452418  0.29416167  0.33027148], b: -0.29549040462712994
cost:  0.11934367220906358
w: [-0.38591378 -1.0465353   0.89810977  0.35330662  0.30478878  0.35911908], b: -0.32728188190340557
cost:  0.1031721826179262
w: [-0.40058398 -1.12779367  0.96143667  0.35922572  0.31326793  0.38406542], b: -0.3546115910574494
cost:  0.0908

(array([-0.43375711, -1.32547096,  1.11280111,  0.3680351 ,  0.33207678,
         0.44324147]),
 -0.41772745403816447)

#### The Cost gradually decreases and something along the following result will be displayed

```
w: [-0.0029545  -0.00471772  0.00442478  0.00315503  0.00249769  0.00162223], b: -0.001
cost:  0.6861386003513423
w: [-0.18774614 -0.34886977  0.31892753  0.19488185  0.1588771   0.12397961], b: -0.08920357867993883
cost:  0.33623936808471955
w: [-0.26881559 -0.56219425  0.50390217  0.27116815  0.22467647  0.19997175], b: -0.15801306362222645
cost:  0.22773889451779214
w: [-0.31489877 -0.72033197  0.63616358  0.30928749  0.25901874  0.25413939], b: -0.2130019082357987
cost:  0.17421421832255485
w: [-0.34557102 -0.84747527  0.73985797  0.33105225  0.27990855  0.29608184], b: -0.25798258919566847
cost:  0.14156747979749057
w: [-0.36814177 -0.95428165  0.82534733  0.34452418  0.29416167  0.33027148], b: -0.29549040462712994
cost:  0.11934367220906358
w: [-0.38591378 -1.0465353   0.89810977  0.35330662  0.30478878  0.35911908], b: -0.32728188190340557
cost:  0.1031721826179262
w: [-0.40058398 -1.12779367  0.96143667  0.35922572  0.31326793  0.38406542], b: -0.3546115910574494
cost:  0.09085858503471074
w: [-0.41311016 -1.2004268   1.01747732  0.36329354  0.32038451  0.40603843], b: -0.37839663564338377
cost:  0.08116490911666821
w: [-0.42407391 -1.26610306  1.06771898  0.36611023  0.32658319  0.42567024], b: -0.3993208129471111
cost:  0.07333473604670826
(array([-0.43375711, -1.32547096,  1.11280111,  0.3680351 ,  0.33207678,
         0.44324147]),
 -0.41772745403816447)
```

# Logistic Regression (Regularized)

> The **sigmoid function** remains unchanged
> 
> The changes are made to the **Gradient descent algorithm** and the **logistic cost function**


In [16]:
def reg_gradient(x,y,w,b,lamd):
    m,n = x.shape
    dw = np.zeros((n,))
    db = 0.
    for i in range(m):
        error = sigmoid(w,x[i],b) - y[i]
        for j in range(n):
            dw[j] = dw[j]+error*x[i][j]
        db+= error
    dw = dw/m
    db = db/m
    dw = dw + (lamd/m)*w
    return dw,db

In [64]:
def reg_logistic_cost(w,b,x,y,lamd):
    m,n = x.shape
    cost = 0.
    for i in range(m):
        # (-i/m)sum(y(log(f_wb(x)) + (1-y)(log(f_wb(x))) +lambda/2m(sum(wj^2)))
        gz = sigmoid(w,x[i],b)
        cost+= -y[i]*np.log(gz) - (1-y[i])*(np.log(1-gz))
        # print(f"cost calculation: -y[i]*np.log(gz) - (1-y)*(np.log(1-gz))= {y[i]}*{np.log(gz)}+{(1-y)}*{np.log(1-gz)}")
    cost = cost/2*m
    su_wj = np.sum(w**2)
    reg_exp = (lamd/2*m)*su_wj
    cost = cost + reg_exp
    return cost

In [95]:
def reg_logistic_gradient_desc(x,y,itr):
    m,n = x.shape
    dw = np.zeros((n,))
    w = np.zeros((n,))
    b = 0.
    db = 0.
    lamd = 1.3
    alpha = 1e-3
    for i in range(itr):
        # print(f"function to be called: reg_gradient({x},{y},{w},{b},{lamd})")
        dw,db = reg_gradient(x,y,w,b,lamd)
        w = w - alpha*dw
        b = b - alpha*db
        if i%100 == 0:
            cost = reg_logistic_cost(w,b,x,y,lamd)
            print(f"cost : {cost}")
            print(f"w: {w}, b: {b}")
    return w,b

In [96]:
#Scaling the features first.
np.random.seed(1)
X_tmp = np.random.rand(5,6)
y_tmp = np.array([0,1,0,1,0])
# print(f"x: {X_tmp}, \ny: {y_tmp}")
x_scaled = fscale(X_tmp)
print(f"x: {x_scaled}, \ny: {y_tmp}")
w,b = reg_logistic_gradient_desc(x_scaled,y_tmp,1000)

x: [[ 0.19097825  0.39716945 -0.8603242  -0.6362233  -1.25338677 -1.84606008]
 [-0.65458148 -0.92024644  0.44266862  0.11063051  1.0767403   0.39215012]
 [-0.58792203  0.95186269 -0.77073255  0.52640416  1.06057788 -0.08551748]
 [-0.8226708  -1.43861337  1.76972053  1.46688419  0.17210233  0.4189654 ]
 [ 1.87419605  1.00982766 -0.58133239 -1.46769556 -1.05603375  1.12046205]], 
y: [0 1 0 1 0]
cost : 8.655547186827633
w: [-0.00029545 -0.00047177  0.00044248  0.0003155   0.00024977  0.00016222], b: -0.0001
cost : 7.878464644583944
w: [-0.02795541 -0.04535207  0.04242379  0.0297786   0.02365176  0.0156793 ], b: -0.009974719081439489
cost : 7.273264143635377
w: [-0.05219722 -0.08602457  0.08025696  0.05545615  0.04419361  0.02988888], b: -0.01960489342398975
cost : 6.801554424744762
w: [-0.07346543 -0.12297215  0.11442546  0.0778414   0.06223788  0.04291684], b: -0.028995551311076177
cost : 6.433292270239045
w: [-0.09216002 -0.1566321   0.14536813  0.09737992  0.07811002  0.0548786 ], b: -