In [106]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Here we will see how to update the values of w and b so that the loss function is minimized

In [107]:
## We reuse the same dataset as before
## X_train holds two features per example -> study hours, sleep hours
## y_train holds the label for each example -> pass (1) or not (0)

In [108]:
## One row per training example: [study hours, sleep hours]
X_train = np.array(
    [
        [1, 8],
        [2, 7.5],
        [3, 7],
        [4, 6.5],
        [5, 6],
        [6, 6],
        [7, 5.5],
        [8, 5],
        [9, 5],
        [10, 4.5],
    ]
)
## Label for each row of X_train, in the same order: four 0s followed by six 1s
y_train = np.array([0] * 4 + [1] * 6)

In [109]:
## Hand-picked starting guess: one weight per feature, plus the bias term
w, b = np.array([1, 1]), -3

In [110]:
def sigmoid_function(X_train, w, b):
    """Return the logistic-regression prediction for every example.

    Computes the linear score ``z = X_train . w + b`` and maps it through
    the sigmoid ``1 / (1 + exp(-z))``.

    Parameters
    ----------
    X_train : array-like of shape (m, n)
        Feature matrix, one row per example.
    w : array-like of shape (n,)
        Weight applied to each feature.
    b : scalar
        Bias term added to every score.

    Returns
    -------
    numpy.ndarray of shape (m,)
        Sigmoid of the linear score of each example.
    """
    ## The dot product scores every row at once, so the example count is not needed
    z = np.dot(X_train, w) + b
    y_pred = 1 / (1 + np.exp(-z))
    return y_pred

In [111]:
def compute_loss_function(X_train, y_train, w, b):
    """Return the total binary cross-entropy (log) loss over the training set.

    For each example the loss is
    ``-y * log(y_pred) - (1 - y) * log(1 - y_pred)``, where ``y_pred`` comes
    from ``sigmoid_function``.  The per-example losses are summed and are
    NOT divided by the number of examples.

    Parameters
    ----------
    X_train : array-like of shape (m, n)
        Feature matrix, one row per example.
    y_train : array-like of shape (m,)
        Binary labels (0 or 1), one per row of ``X_train``.
    w : array-like of shape (n,)
        Feature weights.
    b : scalar
        Bias term.

    Returns
    -------
    numpy.float64
        Sum of the per-example log losses.
    """
    y_true = np.asarray(y_train)
    y_pred = sigmoid_function(X_train, w, b)

    ## A saturated sigmoid returns exactly 0.0 or 1.0.  log(0) is -inf and
    ## 0 * -inf is nan, so even a perfectly classified example would turn the
    ## whole loss into nan.  Clipping keeps every logarithm finite and leaves
    ## non-saturated predictions untouched.
    eps = 1e-15
    y_pred = np.clip(y_pred, eps, 1 - eps)

    per_example_loss = -y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred)
    total_loss = np.sum(per_example_loss)

    return total_loss

In [112]:
## Loss for the starting guess of w and b
compute_loss_function(X_train=X_train, y_train=y_train, w=w, b=b)

np.float64(27.00604828132077)

$$\begin{align*}
\frac{\partial J(\mathbf{w},b)}{\partial w_j}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})x_{j}^{(i)} \tag{2} \\
\frac{\partial J(\mathbf{w},b)}{\partial b}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}) \tag{3} 
\end{align*}$$

In [113]:
def compute_derivative(X_train, y_train, w, b):
    """Return the gradient of the mean log loss with respect to ``w`` and ``b``.

    Implements

        dJ/dw_j = (1/m) * sum_i (f_wb(x_i) - y_i) * x_ij
        dJ/db   = (1/m) * sum_i (f_wb(x_i) - y_i)

    where ``f_wb`` is ``sigmoid_function``.

    Parameters
    ----------
    X_train : numpy.ndarray of shape (m, n)
        Feature matrix, one row per example.
    y_train : array-like of shape (m,)
        Binary labels, one per row of ``X_train``.
    w : array-like of shape (n,)
        Feature weights.
    b : scalar
        Bias term.

    Returns
    -------
    d_dw : numpy.ndarray of shape (n,)
        Partial derivative with respect to each weight.
    d_db : numpy.float64
        Partial derivative with respect to the bias.

    Raises
    ------
    ValueError
        If ``X_train`` has no rows (the average would be 0 / 0).
    """
    m = X_train.shape[0]  ## Number of training examples
    if m == 0:
        raise ValueError("X_train must contain at least one training example")

    ## Prediction error f_wb(x_i) - y_i of every example, computed once
    error = sigmoid_function(X_train, w, b) - y_train

    ## X^T . error sums error_i * x_ij over the examples for every feature j
    d_dw = np.dot(X_train.T, error) / m
    d_db = np.sum(error) / m
    return d_dw, d_db

In [114]:
## Gradient for the starting guess of w and b
compute_derivative(X_train=X_train, y_train=y_train, w=w, b=b)

(array([0.99860249, 2.89554695]), np.float64(0.399395654453981))

### Now we will implement the gradient descent update

In [115]:
def gradient_descent(X_train, y_train, w, b, alpha, iteration):
    """Perform a single gradient-descent update of ``w`` and ``b``.

    The function does not loop; the caller invokes it once per iteration and
    feeds the returned parameters back in.

    Parameters
    ----------
    X_train : numpy.ndarray of shape (m, n)
        Feature matrix, one row per example.
    y_train : array-like of shape (m,)
        Binary labels, one per row of ``X_train``.
    w : numpy.ndarray of shape (n,)
        Current feature weights.
    b : scalar
        Current bias term.
    alpha : float
        Learning rate (step size).
    iteration : int
        Caller's iteration counter; returned unchanged so it can be logged
        next to the loss.

    Returns
    -------
    iteration : int
        The ``iteration`` argument, passed through.
    total_loss : numpy.float64
        Loss evaluated with the incoming ``w`` and ``b`` (before the update).
    w : numpy.ndarray of shape (n,)
        Updated weights; a new array, the input is not modified.
    b : scalar
        Updated bias.
    """
    ## Loss is measured with the incoming parameters, i.e. before this step's update
    total_loss = compute_loss_function(X_train, y_train, w, b)

    d_dw, d_db = compute_derivative(X_train, y_train, w, b)

    ## Move against the gradient, scaled by the learning rate
    w = w - alpha * d_dw
    b = b - alpha * d_db

    return iteration, total_loss, w, b

In [116]:
## Try one update step with a small learning rate
gradient_descent(X_train, y_train, w, b, alpha=0.01, iteration=0)

(0,
 np.float64(27.00604828132077),
 array([0.99001398, 0.97104453]),
 np.float64(-3.00399395654454))

In [117]:
## Initializing the parameters: start from all-zero weights and zero bias
w, b = np.zeros(2), 0

## Training settings: number of update steps and learning rate
iterations, alpha = 4000, 0.05

In [118]:
## History of the run: iteration index and the loss measured at that iteration
iteration_num, loss = [], []

## NOTE: w and b are overwritten on every pass, so re-running this cell resumes
## training from the current values; re-run the initialization cell to start over.
for i in range(iterations):
    iteration, total_loss, w, b = gradient_descent(
        X_train=X_train, y_train=y_train, w=w, b=b, alpha=alpha, iteration=i
    )
    iteration_num.append(iteration)
    loss.append(total_loss)

In [119]:
## Collect the training history into one table, one row per iteration
df = pd.DataFrame({'iteration': iteration_num, 'loss': loss})

In [120]:
## Show the loss recorded at each iteration (the middle rows of the long frame are elided in the output)
df

Unnamed: 0,iteration,loss
0,0,6.931472
1,1,5.802363
2,2,5.280148
3,3,4.855563
4,4,4.501442
...,...,...
3995,3995,0.138945
3996,3996,0.138916
3997,3997,0.138887
3998,3998,0.138859


In [121]:
## The table above shows the loss decreasing as the iterations proceed, which means gradient descent is moving w and b toward a minimum of the loss function