In [68]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math, copy

In [69]:
def compute_cost_linear_reg(X, y, w, b, lambda_=1):
    """
    Regularized squared-error cost of a linear model.

    Args:
      X       : 2-D array; X.shape is unpacked as (m, n) and each row is one example.
      y       : targets, read as y[i] for i in range(m).
      w       : weights; dotted with each row of X, and w[0..n-1] are penalized.
      b       : bias added to every prediction (not penalized).
      lambda_ : regularization strength (default 1).

    Returns:
      sum_i (X[i].w + b - y[i])**2 / (2m)  +  lambda_ / (2m) * sum_j w[j]**2
    """
    num_examples, num_features = X.shape

    # Data term: accumulate squared residuals one example at a time.
    squared_error_sum = 0
    for idx, features in enumerate(X):
        residual = (np.dot(features, w) + b) - y[idx]
        squared_error_sum = squared_error_sum + residual ** 2
    data_cost = squared_error_sum / (2 * num_examples)

    # Penalty term: sum of squared weights over the n feature weights.
    weight_sq_sum = 0
    for weight in w[:num_features]:
        weight_sq_sum = weight_sq_sum + weight ** 2
    penalty = weight_sq_sum * (lambda_ / (2 * num_examples))

    return data_cost + penalty

In [70]:
# test: regularized linear-regression cost on a small seeded problem
np.random.seed(1)  # fixed seed so the reference value checked below is reproducible
X_tmp = np.random.rand(5,6)
y_tmp = np.array([0,1,0,1,0])
w_tmp = np.random.rand(X_tmp.shape[1]) - 0.5  # rand(n) is already 1-D; shift weights to [-0.5, 0.5)
b_tmp = 0.5
lambda_tmp = 0.7
cost_tmp = compute_cost_linear_reg(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)

print("Regularized cost:", cost_tmp)

# Fail loudly on a regression instead of relying on eyeballing the printed value.
assert np.isclose(cost_tmp, 0.07917239320214277), "unexpected regularized linear cost"

Regularized cost: 0.07917239320214277


In [71]:
def compute_cost_logistic_reg (X, y, w, b, lambda_ = 1) :
    """
    Regularized cross-entropy cost for logistic regression.

    The per-example loss  -y*log(f) - (1-y)*log(1-f)  with  f = sigmoid(z)  is
    evaluated in the algebraically equivalent form  log(1 + e^z) - y*z.
    The direct form breaks once the sigmoid saturates to exactly 0.0 or 1.0:
    log(0) is -inf and 0 * -inf is nan, so a confidently *correct* prediction
    made the whole cost nan. np.logaddexp(0, z) computes log(1 + e^z) without
    overflow, so the loss stays finite for any finite z.

    Args:
      X       : 2-D array; X.shape is unpacked as (m, n) and each row is one example.
      y       : labels, read as y[i] for i in range(m).
      w       : weights; dotted with each row of X, and w[0..n-1] are penalized.
      b       : bias added to every logit (not penalized).
      lambda_ : regularization strength (default 1).

    Returns:
      mean per-example loss  +  lambda_ / (2m) * sum_j w[j]**2
    """
    m, n = X.shape
    cost = 0
    for i in range(m) :
        z_i = np.dot(X[i], w) + b
        # softplus(z) - y*z  ==  -y*log(sigmoid(z)) - (1-y)*log(1 - sigmoid(z))
        cost += np.logaddexp(0, z_i) - y[i] * z_i
    cost /= m
    reg = 0
    for j in range(n) :
        reg += w[j] ** 2
    reg *= (lambda_ / (2*m))
    return cost + reg

In [72]:
# test: regularized logistic-regression cost on a small seeded problem
np.random.seed(1)  # fixed seed so the reference value checked below is reproducible
X_tmp = np.random.rand(5,6)
y_tmp = np.array([0,1,0,1,0])
w_tmp = np.random.rand(X_tmp.shape[1]) - 0.5  # rand(n) is already 1-D; shift weights to [-0.5, 0.5)
b_tmp = 0.5
lambda_tmp = 0.7
cost_tmp = compute_cost_logistic_reg(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)

print("Regularized cost:", cost_tmp)

# Fail loudly on a regression instead of relying on eyeballing the printed value.
assert np.isclose(cost_tmp, 0.6850849138741673), "unexpected regularized logistic cost"

Regularized cost: 0.6850849138741673


In [73]:
def compute_gradient_linear_reg(X, y, w, b, lambda_):
    """
    Gradient of the regularized squared-error cost of a linear model.

    Args:
      X       : 2-D array; X.shape is unpacked as (m, n) and each row is one example.
      y       : targets, read as y[i] for i in range(m).
      w       : weights; dotted with each row of X and read as w[j] for j in range(n).
      b       : bias added to every prediction.
      lambda_ : regularization strength.

    Returns:
      (dj_db, dj_dw): the bias gradient, and an array of shape (n,) holding the
      weight gradient including the (lambda_ / m) * w[j] penalty term.
    """
    m, n = X.shape
    grad_w = np.zeros((n,))
    grad_b = 0.

    # Accumulate the un-normalized gradient one example at a time.
    for row in range(m):
        residual = (np.dot(X[row], w) + b) - y[row]
        grad_b += residual
        for col, x_val in enumerate(X[row]):
            grad_w[col] += residual * x_val

    grad_w /= m
    grad_b /= m

    # The penalty adds (lambda_ / m) * w[j] to each weight gradient; the bias gets none.
    shrink = lambda_ / m
    for col in range(n):
        grad_w[col] += shrink * w[col]

    return grad_b, grad_w

In [74]:
# test: regularized linear-regression gradient on a small seeded problem
np.random.seed(1)  # fixed seed so the reference values checked below are reproducible
X_tmp = np.random.rand(5,3)
y_tmp = np.array([0,1,0,1,0])
w_tmp = np.random.rand(X_tmp.shape[1])
b_tmp = 0.5
lambda_tmp = 0.7
dj_db_tmp, dj_dw_tmp =  compute_gradient_linear_reg(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)

print(f"dj_db: {dj_db_tmp}")
print(f"Regularized dj_dw:\n {dj_dw_tmp.tolist()}")

# Fail loudly on a regression instead of relying on eyeballing the printed values.
assert np.isclose(dj_db_tmp, 0.6648774569425726), "unexpected dj_db"
assert np.allclose(dj_dw_tmp, [0.29653214748822276, 0.4911679625918033, 0.21645877535865857]), "unexpected dj_dw"

dj_db: 0.6648774569425726
Regularized dj_dw:
 [0.29653214748822276, 0.4911679625918033, 0.21645877535865857]


In [75]:
def compute_gradient_logistic_reg (X, y, w, b, lambda_):
    """
    Gradient of the regularized cross-entropy cost for logistic regression.

    The sigmoid is evaluated as exp(-log(1 + e^(-z))) using np.logaddexp.
    The direct form 1 / (1 + exp(-z)) overflows inside exp() for very negative z,
    which emits a RuntimeWarning (or raises when NumPy overflow errors are set
    to "raise") even though the final value is a harmless 0.0.

    Args:
      X       : 2-D array; X.shape is unpacked as (m, n) and each row is one example.
      y       : labels, read as y[i] for i in range(m).
      w       : weights; dotted with each row of X and read as w[j] for j in range(n).
      b       : bias added to every logit.
      lambda_ : regularization strength.

    Returns:
      (dj_db, dj_dw): the bias gradient, and an array of shape (n,) holding the
      weight gradient including the (lambda_ / m) * w[j] penalty term.
    """
    m,n = X.shape
    dj_dw = np.zeros((n,))
    dj_db = 0.
    for i in range(m) :
        z_i = np.dot(X[i], w) + b
        # sigmoid(z) = exp(-log(1 + e^-z)); logaddexp never overflows.
        f_i = np.exp(-np.logaddexp(0, -z_i))
        error = f_i - y[i]
        for j in range(n) :
            dj_dw[j] += error * X[i, j]
        dj_db += error
    dj_dw /= m
    dj_db /= m
    # The penalty adds (lambda_ / m) * w[j] to each weight gradient; the bias gets none.
    for j in range(n) :
        dj_dw[j] += (lambda_/m) * w[j]
    return dj_db, dj_dw

In [76]:
# test: regularized logistic-regression gradient on a small seeded problem
np.random.seed(1)  # fixed seed so the reference values checked below are reproducible
X_tmp = np.random.rand(5,3)
y_tmp = np.array([0,1,0,1,0])
w_tmp = np.random.rand(X_tmp.shape[1])
b_tmp = 0.5
lambda_tmp = 0.7
dj_db_tmp, dj_dw_tmp =  compute_gradient_logistic_reg(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)

print(f"dj_db: {dj_db_tmp}")
print(f"Regularized dj_dw:\n {dj_dw_tmp.tolist()}")

# Fail loudly on a regression instead of relying on eyeballing the printed values.
assert np.isclose(dj_db_tmp, 0.341798994972791), "unexpected dj_db"
assert np.allclose(dj_dw_tmp, [0.17380012933994293, 0.32007507881566943, 0.10776313396851499]), "unexpected dj_dw"

dj_db: 0.341798994972791
Regularized dj_dw:
 [0.17380012933994293, 0.32007507881566943, 0.10776313396851499]


In [77]:
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters, lambda_, max_history=100000) :
    """
    Run batch gradient descent for a fixed number of iterations.

    Args:
      X, y              : passed through unchanged to cost_function / gradient_function.
      w_in              : initial weights; deep-copied, so the caller's object is never modified.
      b_in              : initial bias.
      cost_function     : called as cost_function(X, y, w, b, lambda_); returns the cost.
      gradient_function : called as gradient_function(X, y, w, b, lambda_);
                          returns (dj_db, dj_dw).
      alpha             : learning rate.
      num_iters         : number of update steps to take.
      lambda_           : regularization strength forwarded to both callbacks.
      max_history       : only the first max_history iterations are recorded in
                          J_history, which bounds memory on long runs (default 100000).

    Returns:
      (w, b, J_history): final weights, final bias, and the list of recorded costs.

    Progress is printed about ten times per run. The printed cost is always the
    cost at the reported iteration: once the history cap is reached it is
    recomputed rather than read from the last recorded (stale) entry.
    """
    J_history = []
    w = copy.deepcopy(w_in)
    b = b_in
    # Loop-invariant print interval; only used when the loop body runs (num_iters >= 1).
    report_every = math.ceil(num_iters / 10)
    for i in range(num_iters) :
        dj_db, dj_dw = gradient_function(X, y, w, b, lambda_)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db
        recorded = i < max_history
        if recorded :
            J_history.append(cost_function(X, y, w, b, lambda_))
        if i % report_every == 0:
            # Past the cap J_history[-1] is stale, so evaluate the current cost instead.
            current_cost = J_history[-1] if recorded else cost_function(X, y, w, b, lambda_)
            print(f"Iteration {i:4d}: Cost {current_cost:8.2f}")
    return w, b, J_history