In [58]:
import numpy as np
import copy

### cost function for regularized linear regression

In [29]:
def compute_cost_linr_reg(x,y,w,b,lambda_):
    m = x.shape[0]
    n = len(w)
    cost = 0.
    for i in range(m):
        y_hat = np.dot(w,x[i]) + b
        sq_err = (y_hat - y[i]) ** 2
        cost += sq_err
    
    cost = 1 / (2*m)

    reg_cost = 0
    for j in range(n):
        reg_cost += (lambda_ / 2*m) * (w[j]**2)
        
    total_cost = cost + reg_cost
    return total_cost
    

In [34]:
np.random.seed(1)
X_tmp = np.random.rand(5,6)
y_tmp = np.array([0,1,0,1,0])
w_tmp = np.random.rand(X_tmp.shape[1]).reshape(-1,)-0.5
b_tmp = 0.5
lambda_tmp = 0.7
cost_tmp = compute_cost_linr_reg(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)

print("Regularized cost:", cost_tmp)

Regularized cost: 0.8860358448847132


In [31]:
compute_cost_linr_reg(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)

0.8860358448847132

### cost function for regularized logistic regressoin

In [61]:
def compute_cost_logst_reg(x,y,w,b, lambda_):
    m = x.shape[0]
    n = len(w)
    cost = 0.
    
    for i in range(m):
        z = np.dot(w,x[i]) + b
        f_wb_i = 1 / (1 + np.exp(-z))
        cost += (-y[i]*np.log(f_wb_i)) - (1 - y[i])* np.log(1 - f_wb_i)
        
    cost = cost / m
    
    reg_cost = 0
    for j in range(n):
        reg_cost += (lambda_ / (2*m)) * (w[j]**2)
        
    total_cost = cost + reg_cost
    return total_cost

In [62]:
np.random.seed(1)
X_tmp = np.random.rand(5,6)
y_tmp = np.array([0,1,0,1,0])
w_tmp = np.random.rand(X_tmp.shape[1]).reshape(-1,)-0.5
b_tmp = 0.5
lambda_tmp = 0.7
compute_cost_logst_reg(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)

0.6850849138741673

### gradient function for regularized linear regression

In [78]:
def compute_gradient_lin_reg(x,y,w,b,lambda_):
    m,n = x.shape

    dj_dw = np.zeros((n,)) 
    dj_db = 0.
    
    for i in range(m):
        y_hat = np.dot(w,x[i]) + b
        
        for j in range(n):
            err = y_hat - y[i]
            dj_dw[j] += err * x[i,j]
            
        dj_db += y_hat - y[i]
    
    dj_dw = dj_dw / m
    dj_db = dj_db / m
    
    # w is vector of length j (number of features) so we will add regularized term with their respective j
    
    for j in range(n):
        reg_term = (lambda_ / m) * w[j]
        dj_dw[j] += reg_term
        
    # we don't have to regularized b
    return dj_dw, dj_db

In [83]:
np.random.seed(1)
X_tmp = np.random.rand(5,3)
y_tmp = np.array([0,1,0,1,0])
w_tmp = np.random.rand(X_tmp.shape[1])
b_tmp = 0.5
lambda_tmp = 0.7
dj_dw_tmp, dj_db_tmp = compute_gradient_lin_reg(X_tmp,y_tmp,w_tmp,b_tmp,lambda_tmp)

### gradient function for regularized logistic regression

computing gradient for linear and logistic regression is nearly identical we just have to chage predicted y 

In [106]:
def compute_gradient_logst_reg(x,y,w,b,lambda_):
    m,n = x.shape
    
    dj_dw = np.zeros((n,))
    dj_db = 0.
    
    for i in range(m):
        z = np.dot(w,x[i]) + b
        g_z = 1 / (1 + (np.exp(-z)))
        
        for j in range(n):
            err = (g_z - y[i]) * x[i,j]
            dj_dw[j] += err
        
        dj_db += g_z - y[i]
        
    dj_dw = dj_dw / m
    dj_db = dj_db / m
    
    # w is vector of length j (number of features) so we will add regularized term with their respective j
    
    reg_cost = 0
    for j in range(n):
        reg_term = (lambda_ / m) * w[j]
        dj_dw[j] += reg_term
    
    return dj_dw, dj_db
    

In [107]:
np.random.seed(1)
X_tmp = np.random.rand(5,3)
y_tmp = np.array([0,1,0,1,0])
w_tmp = np.random.rand(X_tmp.shape[1])
b_tmp = 0.5
lambda_tmp = 0.7
dj_dw_tmp, dj_db_tmp =  compute_gradient_logst_reg(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)
dj_dw_tmp

array([0.17380013, 0.32007508, 0.10776313])

### The basic algorithm for running gradient descent does not change with regularization

In [None]:
repeat until convergence:
    - {
        𝑤𝑗 = 𝑤𝑗 − 𝛼 ∂𝐽(𝐰,𝑏)
       𝑤𝑗𝑏 = 𝑏− 𝛼∂𝐽(𝐰,𝑏)∂𝑏
    }
    for j := 0..n-1(1)