### Regularization
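    Imagine we have a model whose parameters (weights) have become very large. Regularization adds a penalty proportional to the sum of the squared weights to the cost, which pushes the weights toward smaller values.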

In [2]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib widget
np.set_printoptions(precision=2)

In [57]:
def compute_cost_linear_reg(x, y, w, b, lambda_ = 1):
    """
    Regularized squared-error cost of a linear model, averaged over all examples
    Args:
      x (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters
      b (scalar)       : model parameter
      lambda_ (scalar) : Controls amount of regularization
    Returns:
      total_cost (scalar): squared-error cost plus the L2 penalty on w
    """
    n_examples = x.shape[0]

    # Prediction error of the linear model for every example at once.
    residuals = (np.dot(x, w) + b) - y

    # Data-fit term: sum of squared residuals scaled by 1 / (2m).
    fit_scale = 1 / (2 * n_examples)
    fit_term = fit_scale * np.sum(np.square(residuals))

    # Penalty term: lambda / (2m) times the sum of squared weights (b is not penalized).
    penalty_scale = lambda_ / (2 * n_examples)
    penalty_term = penalty_scale * np.sum(np.square(w))

    return fit_term + penalty_term

#     m  = x.shape[0]
#     n  = len(w)
#     cost = 0.
#     for i in range(m):
#         f_wb_i = np.dot(x[i], w) + b                                   #(n,)(n,)=scalar, see np.dot
#         cost = cost + (f_wb_i - y[i])**2                               #scalar             
#     cost = cost / (2 * m)                                              #scalar  
 
#     reg_cost = 0
#     for j in range(n):
#         reg_cost += (w[j]**2)                                          #scalar
#     reg_cost = (lambda_/(2*m)) * reg_cost                              #scalar
    
#     total_cost = cost + reg_cost                                       #scalar
#     return total_cost      

# Smoke test: evaluate the regularized linear-regression cost on small random data.
# The statement order matters: both random draws come from the same seeded global RNG.
np.random.seed(1)  # fix the global RNG so the printed value is reproducible
X_tmp = np.random.rand(5,6)  # 5 examples, 6 features
y_tmp = np.array([0,1,0,1,0])  # one target per example
w_tmp = np.random.rand(X_tmp.shape[1]).reshape(-1,)-0.5  # one weight per feature, shifted down by 0.5
b_tmp = 0.5
lambda_tmp = 0.7
cost_tmp = compute_cost_linear_reg(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)

print("Regularized cost:", cost_tmp)

Regularized cost: 0.07917239320214275


In [52]:
def sigmoid(z):
    """
    Logistic function 1 / (1 + e^(-z)), applied element-wise
    Args:
      z (scalar or ndarray): input value(s)
    Returns:
      (scalar or ndarray): sigmoid of z, same shape as z
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def compute_logistic_cost(x, y, w, b, mylambda):
    """Computes the regularized logistic (cross-entropy) cost over all examples
    Args:
      x (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters
      b (scalar)       : model parameter
      mylambda (scalar): Controls amount of regularization
    Returns:
      total_cost (scalar): cross-entropy cost plus the L2 penalty on w
    """
    m = x.shape[0]
    z = x @ w + b

    # Evaluate the per-example losses directly from z instead of via
    # log(sigmoid(z)):
    #   -log(sigmoid(z))     == log(1 + e^(-z)) == logaddexp(0, -z)
    #   -log(1 - sigmoid(z)) == log(1 + e^(z))  == logaddexp(0,  z)
    # Taking log() of a saturated sigmoid (exactly 0.0 or 1.0) yields -inf,
    # and 0 * -inf inside the dot product turns the whole cost into nan.
    loss_if_positive = np.logaddexp(0, -z)
    loss_if_negative = np.logaddexp(0, z)
    cost = (np.dot(y, loss_if_positive) + np.dot(1 - y, loss_if_negative)) / m

    # L2 penalty on the weights only; the bias b is not regularized.
    regularized_cost = mylambda / (2 * m) * np.sum(np.square(w))

    total_cost = cost + regularized_cost
    return total_cost
#     m, n = x.shape
#     cost = 0.0
#     for i in range(m):
#         z_i = np.dot(x[i], w) + b
#         f_wb_i = sigmoid(z_i)
#         cost += (-y[i] * np.log(f_wb_i)) - ((1 - y[i]) * np.log(1 - f_wb_i))
#     cost = cost / m
#     regularized_cost = 0.0
#     for j in range(n):
#         regularized_cost += w[j] ** 2
#     regularized_cost = mylambda / (2*m) * regularized_cost
#     total_cost = cost + regularized_cost
#     return total_cost
        
# Smoke test: evaluate the regularized logistic cost on small random data.
# The statement order matters: both random draws come from the same seeded global RNG.
np.random.seed(1)  # fix the global RNG so the printed value is reproducible
X_tmp = np.random.rand(5 ,6)  # 5 examples, 6 features
y_tmp = np.array([0, 1, 0, 1, 0])  # one 0/1 label per example
w_tmp = np.random.rand(X_tmp.shape[1]).reshape(-1,) - 0.5  # one weight per feature, shifted down by 0.5
b_tmp = 0.5
lambda_tmp = 0.7
cost_tmp = compute_logistic_cost(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)
        
print("Regularized cost:", cost_tmp)
                 


Regularized cost: 0.6850849138741673


In [74]:
#Gradient descent for linear regression with regularization
def calculate_gradients_linear_reg(x, y, w, b, lambda_):
    """
    Computes the gradient of the regularized linear-regression cost
    Args:
      x (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters
      b (scalar)       : model parameter
      lambda_ (scalar) : Controls amount of regularization
    Returns:
      dj_db (scalar)                  : gradient of the cost w.r.t. b
      regularised_dj_dw (ndarray (n,)): gradient of the cost w.r.t. w,
                                        including the regularization term
    """
    m = x.shape[0]

    # Prediction error for every example at once, shape (m,).
    err = (np.dot(x, w) + b) - y

    dj_db = np.sum(err) / m
    # x.T @ err accumulates err_i * x[i, j] over the examples for each feature j.
    dj_dw = np.dot(x.T, err) / m

    # Only the weights are regularized; the bias gradient has no penalty term.
    regularised_dj_dw = dj_dw + (lambda_ / m) * w
    return dj_db, regularised_dj_dw

# Smoke test: evaluate the regularized linear-regression gradients on small random data.
# The statement order matters: both random draws come from the same seeded global RNG.
np.random.seed(1)  # fix the global RNG so the printed values are reproducible
X_tmp = np.random.rand(5, 3)  # 5 examples, 3 features
y_tmp = np.array([0, 1, 0, 1, 0])  # one target per example
w_tmp = np.random.rand(X_tmp.shape[1]) #(3,)
b_tmp = 0.5
lambda_tmp = 0.7
# The function returns the bias gradient first, then the weight gradient.
dj_db_tmp, dj_dw_tmp =  calculate_gradients_linear_reg(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)

print(f"dj_db: {dj_db_tmp}", )
print(f"Regularized dj_dw:\n {dj_dw_tmp.tolist()}", )

dj_db: 0.6648774569425726
Regularized dj_dw:
 [0.29653214748822276, 0.4911679625918033, 0.21645877535865857]


In [75]:
#Gradient descent for logistic regression with regularization
def sigmoid(z):
    """
    Element-wise logistic function
    Args:
      z (scalar or ndarray): input value(s)
    Returns:
      (scalar or ndarray): 1 / (1 + e^(-z)), same shape as z
    """
    exp_neg_z = np.exp(-z)
    return 1 / (1 + exp_neg_z)

def calculate_gradients_logistic_reg(x, y, w, b, lambda_):
    """
    Computes the gradient of the regularized logistic-regression cost
    Args:
      x (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters
      b (scalar)       : model parameter
      lambda_ (scalar) : Controls amount of regularization
    Returns:
      dj_db (scalar)                  : gradient of the cost w.r.t. b
      regularised_dj_dw (ndarray (n,)): gradient of the cost w.r.t. w,
                                        including the regularization term
    """
    m = x.shape[0]

    # Predicted probability minus target for every example at once, shape (m,).
    err = sigmoid(np.dot(x, w) + b) - y

    dj_db = np.sum(err) / m
    # x.T @ err accumulates err_i * x[i, j] over the examples for each feature j.
    dj_dw = np.dot(x.T, err) / m

    # Only the weights are regularized; the bias gradient has no penalty term.
    regularised_dj_dw = dj_dw + (lambda_ / m) * w
    return dj_db, regularised_dj_dw


# Smoke test: evaluate the regularized logistic-regression gradients on small random data.
# The statement order matters: both random draws come from the same seeded global RNG.
np.random.seed(1)  # fix the global RNG so the printed values are reproducible
X_tmp = np.random.rand(5, 3)  # 5 examples, 3 features
y_tmp = np.array([0, 1, 0, 1, 0])  # one 0/1 label per example
w_tmp = np.random.rand(X_tmp.shape[1]) #(3,)
b_tmp = 0.5
lambda_tmp = 0.7
# The function returns the bias gradient first, then the weight gradient.
dj_db_tmp, dj_dw_tmp =  calculate_gradients_logistic_reg(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)

print(f"dj_db: {dj_db_tmp}", )
print(f"Regularized dj_dw:\n {dj_dw_tmp.tolist()}", )

dj_db: 0.341798994972791
Regularized dj_dw:
 [0.17380012933994293, 0.32007507881566943, 0.10776313396851499]
