# Given

An overfitting example for linear and logistics regression each

# Find

Implement regularization of a cost function

# Solution

In [2]:
import numpy as np

## Define regularized cost function

Regression cost: $$J(\mathbf{w},b) = \frac{1}{2m} \sum\limits^m (f_{\mathbf{w},b}(\mathbf{x}_i) - y_i)^2  + \frac{\lambda}{2m}  \sum^n w_j^2$$ 

In [14]:
def ComputeCost_RegressionRegularized(x, y, w, b, lambda_ = 1):

    m = x.shape[0]

    loss = np.dot(x, w) + b - y
    cost_norm = np.sum(loss**2) / 2 / m
    cost_reg = lambda_ * np.sum(w**2) / 2 / m

    total_cost = cost_norm + cost_reg

    return loss, total_cost

Test against course book:

In [15]:
np.random.seed(1)
X_tmp = np.random.rand(5,6)
y_tmp = np.array([0,1,0,1,0])
w_tmp = np.random.rand(X_tmp.shape[1]).reshape(-1,)-0.5
b_tmp = 0.5
lambda_tmp = 0.7
_, cost_tmp = ComputeCost_RegressionRegularized(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)

print("Regularized cost:", cost_tmp)

Regularized cost: 0.07917239320214275


Classification cost:

$$Loss_i(w,b) = -[y_i * log(gf_{\mathbf{w},b}(x_i))+(1-y_i) * log(1-gf_{\mathbf{w},b}(x_i))] + \frac{\lambda}{2m}  \sum^n w_j^2$$

In [16]:
sigmoid = lambda x, w, b : 1 / (1 + np.exp(-1 * (np.dot(x, w) + b) ) )

def ComputeCost_ClassificationRegularized(x, y, w, b, lambda_ = 1):

    m = x.shape[0]

    upper_half = - np.log(sigmoid(x, w, b))
    lower_half = - np.log(1 - sigmoid(x, w, b))

    cost_norm = np.dot(y, upper_half) + np.dot((1 - y),lower_half)
    cost_norm = np.sum(cost_norm) / m

    cost_reg = lambda_ * np.sum(w**2) / 2 / m
    total_cost = cost_norm + cost_reg

    loss = sigmoid(x, w, b) - y

    return loss, total_cost

Test against course book:

In [17]:
np.random.seed(1)
X_tmp = np.random.rand(5,6)
y_tmp = np.array([0,1,0,1,0])
w_tmp = np.random.rand(X_tmp.shape[1]).reshape(-1,)-0.5
b_tmp = 0.5
lambda_tmp = 0.7
_, cost_tmp = ComputeCost_ClassificationRegularized(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)

print("Regularized cost:", cost_tmp)

Regularized cost: 0.6850849138741671


## Calculate gradient vectors

$$\begin{align*}
\frac{\partial J(\mathbf{w},b)}{\partial w_j}  &= \frac{1}{m} \sum\limits^m (f_{\mathbf{w},b}(\mathbf{x}_i) - y_i)x^j_i  +  \frac{\lambda}{m} w_j  \\
\frac{\partial J(\mathbf{w},b)}{\partial b}  &= \frac{1}{m} \sum\limits^m (f_{\mathbf{w},b}(\mathbf{x}_i) - y_i)
\end{align*}$$

In [18]:
def CalculateGradient(x, y, w, b, lambda_ = 1, method = "regression"):
    m = x.shape[0]  # Number of training instances

    # Calculate the loss and cost using the CalculateLoss function
    if method == "regression":
        loss, cost = ComputeCost_RegressionRegularized(x, y, w, b)
    else:
        loss, cost = ComputeCost_ClassificationRegularized(x, y, w, b)

    # Calculate the gradients of the parameter values by performing a dot product of the loss and input features
    dJ_dw = np.dot(loss, x) / m + lambda_ * w / m

    # Calculate the gradient of the bias value by summing the loss values
    dJ_db = np.sum(loss) / m

    return dJ_dw, dJ_db, cost

Test against course book:

In [20]:
np.random.seed(1)
X_tmp = np.random.rand(5,3)
y_tmp = np.array([0,1,0,1,0])
w_tmp = np.random.rand(X_tmp.shape[1])
b_tmp = 0.5
lambda_tmp = 0.7
dj_db_tmp, dj_dw_tmp, cost =  CalculateGradient(x = X_tmp, y = y_tmp, w = w_tmp, b = b_tmp, lambda_ = lambda_tmp, method = "regression")

print(f"dj_db: {dj_db_tmp}", )
print(f"Regularized dj_dw:\n {dj_dw_tmp.tolist()}", )

dj_db: [0.29653215 0.49116796 0.21645878]
Regularized dj_dw:
 0.6648774569425726


In [21]:
np.random.seed(1)
X_tmp = np.random.rand(5,3)
y_tmp = np.array([0,1,0,1,0])
w_tmp = np.random.rand(X_tmp.shape[1])
b_tmp = 0.5
lambda_tmp = 0.7
dj_db_tmp, dj_dw_tmp, cost =  CalculateGradient(x = X_tmp, y = y_tmp, w = w_tmp, b = b_tmp, lambda_ = lambda_tmp, method = "classification")

print(f"dj_db: {dj_db_tmp}", )
print(f"Regularized dj_dw:\n {dj_dw_tmp.tolist()}", )

dj_db: [0.17380013 0.32007508 0.10776313]
Regularized dj_dw:
 0.341798994972791
