In [1]:
import numpy as np

# Compute Gradient for Linear Regression with Regularization

The equation for the cost function regularized linear regression is:
$$J(\mathbf{w},b) = \frac{1}{2m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})^2  + \frac{\lambda}{2m}  \sum_{j=0}^{n-1} w_j^2 \tag{1}$$ 
where:
$$ f_{\mathbf{w},b}(\mathbf{x}^{(i)}) = \mathbf{w} \cdot \mathbf{x}^{(i)} + b  \tag{2} $$ 

Compare this to the cost function without regularization (which you implemented in  a previous lab), which is of the form:

$$J(\mathbf{w},b) = \frac{1}{2m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})^2 $$ 

The difference is the regularization term,  <span style="color:red">
    $\frac{\lambda}{2m}  \sum_{j=0}^{n-1} w_j^2$ </span> 

In [22]:
def CostFunction_Linear(x,y,w,b,lambda_):

    m = x.shape[0]

    loss = np.dot(x,w) + b - y
    loss2 = np.sum(loss**2) / 2 / m
    regularization = lambda_ * np.sum(w**2) / 2 / m

    cost = loss2 + regularization

    return loss, cost

# Compute Gradient for Logistic Regression with Regularization

For regularized logistic regression, the cost function is:
$$J(\mathbf{w},b) = \frac{1}{m}  \sum_{i=0}^{m-1} \left[ -y^{(i)} \log\left(f_{\mathbf{w},b}\left( \mathbf{x}^{(i)} \right) \right) - \left( 1 - y^{(i)}\right) \log \left( 1 - f_{\mathbf{w},b}\left( \mathbf{x}^{(i)} \right) \right) \right] + \frac{\lambda}{2m}  \sum_{j=0}^{n-1} w_j^2 \tag{3}$$
where:
$$ f_{\mathbf{w},b}(\mathbf{x}^{(i)}) = sigmoid(\mathbf{w} \cdot \mathbf{x}^{(i)} + b)  \tag{4} $$ 

Compare this to the cost function without regularization (which you implemented in  a previous lab):

$$ J(\mathbf{w},b) = \frac{1}{m}\sum_{i=0}^{m-1} \left[ (-y^{(i)} \log\left(f_{\mathbf{w},b}\left( \mathbf{x}^{(i)} \right) \right) - \left( 1 - y^{(i)}\right) \log \left( 1 - f_{\mathbf{w},b}\left( \mathbf{x}^{(i)} \right) \right)\right] $$
The difference is the regularization term,  <span style="color:red">
    $\frac{\lambda}{2m}  \sum_{j=0}^{n-1} w_j^2$ </span> 

In [39]:
def CostFunction_Logistics(x,y,w,b,lambda_):

    def gfx(x,w,b):
        return 1 / (1+np.exp(-(np.dot(x,w)+b)))

    m = x.shape[0]

    loss = gfx(x,w,b) - y

    cost_first_log = np.log(gfx(x,w,b))
    
    cost_second_log = np.log(1 - gfx(x,w,b))

    cost_regularization = lambda_ * np.sum(w**2) / 2

    cost_sum = -1 * (np.dot(y,cost_first_log) + np.dot((1-y),cost_second_log)) + cost_regularization

    cost = cost_sum / m

    return loss, cost

# Compute Gradient (same for both regressions)

The gradient calculation for both linear and logistic regression are nearly identical, differing only in computation of $f_{\mathbf{w}b}$.
$$\begin{align*}
\frac{\partial J(\mathbf{w},b)}{\partial w_j}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})x_{j}^{(i)}  +  \frac{\lambda}{m} w_j \tag{2} \\
\frac{\partial J(\mathbf{w},b)}{\partial b}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}) \tag{3} 
\end{align*}$$

In [41]:
def ComputeGradient(x,y,w,b,lambda_,loss="linear"):

    m = x.shape[0]

    if loss == "linear":
        loss_function = CostFunction_Linear
    else:
        loss_function = CostFunction_Logistics

    loss, _ = loss_function(x,y,w,b,lambda_)

    dJ_dw = np.dot(loss,x) / m + lambda_ * w / m
    dJ_db = np.sum(loss) / m

    return dJ_dw, dJ_db

In [42]:
np.random.seed(1)
X_tmp = np.random.rand(5,3)
y_tmp = np.array([0,1,0,1,0])
w_tmp = np.random.rand(X_tmp.shape[1])
b_tmp = 0.5
lambda_tmp = 0.7
dj_db_tmp, dj_dw_tmp =  ComputeGradient(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp, "")

print(f"dj_db: {dj_db_tmp}", )
print(f"Regularized dj_dw:\n {dj_dw_tmp.tolist()}", )

dj_db: [0.17380013 0.32007508 0.10776313]
Regularized dj_dw:
 0.341798994972791
