In [50]:
import numpy as np

| Size (sqft) | Number of Bedrooms  | Number of floors | Age of  Home | Price (1000s dollars)  |   
| ----------------| ------------------- |----------------- |--------------|-------------- |  
| 2104            | 5                   | 1                | 45           | 460           |  
| 1416            | 3                   | 2                | 40           | 232           |  
| 852             | 2                   | 1                | 35           | 178           |  

In [51]:
y_train=np.array([460,232,178])
x_train=np.array([
    [2104, 5, 1, 45],
    [1416, 3, 2, 40],
    [852, 2, 1, 35]
])
print(y_train.shape); print(x_tarin.shape)

(3,)
(3, 4)


<a name="toc_15456_5"></a>
# 5 Gradient Descent With Multiple Variables
Gradient descent for multiple variables:

$$\begin{align*} \text{repeat}&\text{ until convergence:} \; \lbrace \newline\;
& w_j = w_j -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial w_j} \tag{5}  \; & \text{for j = 0..n-1}\newline
&b\ \ = b -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial b}  \newline \rbrace
\end{align*}$$

where, n is the number of features, parameters $w_j$,  $b$, are updated simultaneously and where  

$$
\begin{align}
\frac{\partial J(\mathbf{w},b)}{\partial w_j}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})x_{j}^{(i)} \tag{6}  \\
\frac{\partial J(\mathbf{w},b)}{\partial b}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}) \tag{7}
\end{align}
$$
* m is the number of training examples in the data set

    
*  $f_{\mathbf{w},b}(\mathbf{x}^{(i)})$ is the model's prediction, while $y^{(i)}$ is the target value


In [52]:
def computed_gradient(x, y,w,b):
    m,n=x.shape
    dj_dw=np.zeros(n)
    dj_db=0
    
    for i in range(m):
        err=((np.dot(x[i],w))+b)-y[i]
        for j in range(n):
            dj_dw=dj_dw+err*x[i,j]
            dj_db=dj_db+err
    dj_db=dj_db/m
    dj_dw=dj_dw/m
    return(dj_dw,dj_db)

In [53]:
def gradient_decent(x,y,w_in,b_in,alpha,num_iterat):
    w=w_in
    b=b_in
    for i in range(num_iterat):
        dj_dw,dj_db=computed_gradient(x,y,w,b)
        
        w=w-alpha*dj_dw
        b=b-alpha*dj_db
    
    return w,b

In [54]:
# initialize parameters
initial_w = np.zeros(x_train.shape[1])
initial_b = 0.
# some gradient descent settings
iterations = 1000
alpha = 5.0e-7
# run gradient descent 
w_final, b_final= gradient_decent(x_train, y_train, initial_w, initial_b,alpha, iterations)

print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")

m,_ = x_train.shape

for i in range(m):
    print(f"prediction: {np.dot(x_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

b,w found by gradient descent: -0.01,[0.19664346 0.19664346 0.19664346 0.19664346] 
prediction: 423.76, target value: 460
prediction: 287.29, target value: 232
prediction: 175.00, target value: 178
