# Multiple Variable Linear Regression

In [2]:
import copy, math
import numpy as np
import matplotlib.pyplot as plt
np.set_printoptions(precision=2)  # reduced display precision on numpy arrays

In [10]:
# Training set: 3 examples (rows) with 4 features each (columns),
# plus one target value per example.
x_train = np.array([[2104, 5, 1, 45], [1416, 3, 2, 40], [852, 2, 1, 35]])
y_train = np.array([460, 232, 178])
# Echo both arrays so the data used by the rest of the notebook is visible.
print(x_train)
print(y_train)

[[2104    5    1   45]
 [1416    3    2   40]
 [ 852    2    1   35]]
[460 232 178]


In [11]:
# Fixed model parameters used by the prediction, cost and gradient demos below:
# a scalar bias and one weight per feature column of x_train.
b_initial = 785.18113678
w_initial = np.array([0.3913, 18.75377, -53.36, -26.42132])
# Sanity-check that w is a 1-D vector and b is a plain Python scalar.
print(f"w_initial shape: {w_initial.shape}, b_initial type: {type(b_initial)}")

w_initial shape: (4,), b_initial type: <class 'float'>


### Single Prediction element by element

In [13]:
def predict(x, w, b):
    """
    Linear-model prediction for one example, computed element by element.

    Args:
        x: 1-D array holding the feature values of a single example.
        w: 1-D array of weights, indexed in parallel with ``x``.
        b: scalar bias term.

    Returns:
        The sum of ``x[i] * w[i]`` over every index of ``x``, plus ``b``.
    """
    total = 0
    # Walk the features one at a time; this is the explicit loop form of a dot product.
    for idx in range(x.shape[0]):
        total = x[idx] * w[idx] + total
    return total + b

In [16]:
# Take the first training example (row 0, all feature columns) as the test vector.
x_vec = x_train[0, :]
print(x_vec)
# Run the loop-based prediction with the fixed parameters defined earlier.
f_wb = predict(x_vec, w_initial, b_initial)
print(f_wb)

[2104    5    1   45]
459.92578677999995


#### Using `np.dot`

In [20]:
def predict_single_vector(x, w, b):
    """
    Linear-model prediction for one example using a vectorized dot product.

    Args:
        x: 1-D array holding the feature values of a single example.
        w: 1-D array of weights with the same length as ``x``.
        b: scalar bias term.

    Returns:
        ``np.dot(x, w) + b``.
    """
    weighted_sum = np.dot(x, w)
    return weighted_sum + b

# Repeat the prediction for the same example, this time via np.dot.
pred_dot = predict_single_vector(x_vec, w_initial, b_initial)
print(pred_dot)
# Compare with a tolerance rather than ==: the explicit loop and np.dot are free
# to accumulate the products in different orders, so the two floats can differ
# in their last bits even when both implementations are correct.
print(np.isclose(f_wb, pred_dot))

459.92578677999995
True


### Compute Cost with Multiple Variables

The equation for the cost function with multiple variables $J(\mathbf{w},b)$ is:
$$J(\mathbf{w},b) = \frac{1}{2m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})^2 \tag{3}$$ 
where:
$$ f_{\mathbf{w},b}(\mathbf{x}^{(i)}) = \mathbf{w} \cdot \mathbf{x}^{(i)} + b  \tag{4} $$ 


In contrast to previous labs, $\mathbf{w}$ and $\mathbf{x}^{(i)}$ are vectors rather than scalars, which allows the model to support multiple features.

In [33]:
def compute_cost(x, y, w, b):
    """
    Squared-error cost of the linear model over a whole data set.

    Args:
        x: 2-D array with one example per row.
        y: 1-D array of target values, one per row of ``x``.
        w: 1-D array of weights, one per column of ``x``.
        b: scalar bias term.

    Returns:
        The sum of squared prediction errors divided by ``2 * m``, where ``m``
        is the number of rows in ``x``.
    """
    num_examples = x.shape[0]
    squared_error_sum = 0.0
    for idx, features in enumerate(x):
        # Prediction for this example minus its target.
        residual = (np.dot(features, w) + b) - y[idx]
        squared_error_sum = squared_error_sum + residual ** 2
    return squared_error_sum / (2 * num_examples)


# Evaluate the cost on the training set using the fixed parameters defined earlier.
cost = compute_cost(x_train, y_train, w_initial, b_initial)
print(cost)

0.0014764204550570398


### Gradient Descent With Multiple Variables
Gradient descent for multiple variables:

$$\begin{align*} \text{repeat}&\text{ until convergence:} \; \lbrace \newline\;
& w_j = w_j -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial w_j} \tag{5}  \; & \text{for j = 0..n-1}\newline
&b\ \ = b -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial b}  \newline \rbrace
\end{align*}$$

where $n$ is the number of features, the parameters $w_j$ and $b$ are updated simultaneously, and the partial derivatives are:

$$
\begin{align}
\frac{\partial J(\mathbf{w},b)}{\partial w_j}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})x_{j}^{(i)} \tag{6}  \\
\frac{\partial J(\mathbf{w},b)}{\partial b}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}) \tag{7}
\end{align}
$$
* $m$ is the number of training examples in the data set
* $f_{\mathbf{w},b}(\mathbf{x}^{(i)})$ is the model's prediction, while $y^{(i)}$ is the target value


In [24]:
def compute_gradient(x, y, w, b):
    """
    Gradient of the squared-error cost with respect to the weights and the bias.

    Args:
        x: 2-D array of shape (m, n), one example per row.
        y: 1-D array of target values, one per row of ``x``.
        w: 1-D array of n weights.
        b: scalar bias term.

    Returns:
        A tuple ``(dj_dw, dj_db)``: ``dj_dw`` is a length-n array holding the
        average of ``error * x[i, j]`` over the examples for each feature j,
        and ``dj_db`` is the average error.
    """
    num_examples, num_features = x.shape
    grad_w = np.zeros(num_features)
    grad_b = 0.0
    for idx in range(num_examples):
        residual = (np.dot(x[idx], w) + b) - y[idx]
        # Scale the whole feature row by the residual; element j of the product
        # is this example's contribution to the gradient for weight j.
        grad_w += residual * x[idx]
        grad_b += residual
    return grad_w / num_examples, grad_b / num_examples
# Evaluate the gradient on the training set using the fixed parameters defined earlier.
temp_dj_dw, temp_dj_db = compute_gradient(x_train, y_train, w_initial, b_initial)
print(temp_dj_dw, temp_dj_db)

[-8.39e+01 -1.93e-01 -6.77e-02 -2.12e+00] -0.05122988666679854


#### Gradient Descent with Multiple Variables

In [41]:
def gradient_descent(x, y, w_init, b_init, cost_fuction, gradient_function, alpha, num_iter, print_every=100):
    """
    Run batch gradient descent for a fixed number of iterations.

    Args:
        x: training inputs, passed through unchanged to the two callbacks.
        y: training targets, passed through unchanged to the two callbacks.
        w_init: initial weights. Copied on entry, so the returned weights never
            alias the caller's object.
        b_init: initial scalar bias.
        cost_fuction: callable ``(x, y, w, b) -> cost``. The misspelled name is
            kept on purpose so existing keyword calls keep working.
        gradient_function: callable ``(x, y, w, b) -> (dj_dw, dj_db)``.
        alpha: learning rate.
        num_iter: number of update steps to perform.
        print_every: log the cost on every iteration index that is a multiple
            of this value. Pass 0 or None to disable logging. Defaults to 100.

    Returns:
        A tuple ``(w, b, J_history)`` with the final weights, the final bias
        and a list holding the cost recorded after each update step.
    """
    J_history = []
    # Defensive copy: with num_iter == 0 the loop never rebinds w, and without
    # the copy the caller would get its own w_init object back.
    w = copy.deepcopy(w_init)
    b = b_init
    for i in range(num_iter):
        # Both gradients are evaluated at the current (w, b) before either is
        # changed, which gives the simultaneous update.
        dj_dw, dj_db = gradient_function(x, y, w, b)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        J_history.append(cost_fuction(x, y, w, b))
        if print_every and i % print_every == 0:
            print(f"Iteration: {i}; Cost: {J_history[-1]}")

    return w, b, J_history


In [46]:
# Start from all-zero parameters; the weight vector takes its shape from w_initial.
initial_w = np.zeros_like(w_initial)
initial_b = 0.0

# Hyperparameters for this run: number of update steps and learning rate.
iteration = 2000
alpha = 5.0e-7

# Train on the full training set, recording the cost after every step.
w_final, b_final, J_history = gradient_descent(x_train, y_train, initial_w, initial_b, cost_fuction = compute_cost,
                                               gradient_function = compute_gradient, alpha=alpha, num_iter=iteration)
# Compare the trained model's prediction with the target for each training example.
M = x_train.shape[0]
for i in range(M):
    print(f'Prediction: {np.dot(x_train[i], w_final) + b_final} and target value: {y_train[i]}')

Iteration: 0; Cost: 2529.4629522316304
Iteration: 100; Cost: 695.990315835203
Iteration: 200; Cost: 694.9206979323061
Iteration: 300; Cost: 693.8604297851192
Iteration: 400; Cost: 692.8094286135915
Iteration: 500; Cost: 691.7676123706057
Iteration: 600; Cost: 690.7348997354997
Iteration: 700; Cost: 689.711210107616
Iteration: 800; Cost: 688.6964635999458
Iteration: 900; Cost: 687.6905810327947
Iteration: 1000; Cost: 686.6934839275277
Iteration: 1100; Cost: 685.705094500366
Iteration: 1200; Cost: 684.7253356562205
Iteration: 1300; Cost: 683.754130982616
Iteration: 1400; Cost: 682.791404743621
Iteration: 1500; Cost: 681.8370818738819
Iteration: 1600; Cost: 680.8910879726681
Iteration: 1700; Cost: 679.9533492980014
Iteration: 1800; Cost: 679.0237927608082
Iteration: 1900; Cost: 678.1023459191534
Prediction: 426.7541288701386 and target value: 460
Prediction: 285.8740600118109 and target value: 232
Prediction: 170.55111419661225 and target value: 178
