<a href="https://colab.research.google.com/github/Parth1132/MachineLearning_01/blob/main/MultipleLinearRegression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

`NumPy`: a popular library for scientific computing

`Matplotlib`: a popular library for plotting data

In [102]:
import copy, math
import numpy as np
import matplotlib.pyplot as plt
np.set_printoptions(precision=2)  # Reduce the display precision of NumPy arrays to 2 digits

Creating `X_train` and `y_train` variables

In [103]:
# Training set: one row per example (3 examples), one column per feature (4 features)
X_train = np.array([
    [2104, 5, 1, 45],
    [1416, 3, 2, 40],
    [852, 2, 1, 35],
])
# Target value for each training example, in the same row order as X_train
y_train = np.array([460, 232, 178])

Display the `input` data

In [104]:
# The data is stored in NumPy arrays: X_train is a 2-D matrix, y_train is a 1-D vector.
# Print the shape, the Python type and the contents of each.
print(f"X Shape: {X_train.shape}, X type: {type(X_train)}")
print(X_train)
print(f"y Shape: {y_train.shape}, y type:{type(y_train)}")
print(y_train)

X Shape: (3, 4), X type: <class 'numpy.ndarray'>
[[2104    5    1   45]
 [1416    3    2   40]
 [ 852    2    1   35]]
y Shape: (3,), y type:<class 'numpy.ndarray'>
[460 232 178]


Parameters: the vector `w` and the scalar `b`



*   `w` is a vector with *n* elements
    * Each element contains a parameter associated with one feature.
    * In our dataset, n is 4.
    * Notionally, we draw this as a column vector.


*    `b` is a scalar parameter

*    `w` is a 1-D Numpy vector



In [105]:
# For demonstration, w and b are preloaded with values that are close to the optimum.
w_init = np.array([
    0.39133535,
    18.75376741,
    -53.36032453,
    -26.42131618,
])
b_init = 785.1811367994083
print(f"w_init shape: {w_init.shape}, b_init type:{type(b_init)}")

w_init shape: (4,), b_init type:<class 'float'>


**Single Prediction element by element**

In [106]:
def predict_single_loop(x, w, b):
  """
  Predict one example with a linear model, accumulating the dot product
  one term at a time.

  Args:
    x (ndarray): feature values of a single example; the size of its first
                 dimension sets the number of terms that are summed
    w (ndarray): model weights, indexed the same way as x
    b (scalar):  model bias

  Returns:
    p (scalar): sum of x[i] * w[i] over all i, plus b
  """
  total = 0
  for idx in range(x.shape[0]):
    # add the contribution of feature idx
    total += x[idx] * w[idx]
  return total + b

In [107]:
# Get the first row (a single example) from our training data

x_vec = X_train[0,:]
print(f" x_vec shape {x_vec.shape}, x_vec value: {x_vec}")

# Make a prediction with the element-by-element implementation
f_wb = predict_single_loop(x_vec, w_init, b_init)
print(f"f_wb Shape {f_wb.shape}, prediction:{f_wb}")

 x_vec shape (4,), x_vec value: [2104    5    1   45]
f_wb Shape (), prediction:459.9999976194083


**Single prediction, vectorized**

`np.dot()` can be used to perform a vector dot product.

In [108]:
def predict(x, w, b):
  """
  Predict with a linear model using a single vectorized dot product.

  Args:
    x (ndarray): feature values of one example
    w (ndarray): model weights
    b (scalar):  model bias

  Returns:
    p (scalar): np.dot(x, w) + b
  """
  return np.dot(x, w) + b

In [109]:
# Get the first row (a single example) from the training data
x_vec = X_train[0,:]
print(f"x_vec Shape {x_vec.shape}, x_vec value:{x_vec}")

# Make a prediction with the np.dot-based implementation
f_wb = predict(x_vec, w_init, b_init)
print(f"f_wb shape {f_wb.shape}, prediction: {f_wb}")

x_vec Shape (4,), x_vec value:[2104    5    1   45]
f_wb shape (), prediction: 459.9999976194083


Compute cost with Multiple Variables

In [110]:
def compute_cost(X, y, w, b):
  """
  Squared-error cost of a linear model over the training set.

  Args:
    X (ndarray (m,n)): data, m examples with n features
    y (ndarray (m,)) : target values
    w (ndarray (n,)) : model weights
    b (scalar)       : model bias

  Returns:
    cost (scalar): sum of the squared prediction errors divided by 2*m
  """
  num_examples = X.shape[0]
  squared_error_sum = 0.0

  for row in range(num_examples):
    # prediction for this example minus its target
    residual = np.dot(X[row], w) + b - y[row]
    squared_error_sum += residual**2

  return squared_error_sum / (2 * num_examples)


In [111]:
# Compute and display the cost using our pre-chosen, near-optimal parameters.
# The type of the returned value is printed first, then the cost itself.
cost = compute_cost(X_train, y_train, w_init, b_init)
print(type(cost))
print(f'cost at optimal w: {cost}')

<class 'numpy.float64'>
cost at optimal w: 1.5578904428966628e-12


Compute Gradient with Multiple Variables

$$
\begin{align}
\frac{\partial J(\mathbf{w},b)}{\partial w_j}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})x_{j}^{(i)} \tag{1}  \\
\frac{\partial J(\mathbf{w},b)}{\partial b}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}) \tag{2}
\end{align}
$$

An implementation that computes the partial derivatives in equations (1) and (2) is shown below.

In [112]:
def compute_gradient(X, y, w, b):
  """
  Computes the gradient of the squared-error cost for linear regression.

  With the cost J(w,b) = 1/(2m) * sum_i (f_wb(x_i) - y_i)^2, the factor 2
  produced by differentiating the square cancels the 1/2. Each example i
  therefore contributes err_i * x_i[j] to dJ/dw_j and err_i to dJ/db, where
  err_i = (np.dot(x_i, w) + b) - y_i. Both sums are then divided by m.

  Args:
    X (ndarray (m,n)): data, with m examples and n features
    y (ndarray (m,)) : target values
    w (ndarray (n,)) : model parameters
    b (scalar)       : model parameter

  Returns (in this order):
    dj_db (scalar)      : gradient of the cost w.r.t. the parameter b
    dj_dw (ndarray (n,)): gradient of the cost w.r.t. the parameters w
  """
  m, n = X.shape
  dj_dw = np.zeros((n,))
  dj_db = 0.0

  for i in range(m):
    err = (np.dot(X[i], w) + b) - y[i]
    # Update all n components at once; element-wise this is the same
    # arithmetic as looping j over the features.
    dj_dw += err * X[i]
    dj_db += err

  dj_dw = dj_dw / m
  dj_db = dj_db / m

  return dj_db, dj_dw



In [113]:
# Compute and display the gradient at the initial (near-optimal) w and b.
# compute_gradient returns the b-gradient first, then the w-gradient.

tmp_dj_db, tmp_dj_dw = compute_gradient(X_train, y_train, w_init, b_init)
print(f"dj_db at initial w,b: {tmp_dj_db}")
print(f'dj_dw at initial w,b: \n {tmp_dj_dw}')

dj_db at initial w,b: -1.6739251501955248e-06
dj_dw at initial w,b: 
 [-2.73e-03 -6.27e-06 -2.22e-06 -6.92e-05]


In [114]:
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters, max_history=100000):
    """
    Performs batch gradient descent to learn w and b. Updates w and b by taking
    num_iters gradient steps with learning rate alpha.

    Args:
      X (ndarray (m,n))   : Data, m examples with n features
      y (ndarray (m,))    : target values
      w_in (ndarray (n,)) : initial model parameters
      b_in (scalar)       : initial model parameter
      cost_function       : function to compute cost, called as
                            cost_function(X, y, w, b)
      gradient_function   : function to compute the gradient, called as
                            gradient_function(X, y, w, b); must return
                            (dj_db, dj_dw) in that order
      alpha (float)       : Learning rate
      num_iters (int)     : number of iterations to run gradient descent
      max_history (int)   : the cost is recorded only for the first
                            max_history iterations (prevents resource
                            exhaustion on very long runs)

    Returns:
      w (ndarray (n,)) : Updated values of parameters
      b (scalar)       : Updated value of parameter
      J_history (list) : cost after each of the first max_history iterations,
                         primarily for graphing later
      """

    J_history = []
    w = copy.deepcopy(w_in)  # avoid modifying the caller's w within the function
    b = b_in

    # Report progress about 10 times per run, or on every iteration if num_iters < 10.
    report_every = math.ceil(num_iters / 10)

    for i in range(num_iters):

        # Calculate the gradient at the current parameters
        dj_db, dj_dw = gradient_function(X, y, w, b)

        # Update the parameters using alpha and the gradient
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        save_cost = i < max_history
        report = i % report_every == 0

        if save_cost or report:
            # Evaluate the cost at the updated parameters so that a progress
            # line never shows a stale value once the history cap is reached.
            cost = cost_function(X, y, w, b)
            if save_cost:
                J_history.append(cost)
            if report:
                print(f"Iteration {i:4d}: Cost {cost:8.2f}   ")

    return w, b, J_history  # final w, b and the cost history for graphing

In [116]:
# Initialize parameters: start from all-zero weights and a zero bias
initial_w = np.zeros_like(w_init)
initial_b = 0
# Gradient descent settings: number of steps and learning rate
iterations = 1000
alpha = 5.0e-7
# Run gradient descent; progress lines are printed by gradient_descent itself
w_final, b_final, J_hist = gradient_descent(X_train, y_train, initial_w, initial_b,compute_cost, compute_gradient, alpha, iterations)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
# Compare the learned model's prediction with the target for every training example
m,_ = X_train.shape
for i in range(m):
    print(f"prediction: {np.dot(X_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

Iteration    0: Cost  2529.46   
Iteration  100: Cost   695.99   
Iteration  200: Cost   694.92   
Iteration  300: Cost   693.86   
Iteration  400: Cost   692.81   
Iteration  500: Cost   691.77   
Iteration  600: Cost   690.73   
Iteration  700: Cost   689.71   
Iteration  800: Cost   688.70   
Iteration  900: Cost   687.69   
b,w found by gradient descent: -0.00,[ 0.2   0.   -0.01 -0.07] 
prediction: 426.19, target value: 460
prediction: 286.17, target value: 232
prediction: 171.47, target value: 178
