## Linear Regression Modeling 
This notebook uses scikit-learn's real-world Boston housing price dataset to implement a linear regression model in the following three ways:

<ul>
    <li>Scikit-learn.</li>
    <li>Gradient descent.</li>
    <li>Closed form solution.</li>
</ul>

#### Imports

In [1]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn import datasets

import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the Boston housing dataset that ships with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs an older scikit-learn release -- confirm the pinned version.
boston = datasets.load_boston()

# Feature matrix (x) and target vector (y)
x, y = boston.data, boston.target

# Display the shapes of the feature matrix and the target vector
print(x.shape, y.shape)

(506, 13) (506,)


## Scikit-Learn Implementation

In [3]:
# Create the ordinary least-squares regressor and fit it to the Boston data
# (fit returns the estimator itself, so the two steps can be chained).
lr = LinearRegression().fit(x, y)

# Predict the target for the same samples the model was fit on
y_pred = lr.predict(x)

In [4]:
# Show the intercept (bias term) learned by the scikit-learn model
print('Intercept: ',lr.intercept_)

Intercept:  36.459488385089855


In [5]:
# Mean-squared error of the scikit-learn predictions on the same data the model
# was fit on. Note: the gradient-descent `cost` function defined later in this
# notebook reports half of this quantity (mean squared error / 2).
print('Mean-squared error:', mean_squared_error(y, y_pred))

Mean-squared error: 21.894831181729202


In [6]:
# Show the fitted coefficients of the scikit-learn model as a plain list
print('Coefficients: ',  list(lr.coef_))

Coefficients:  [-0.10801135783679539, 0.04642045836687953, 0.020558626367068917, 2.6867338193448442, -17.766611228299986, 3.8098652068092282, 0.0006922246403431768, -1.47556684560025, 0.30604947898516427, -0.012334593916574021, -0.9527472317072921, 0.00931168327379375, -0.5247583778554881]


## Gradient Descent Implementation

In [None]:
def cost(X, theta, y):
    """Return the halved mean-squared-error cost of a linear model.

    Computes ``J = mean((X . theta - y) ** 2) / 2``.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix, one row per sample.
    theta : array-like of shape (n,)
        Model weights.
    y : array-like of shape (m,)
        Target values. Should be 1-D: a column vector of shape (m, 1) would
        broadcast against the 1-D predictions instead of subtracting
        element-wise.

    Returns
    -------
    float
        The cost J.
    """
    prediction = np.dot(X, theta)
    error = prediction - y
    # Halving the MSE is the usual convention that cancels the factor 2 in the gradient.
    return np.mean(error ** 2) / 2

def gradient(X, theta, y):
    """Return the gradient of the halved mean-squared-error cost w.r.t. theta.

    Computes ``X.T . (X . theta - y) / m`` where ``m = y.shape[0]`` is the
    number of samples.
    """
    n_samples = y.shape[0]
    residuals = np.dot(X, theta) - y
    return np.dot(X.T, residuals) / n_samples

def linear_regression(X, y, n_iter, alpha):
    """Fit linear-regression weights with batch gradient descent.

    Starts from an all-zero weight vector and repeatedly steps against the
    gradient of the halved mean-squared-error cost (see ``cost`` and
    ``gradient``). No intercept column is added here: the prediction is
    ``X . theta``, so include a column of ones in ``X`` if a bias term is wanted.

    Parameters
    ----------
    X : ndarray of shape (m, n)
        Design matrix, one row per sample.
    y : ndarray of shape (m,)
        Target values.
    n_iter : int
        Maximum number of gradient-descent updates.
    alpha : float
        Learning rate (step size).

    Returns
    -------
    costs : ndarray
        Cost after each update that was performed; shorter than ``n_iter``
        when the run stops early on convergence.
    theta : ndarray of shape (n,)
        Final weights.

    Raises
    ------
    ValueError
        If the weights become non-finite, or if the cost rises by more than 1
        between two consecutive iterations (the run is diverging).
    """
    theta = np.zeros(X.shape[1])
    costs = np.zeros(n_iter)

    for i in range(n_iter):
        theta_old = theta
        theta = theta - alpha * gradient(X, theta, y)

        # Once the weights overflow to inf/nan every later cost is nan, and
        # `nan < -1` is False, so the divergence check below would never fire.
        # Fail loudly instead of silently returning nan weights.
        if not np.all(np.isfinite(theta)):
            raise ValueError(
                'Gradient descent produced non-finite weights, '
                'choose a smaller alpha or check the inputs'
            )

        costs[i] = cost(X, theta, y)

        # Converged: the update barely moved the weights (L1 norm of the step).
        if np.linalg.norm(theta - theta_old, ord=1) < 0.000001:
            costs = costs[:i + 1]  # drop the unused tail of the cost history
            break

        # Diverging: the cost went up by more than 1 since the previous iteration.
        if i > 0 and costs[i - 1] - costs[i] < -1:
            raise ValueError('Gradient descent is diverging choose a smaller alpha')

        # Iteration budget exhausted without meeting the convergence criterion.
        if i + 1 == n_iter:
            print('Model did not fully converge, try picking a higher number of iterations (n_iter) or learning rate (alpha)')

    return costs, theta