# Linear Regression from scratch

**Importing necessary things** :<br>
At this step I add necessary tools to our notebook.

In [1]:
#for numerical and matrix things
import numpy as np

#for things like getting sum and mean
import pandas as pd

#for ploting figures
%matplotlib inline
import matplotlib.pyplot as plt

#loading our dataset
from sklearn.datasets import load_diabetes

**Assigning X and Y** :<br>
At this step I define X , Y for dataset to make a linear Regression implement.

In [2]:
X_diabetes, y_diabetes = load_diabetes(return_X_y=True)

#to make it easier for later
X = X_diabetes
Y= y_diabetes

#let's take a look at our X, Y data dimension
print(X.shape)
print(Y.shape)

(442, 10)
(442,)


**Spliting to train and test** :<br>
At this step we split our data to training and test with ratio of 70% and 30% for each.

In [3]:
#in order to have a fix output around all runs
np.random.seed(2042)

#test ratio is 30%
test_ratio = 0.3
total_size = len(X)

test_size = int(test_ratio * total_size)
train_size = total_size - test_size

#in order to have randon Train and Test
rnd_indices = np.random.permutation(total_size)

X_train = X[rnd_indices[:train_size]]
X_test = X[rnd_indices[train_size:]]
Y_train = Y[rnd_indices[:train_size]]
Y_test = Y[rnd_indices[train_size:]]

**Model Implementation** :<br>
At this step I define a class named **LinearRegression** in order to implement linear regression

In [4]:
class LinearRegression():

    """
        Linear Regression model with mean squared error as cost function

        Parameters
        ----------

        learning_rate : float
                        learning rate of gradient descent algorithm
        n_iter : int
                number of iterations or epoch of gradient descent algorithm
        Attributes
        ----------
        lr : float
            learning rate of gradient descent algorithm
        n_iter : int
                number of iterations or epoch of gradient descent algorithm
        weights : numpy.array
                weights of our model are initialized with random numbers in [0,1]  
        bias : float
            bias of our model which is initialized with 0


        Methods
        -------
        fit(X,y)
            training the weights with regards to dataset (X and y)
        predict(X)
                predict target values corresponding to X
        _compute_gradient(X,y_true,y_predicted)
                        computing gradients of mse cost function

    """

    def __init__(self,learning_rate = 0.01,n_iter=250000):
        self.lr = learning_rate
        self.n_iter = n_iter
        self.weights = None
        self.bias = None
    # computing gradients of mse cost function
    def _compute_gradient(self,X,y_true,y_predicted):
        n = X.shape[0]
        dw = (1/n) * np.dot(X.T,(y_predicted-y_true))
        db = (1/n) * np.sum(y_predicted-y_true)
        return dw , db

    def predict(self,X):
        return np.dot(X,self.weights) + self.bias

    def fit(self,X,y):
        # initializing weights and bias
        self.weights = np.random.rand(X.shape[1])
        
        
        self.bias = 0 

        for _ in range(self.n_iter):

            y_predicted = self.predict(X)

            dw , db = self._compute_gradient(X,y,y_predicted)
            
            # updating weights and bias
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

**defining model**

In [5]:
model = LinearRegression()

#remove single-dimensional entries from the shape of an array
Y_train = Y_train.squeeze()

model.fit(X_train, Y_train)

In [6]:
#Let's take a look at our weights and bias
print("Model Weights Are : {}".format(model.weights))
print("Model bias is : {}".format(model.bias))

Model Weights Are : [  23.66764336 -174.46069598  606.19975669  289.94315985  -45.32525116
 -102.95497087 -184.84012308   93.17251518  377.92006567   85.03677887]
Model bias is : 153.8098122945581


In [7]:
#Predicting Y according to X_test and Assumed weights and bias
Y_predict = X_test.dot(model.weights) + model.bias

**Checking Accuracy** :<br>
calculating RSS according to this equation :<br>
$RSS = \Sigma(Y^{2} - \hat{Y}^{2})$

In [8]:
RSSs = Y_test**2 - Y_predict**2
RSS = RSSs.sum()
RSS

225331.3387715741

In [9]:
#Calculating TSS which is Variance of Y
TSSs = Y_test**2. - Y.mean()**2
TSS = TSSs.sum()
TSS

443961.19553244184

Calculating $R^{2}$ according to this equation : <br>
<br>
$R^{2} = \frac{(TSS - RSS)}{TSS}$

In [10]:
R_Squared = (TSS - RSS)/TSS
R_Squared

0.4924526264027768

# *****

# Linear Regression with SKlearn

In [11]:
#Adding necessary tools and defining model
from sklearn import linear_model
model = linear_model.LinearRegression()
model.fit(X_train,Y_train)

LinearRegression()

In [12]:
#Let's take a look at weights and bias
model.coef_

array([  19.70314614, -175.49893906,  610.67778789,  287.51155318,
       -385.48553379,  179.0466207 ,  -47.59277311,  107.81873576,
        516.48046603,   74.46240035])

In [13]:
model.intercept_

153.8029728645442

In [14]:
#Predicting Y according to X_test
Y_predict_SKlearn = model.predict(X_test)

calculating RSS according to this equation :<br>
$RSS = \Sigma(Y^{2} - \hat{Y}^{2})$

In [15]:
RSSs = Y_test**2 - Y_predict_SKlearn**2
RSS = RSSs.sum()
RSS

225732.16849902825

In [16]:
#Calculating TSS which is Variance of Y
TSSs = Y_test**2. - Y.mean()**2
TSS = TSSs.sum()
TSS

443961.19553244184

Calculating $R^{2}$ according to this equation : <br>
<br>
$R^{2} = \frac{(TSS - RSS)}{TSS}$

In [17]:
R_Squared = (TSS - RSS)/TSS
R_Squared

0.4915497778396869