# Linear Regression - Gradient descent strategy

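See the [CS229 lecture notes](https://cs229.stanford.edu/notes2020fall/notes2020fall/cs229-notes1.pdf) for the derivation of the least-squares cost function and the gradient-descent update rule used below.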

In [1]:
import numpy as np

from sklearn import datasets
from sklearn.model_selection import train_test_split

## Diabetes dataset from sklearn
See scikit-learn's [toy datasets](https://scikit-learn.org/stable/datasets/toy_dataset.html) page for a full description of this dataset.

In [2]:
# as_frame=True makes the features available as a pandas DataFrame (see .info() below)
diabetes = datasets.load_diabetes(as_frame=True)
diabetes.keys()

dict_keys(['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename'])

In [3]:
diabetes.data.info()  # every column reports 442 non-null values, so there are no missing values

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 442 entries, 0 to 441
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   age     442 non-null    float64
 1   sex     442 non-null    float64
 2   bmi     442 non-null    float64
 3   bp      442 non-null    float64
 4   s1      442 non-null    float64
 5   s2      442 non-null    float64
 6   s3      442 non-null    float64
 7   s4      442 non-null    float64
 8   s5      442 non-null    float64
 9   s6      442 non-null    float64
dtypes: float64(10)
memory usage: 34.7 KB


In [4]:
# Feature matrix as a plain NumPy array (drops the DataFrame column labels)
X = diabetes.data.values
X.shape  # (n_samples, n_features): 442 samples, 10 features

(442, 10)

In [5]:
# Regression target as a 1-D NumPy array, one value per sample
y = diabetes.target.values
y.shape

(442,)

In [6]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the split reproducible
train_X,test_X, train_y,test_y = train_test_split(X,y,random_state=42,test_size=0.2)

In [7]:
class LinearRegression:
    """Least-squares linear regression fitted with batch gradient descent.

    An intercept column of ones is prepended to the feature matrix, so the
    learned parameter vector ``theta`` has shape ``(n_features + 1, 1)`` with
    the intercept in row 0.

    Parameters
    ----------
    alpha : float, default=1
        Learning rate. Each update moves ``theta`` by ``2 * alpha / m`` times
        ``X.T @ (y - X @ theta)``, where ``m`` is the number of training rows.
    n_iter : int, default=20000
        Number of gradient-descent iterations run by ``fit``.

    Attributes
    ----------
    theta : numpy.ndarray of shape (n_features + 1, 1), or ``nan`` before ``fit``
    """

    def __init__(self, alpha=1, n_iter=20000):
        self.alpha = alpha
        # Lowercase np.nan: the np.NAN alias was removed in NumPy 2.0.
        self.theta = np.nan
        self.iter = n_iter

    @staticmethod
    def _add_intercept(X):
        """Return X as an array with a leading column of ones, shape (m, n + 1)."""
        X = np.asarray(X)
        return np.hstack((np.ones((len(X), 1)), X))

    @staticmethod
    def _as_column(y):
        """Return the 1-D target ``y`` as an (m, 1) column array."""
        return np.asarray(y)[:, np.newaxis]

    def fit(self, X, y):
        """Estimate ``theta`` by running ``n_iter`` full-batch gradient steps.

        Parameters
        ----------
        X : array-like of shape (m, n)
            Training features (without an intercept column).
        y : array-like of shape (m,)
            Training targets.

        Returns
        -------
        self : LinearRegression
            The fitted estimator, to allow call chaining.

        Raises
        ------
        ValueError
            If the training set is empty.
        """
        X = self._add_intercept(X)  # (m, n+1)
        y = self._as_column(y)      # (m, 1)
        m = len(y)
        if m == 0:
            raise ValueError("cannot fit on an empty training set")

        theta = np.ones((X.shape[1], 1))  # (n+1, 1), all parameters start at 1

        # The step size does not change between iterations, so compute it once.
        step = 2 * (self.alpha / m)
        for _ in range(self.iter):
            # X.T @ (y - X @ theta) points downhill for the squared-error cost
            # (it is proportional to the negative gradient), hence the "+".
            descent = X.T.dot(y - X.dot(theta))
            theta = theta + step * descent
        self.theta = theta
        return self

    def score(self, X, y):
        """Return the coefficient of determination R^2 of the predictions.

        Computed as ``1 - SS_res / SS_tot`` where ``SS_res`` is the sum of
        squared residuals and ``SS_tot`` the sum of squared deviations of
        ``y`` from its mean.

        Parameters
        ----------
        X : array-like of shape (m, n)
        y : array-like of shape (m,)

        Returns
        -------
        float
        """
        X = self._add_intercept(X)
        y = self._as_column(y)

        h = X.dot(self.theta)
        ss_res = ((y - h) ** 2).sum()
        ss_tot = ((y - y.mean()) ** 2).sum()
        return 1 - ss_res / ss_tot

    def predict(self, X=None):
        """Return predictions for ``X`` as a column array of shape (m, 1).

        Parameters
        ----------
        X : array-like of shape (m, n)
            Features (without an intercept column). Required; the ``None``
            default is kept only for signature compatibility.

        Raises
        ------
        TypeError
            If ``X`` is not supplied.
        """
        if X is None:
            raise TypeError("predict() requires a feature matrix X")
        return self._add_intercept(X).dot(self.theta)
        
        
        

In [8]:
# Gradient-descent model with learning rate 0.1 and 40000 iterations
reg = LinearRegression(alpha=0.1, n_iter=40000)

In [9]:
# Fit on the training split only; fit() returns the estimator itself
reg.fit(train_X,train_y)

<__main__.LinearRegression at 0x7fe5c085bbe0>

In [10]:
# R^2 on the training split
reg.score(train_X, train_y)

0.5242355587633337

In [11]:
# R^2 on the held-out test split
reg.score(test_X, test_y)

0.45534393994219957

## Using scikit-learn 

In [12]:
from sklearn import linear_model


In [13]:
# scikit-learn's estimator, used as a reference; it uses linalg.lstsq internally as long as the matrix is not sparse
sreg = linear_model.LinearRegression()

In [14]:
sreg.get_params()  # default parameters of scikit-learn's LinearRegression

{'copy_X': True,
 'fit_intercept': True,
 'n_jobs': None,
 'normalize': False,
 'positive': False}

In [15]:
# Fit on the same training split as the gradient-descent model
sreg.fit(train_X,train_y)

LinearRegression()

In [16]:
# Fitted coefficients, one per feature (the intercept is stored separately in intercept_)
sreg.coef_

array([  37.90031426, -241.96624835,  542.42575342,  347.70830529,
       -931.46126093,  518.04405547,  163.40353476,  275.31003837,
        736.18909839,   48.67112488])

In [17]:
# R^2 on the training split, for comparison with the gradient-descent model
sreg.score(train_X, train_y)

0.5279198995709652

In [18]:
# R^2 on the held-out test split, for comparison with the gradient-descent model
sreg.score(test_X,test_y)

0.45260660216173787