In [1]:
import numpy as np
import pandas as pd

In [2]:
class LinearRegression(object):
    def __init__(self, fit_intercept=True, copy_X=True):
        self.fit_intercept = fit_intercept
        self.copy_X = copy_X
        
        self._coef = None
        self._intercept = None
        self._new_X = None
        
    def fit(self, X, y):
        self._new_X = np.array(X)
        y = y.reshape(-1, 1)
        if self.fit_intercept:
            intercept_vector = np.ones([self._new_X.shape[0], 1])
            self._new_X = np.concatenate(
                (intercept_vector, self._new_X), axis=1
            )
        weights = np.linalg.inv(self._new_X.T.dot(self._new_X)).dot(self._new_X.T.dot(y)).flatten()
        print(weights)

        if self.fit_intercept:
            self._intercept = weights[0]
            self._coef = weights[1]
        else: self._coef = weights[0]
        print(self._coef)

    def predict(self, X):
        if self.fit_intercept:
            return X*self._coef + self._intercept
        else: return X*self._coef

    @property
    def coef(self):
        return self._coef

    @property
    def intercept(self):
        return self._intercept

In [3]:
df = pd.read_csv('06_Linear Regression/test.csv')
df

Unnamed: 0,x,y
0,77,79.775152
1,21,23.177279
2,22,25.609262
3,20,17.857388
4,36,41.849864
...,...,...
295,71,68.545888
296,46,47.334876
297,55,54.090637
298,62,63.297171


In [4]:
X = df['x'].values.reshape(-1,1)
y = df['y'].values

In [5]:
lr = LinearRegression()

In [6]:
lr.fit(X, y)

[-0.46181077  1.01433536]
1.0143353551195156


In [7]:
lr._new_X.shape

(300, 2)

In [8]:
lr.fit_intercept

True

In [9]:
print(lr.predict(123))

124.30143790603944


In [10]:
X.shape
y.shape

(300,)

In [11]:
from sklearn import linear_model
sk_lr = linear_model.LinearRegression(normalize=False)
sk_lr.fit(X, y)

LinearRegression()

In [12]:
sk_lr.intercept_

-0.4618107736611847

In [13]:
sk_lr.coef_

array([1.01433536])

In [14]:
sk_lr.predict(X)

array([ 77.64201157,  20.83923168,  21.85356704,  19.82489633,
        36.05426201,  14.75321955,  62.42698124,  95.90004796,
        19.82489633,   4.609866  ,   3.59553065,  18.81056097,
        96.91438332,  62.42698124,  36.05426201,  14.75321955,
        65.46998731,  13.7388842 ,  87.78536512,  69.52732873,
        89.81403583,  51.26929234,  89.81403583,  26.92524381,
        97.92871867,  58.36963982,  79.67068228,  20.83923168,
        93.87137725,  26.92524381,  99.95738938,  30.98258524,
        33.01125595,  80.68501764,  27.93957917,  47.21195092,
        53.29796305,  69.52732873,  27.93957917,  33.01125595,
        91.84270654,  71.55599944,  50.25495698,  76.62767622,
         3.59553065,  37.06859737,  70.54166408,  68.51299337,
        40.11160343,  35.03992666,  94.88571261,  88.79970048,
        52.28362769,  30.98258524,  59.38397518,  -0.46181077,
        39.09726808,  64.45565195,  69.52732873,  57.35530447,
        12.72454884,  72.57033479,  76.62767622,  61.41

In [15]:
df = pd.read_csv("06_Linear Regression/mlr09.csv")

In [16]:
df.head()

Unnamed: 0,height_in_feet,weight_in_pounds,successful_field_goals,percent_of_successful_free_throws,average_points_scored
0,6.8,225,0.442,0.672,9.2
1,6.3,180,0.435,0.797,11.7
2,6.4,190,0.456,0.761,15.8
3,6.2,180,0.416,0.651,8.6
4,6.9,205,0.449,0.9,23.2


In [18]:
y = df["average_points_scored"].values

In [20]:
df.iloc[:,:-1]

Unnamed: 0,height_in_feet,weight_in_pounds,successful_field_goals,percent_of_successful_free_throws
0,6.8,225,0.442,0.672
1,6.3,180,0.435,0.797
2,6.4,190,0.456,0.761
3,6.2,180,0.416,0.651
4,6.9,205,0.449,0.9
5,6.4,225,0.431,0.78
6,6.3,185,0.487,0.771
7,6.8,235,0.469,0.75
8,6.9,235,0.435,0.818
9,6.7,210,0.48,0.825


In [21]:
X = df.iloc[:,:-1].values

In [22]:
mu_X = np.mean(X, axis=0)
std_x = np.std(X, axis=0)

rescaled_X = (X-mu_X)/std_x

In [23]:
lr.fit(rescaled_X, y)

[11.79074074 -1.67779283  0.28359762  2.68586629  1.12816882]
-1.6777928325040945


In [24]:
lr.coef

-1.6777928325040945

In [25]:
lr.intercept

11.790740740740738

In [27]:
sk_lr.fit(rescaled_X, y)

LinearRegression()

In [28]:
sk_lr.coef_

array([-1.67779283,  0.28359762,  2.68586629,  1.12816882])