## Basic Linear Regression

## 1. Create dataset

In [1]:
import os 

def mkdir_if_not_exist(path):
    """Create the directory ``path``, including missing parents, if needed.

    Parameters
    ----------
    path : str, os.PathLike, or iterable of path components
        A single path (``str`` or ``os.PathLike`` such as ``pathlib.Path``)
        is used as-is. Any other value is treated as a sequence of
        components and joined with ``os.path.join``.

    Raises
    ------
    FileExistsError
        If ``path`` already exists but is not a directory.
    """
    # Only join when given a sequence of components; a Path object is already
    # a complete path and cannot be unpacked with ``*``.
    if not isinstance(path, (str, os.PathLike)):
        path = os.path.join(*path)
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test and makes repeated calls a no-op.
    os.makedirs(path, exist_ok=True)

In [8]:
import numpy as np

data_file = '../data/LR1.csv'
mkdir_if_not_exist('../data')

# Heights (in cm) and the corresponding weights, one pair per person.
x = np.array([147, 150, 153, 158, 163, 165, 168, 170, 173, 175, 178, 180, 183])
y = np.array([49, 50, 51, 54, 58, 59, 60, 62, 63, 64, 66, 67, 68])

# Write the samples as a two-column CSV. The header must not contain a space
# after the comma, otherwise pandas reads the second column name as ' Weight'.
with open(data_file, 'w') as f:
    f.write('Height,Weight\n')
    for height, weight in zip(x, y):
        f.write('{},{}\n'.format(height, weight))

## 2. Read dataset

In [12]:
import pandas as pd

data_file = '../data/LR1.csv'
data = pd.read_csv(data_file)

# First column is kept two-dimensional (n_samples, 1) as the feature matrix;
# the last column is taken as a one-dimensional target vector.
inputs = data.iloc[:, 0:1]
outputs = data.iloc[:, -1]

# Copy the values into plain NumPy arrays for the linear-algebra steps below.
X = np.array(inputs.to_numpy())
y = np.array(outputs.to_numpy())
X, y

(array([[147],
        [150],
        [153],
        [158],
        [163],
        [165],
        [168],
        [170],
        [173],
        [175],
        [178],
        [180],
        [183]]),
 array([49, 50, 51, 54, 58, 59, 60, 62, 63, 64, 66, 67, 68]))

## 3. Calculate Linear Regression

*Add x0 to data (x0 = 1)* 

In [13]:
# Prepend a column of ones to X so the intercept is learned as the first weight.
bias_column = np.ones((X.shape[0], 1))
Xbar = np.hstack((bias_column, X))
Xbar

array([[  1., 147.],
       [  1., 150.],
       [  1., 153.],
       [  1., 158.],
       [  1., 163.],
       [  1., 165.],
       [  1., 168.],
       [  1., 170.],
       [  1., 173.],
       [  1., 175.],
       [  1., 178.],
       [  1., 180.],
       [  1., 183.]])

**Normal equation:** $w = (\bar{X}^T \bar{X})^{\dagger}\, \bar{X}^T y$, i.e. `w = pinv(Xbar.T · Xbar) · (Xbar.T · y)`

In [14]:
# The two factors of the normal equation:
#   A is the pseudo-inverse of Xbar^T Xbar, B is Xbar^T y.
gram = Xbar.T @ Xbar
A = np.linalg.pinv(gram)
B = Xbar.T @ y
A, B

(array([[ 1.69874542e+01, -1.01635185e-01],
        [-1.01635185e-01,  6.10844845e-04]]),
 array([   771., 129198.]))

In [15]:
# Weight vector: w[0] multiplies the column of ones (intercept),
# w[1] multiplies the height column (slope).
w = A @ B
w

array([-33.73541021,   0.55920496])

## 4. Predict

In [17]:
# Predicted weights for people who are 155 cm and 160 cm tall,
# using slope w[1] and intercept w[0] from the fitted model.
y1, y2 = (w[1]*height + w[0] for height in (155, 160))
y1, y2

(52.9413588948063, 55.73738370451929)

## 5. LR with Scikit-learn

In [24]:
from sklearn import datasets, linear_model

# fit() returns the estimator itself, so construction and fitting can be chained.
regr = linear_model.LinearRegression().fit(X, y)

# Intercept and slope learned by scikit-learn, for comparison with w above.
regr.intercept_, regr.coef_[0]

(-33.73541020580774, 0.5592049619396674)