# Linear Regression

<img src="img/linear.png" width=700 height=400 />

In [110]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Read data

In [111]:
# Load the housing data; header=None means the file carries no header row,
# so the column names are supplied explicitly.
column_names = ['size', 'room', 'price']
df = pd.read_csv('data.txt', names=column_names, header=None)
df.head()

Unnamed: 0,size,room,price
0,2104,3,399900
1,1600,3,329900
2,2400,3,369000
3,1416,2,232000
4,3000,4,539900


In [112]:
# Pairwise correlation between size, room and price
df.corr()

Unnamed: 0,size,room,price
size,1.0,0.559967,0.854988
room,0.559967,1.0,0.442261
price,0.854988,0.442261,1.0


## Scale the data

In [113]:
# Standardize every column (the price target included): subtract the column
# mean and divide by the column standard deviation.
column_means = df.mean()
column_stds = df.std()
df = (df - column_means) / column_stds
df.head()

Unnamed: 0,size,room,price
0,0.13001,-0.223675,0.475747
1,-0.50419,-0.223675,-0.084074
2,0.502476,-0.223675,0.228626
3,-0.735723,-1.537767,-0.867025
4,1.257476,1.090417,1.595389


## Add a column of ones ($x_0$) to the features so that $\theta_0$ acts as the intercept

In [114]:
# Prepend the constant feature x0 = 1 so theta_0 is learned like any other weight.
# DataFrame.insert raises ValueError if the column already exists, so guard it
# to keep this cell safe to re-run.
if 'Ones' not in df.columns:
    df.insert(0, 'Ones', 1)
df.head()

Unnamed: 0,Ones,size,room,price
0,1,0.13001,-0.223675,0.475747
1,1,-0.50419,-0.223675,-0.084074
2,1,0.502476,-0.223675,0.228626
3,1,-0.735723,-1.537767,-0.867025
4,1,1.257476,1.090417,1.595389


## split the data and convert to numpy matrices

In [115]:
# Feature matrix: every column except the target
x = np.matrix(df.drop(columns='price'))
# Target as a column vector, one row per sample
y = np.matrix(df['price']).reshape(-1, 1)

In [116]:
# Sanity check: x is (samples, features including the Ones column), y is (samples, 1)
print('x shape', x.shape)
print('y shape', y.shape)

x shape (47, 3)
y shape (47, 1)


In [117]:
# The first 42 rows are used for training; the remaining rows are held out for testing
n_train = 42
xtrain, xtest = x[:n_train], x[n_train:]
ytrain, ytest = y[:n_train], y[n_train:]

$$ \theta_0, \theta_1, \dots, \theta_n $$

In [118]:
# One zero-initialised weight per column of x, stored as a (1, n) float row
# vector. Deriving n from x keeps the shapes aligned if the feature set changes,
# and a float dtype avoids starting from an integer matrix.
thetas = np.matrix(np.zeros((1, x.shape[1])))
thetas.shape

(1, 3)

## Hypothesis Function

$$ h_\theta(x) = \theta_0 + \theta_1 x $$

## Cost Function

$$ J(\theta_0, \theta_1) = \frac{1}{2m} \sum_{i=1}^m \left( h_\theta(x^{(i)}) - y^{(i)} \right)^2 $$

In [119]:
def costFun(x, y, theta):
    """Return the squared-error cost J(theta) = 1/(2m) * sum((x . theta^T - y)^2).

    Parameters
    ----------
    x : array-like, shape (m, n)
        Feature matrix (np.matrix, ndarray or nested list).
    y : array-like, m values
        Targets; reshaped to an (m, 1) column.
    theta : array-like, n values
        Weights; reshaped to a (1, n) row.

    Returns
    -------
    numpy float
        The cost over the m rows of x.
    """
    # Work on plain ndarrays so the product below is always a matrix product.
    # With `*`, ndarray inputs would be multiplied element-wise instead.
    features = np.asarray(x, dtype=float)
    targets = np.asarray(y, dtype=float).reshape(-1, 1)
    weights = np.asarray(theta, dtype=float).reshape(1, -1)

    residuals = features @ weights.T - targets
    return np.sum(np.square(residuals)) / (2 * len(features))

## Gradient Descent Function

Repeat until convergence {
$$ \theta_j := \theta_j - \alpha \frac{\partial}{\partial \theta_j} J(\theta_0, \theta_1) $$
}

## Note
$$ \alpha $$
is called the learning rate:
- if it is too large, gradient descent may overshoot the minimum and fail to converge
- if it is very small, it takes a long time to converge
- a common default is 0.01

## Gradient Descent for Linear Regression

Repeat until convergence {
$$ \theta_0 := \theta_0 - \alpha \frac{1}{m} \sum_{i=1}^m \left( h_\theta(x^{(i)}) - y^{(i)} \right) $$
$$ \theta_1 := \theta_1 - \alpha \frac{1}{m} \sum_{i=1}^m \left( h_\theta(x^{(i)}) - y^{(i)} \right) \cdot x_1^{(i)} $$
}

Note: the first update is not multiplied by a feature value because $x_0$ is always 1.

In [120]:
def GDF(x, y, theta, alpha, iterates):
    """Fit linear-regression weights with batch gradient descent.

    Each iteration applies the simultaneous update
    ``theta := theta - (alpha / m) * (x . theta^T - y)^T . x``
    and records the cost that ``costFun`` reports for the updated weights.

    Parameters
    ----------
    x : array-like, shape (m, n)
        Feature matrix (np.matrix, ndarray or nested list).
    y : array-like, m values
        Targets; reshaped to an (m, 1) column.
    theta : array-like, n values
        Starting weights; copied, the caller's object is never modified.
    alpha : float
        Learning rate.
    iterates : int
        Number of update steps.

    Returns
    -------
    theta : np.matrix, shape (1, n)
        Weights after the last iteration.
    costs : np.ndarray, shape (iterates,)
        Cost after each iteration.
    """
    # Plain float ndarrays make `@` an unambiguous matrix product for
    # np.matrix, ndarray and list inputs alike.
    features = np.asarray(x, dtype=float)
    targets = np.asarray(y, dtype=float).reshape(-1, 1)
    # np.array (unlike np.asarray) always copies, so the input theta stays untouched.
    weights = np.array(theta, dtype=float).reshape(1, -1)

    # costFun is called with np.matrix operands; build the constant views once.
    features_m = np.asmatrix(features)
    targets_m = np.asmatrix(targets)

    step = alpha / len(features)
    costs = np.zeros(iterates)

    for i in range(iterates):
        # error = prediction - actual value, one row per sample
        error = features @ weights.T - targets
        # (1, m) @ (m, n) sums error * feature over the samples for every
        # weight at once, so all weights are updated from the same error.
        gradient_sum = error.T @ features
        # Rebinding to a fresh array avoids aliasing between old and new weights.
        weights = weights - step * gradient_sum
        costs[i] = costFun(features_m, targets_m, np.asmatrix(weights))

    return np.asmatrix(weights), costs

## Set the learning rate (alpha) and the number of iterations

In [135]:
alpha = 0.15  # learning rate passed to GDF
iters = 500   # number of gradient-descent iterations passed to GDF

## Run the model

In [136]:
# Train on the training split, starting from the initial thetas
last_thetas, costs = GDF(xtrain, ytrain, thetas, alpha, iters)
print('theta :', last_thetas)
print('---------')
# costs holds one value per iteration; only the first 50 are printed
print('the first 50 costs :\n', costs[:50])

theta : [[ 0.00731702  0.90143994 -0.03871813]]
---------
the first 50 costs :
 [0.38921101 0.30959328 0.25919469 0.2265778  0.20488302 0.18998296
 0.17938263 0.17156387 0.16559431 0.16089408 0.15709633 0.15396385
 0.15133902 0.14911368 0.14721101 0.1455744  0.14416068 0.1429359
 0.14187263 0.14094827 0.14014391 0.1394435  0.13883333 0.13830161
 0.13783815 0.13743413 0.13708189 0.13677478 0.13650699 0.1362735
 0.13606989 0.13589235 0.13573753 0.13560253 0.13548481 0.13538215
 0.13529263 0.13521457 0.1351465  0.13508714 0.13503538 0.13499024
 0.13495088 0.13491655 0.13488662 0.13486052 0.13483776 0.13481791
 0.1348006  0.13478551]


In [137]:
# Cost on the training split with the learned weights
print('final cost fun :', costFun(xtrain, ytrain, last_thetas))

final cost fun : 0.13468266980065002


## predict test values

In [138]:
# Predictions for the held-out rows. The price column was standardized along
# with the features, so these values are in standardized units.
xtest * last_thetas.T

matrix([[ 0.6074804 ],
        [-0.89224411],
        [-1.23610531],
        [-0.20355225],
        [-0.88884118]])