# Predict house price 
The data is located in "../Data/data2.txt" (the path read by the loading cell below). The first column is the house size, the second column is the number of bedrooms, and the third column is the house price.

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## 1.Read

In [6]:
# The file has no header row, so the column names are supplied explicitly.
data = pd.read_csv(
    "../Data/data2.txt",
    names=['Size', 'Bedrooms', 'Price'],
    header=None,
)
data.head()

Unnamed: 0,Size,Bedrooms,Price
0,2104,3,399900
1,1600,3,329900
2,2400,3,369000
3,1416,2,232000
4,3000,4,539900


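## 2.Mean variance normalization  
  
$x_i:=\frac{x_i-\mu_i}{\sigma_i}$, where $\mu_i$ and $\sigma_i$ are the mean and the standard deviation of column $i$. The cell below applies this to every column, including the price.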

In [7]:
# z-score each column: subtract the column mean, then divide by the column std.
# Both statistics are taken from the un-normalized frame because `data` is
# only rebound after the whole expression has been evaluated.
data = data.sub(data.mean(), axis="columns").div(data.std(), axis="columns")
data.head()

Unnamed: 0,Size,Bedrooms,Price
0,0.13001,-0.223675,0.475747
1,-0.50419,-0.223675,-0.084074
2,0.502476,-0.223675,0.228626
3,-0.735723,-1.537767,-0.867025
4,1.257476,1.090417,1.595389


## 3.Gradient Descent
### 3.1 Implement the cost function $J(θ_0,θ_1,θ_2)$
$J(θ_0,θ_1,θ_2)=\frac{1}{2m}\sum\limits_{i=1}\limits^m(h_θ(x^{(i)})-y^{(i)})^2$, where $h_θ(x)=θ_0+θ_1x_1+θ_2x_2$ 

In [8]:
def ComputeCost(X,Y,Theta):
    """
    Squared-error cost: sum((X*Theta.T - Y)^2) / (2 * number of rows of X).

    X: input matrix, one sample per row
    Y: output matrix, one row per sample
    Theta: parameter row matrix, one entry per column of X

    The arguments are expected to be np.matrix objects (as built in the run
    cell), so that `*` is a matrix product rather than an element-wise one.
    Returns the cost as a scalar.
    """
    sample_count=len(X)
    residual=X*Theta.T-Y  # prediction minus target, one row per sample
    squared_residual=np.power(residual,2)
    return np.sum(squared_residual)/(2*sample_count)

### 3.2 Implement gradient descent
$repeat ~ until ~ convergence \lbrace$  
$θ_0:=θ_0-\frac{\alpha}{m}\sum\limits_{i=1}\limits^m(h_θ(x^{(i)})-y^{(i)})$  
$θ_1:=θ_1-\frac{\alpha}{m}\sum\limits_{i=1}\limits^m[(h_θ(x^{(i)})-y^{(i)})x_1^{(i)}]$  
$θ_2:=θ_2-\frac{\alpha}{m}\sum\limits_{i=1}\limits^m[(h_θ(x^{(i)})-y^{(i)})x_2^{(i)}]$  
$\rbrace$

In [11]:
def GradientDescent(X,Y,Theta,alpha,iters):
    """
    Batch gradient descent for linear regression with any number of parameters.

    X: (m, n) input matrix, one sample per row
    Y: (m, 1) output column matrix
    Theta: (1, n) row matrix holding the initial parameters; it is not modified
    alpha: learning rate
    iters: number of iterations

    Returns (Theta, cost): the (1, n) parameter row after `iters` updates and
    the cost of those parameters as computed by ComputeCost. When iters is 0
    the initial parameters and their cost are returned.
    """
    # np.asmatrix keeps `*` a matrix product and also accepts plain 2-D arrays
    # (np.mat no longer exists in NumPy 2.0). The float dtype makes the returned
    # parameters float even when the start vector is integer and iters is 0.
    X=np.asmatrix(X)
    Y=np.asmatrix(Y)
    Theta=np.asmatrix(Theta,dtype=float)
    m=len(X)
    for _ in range(iters):
        error=X*Theta.T-Y
        # error.T*X is the (1, n) row of sum_i(error_i * x_ij), so every
        # parameter is updated simultaneously from the same error vector.
        Theta=Theta-(alpha/m)*(error.T*X)
    # Only the final cost is returned, so it is computed once after the loop.
    cost=ComputeCost(X,Y,Theta)
    return Theta,cost

### 3.3 run

In [13]:
# Prepend the constant column that multiplies the intercept parameter.
# Guarded so that re-running this cell does not raise
# "cannot insert new, already exists".
if "new" not in data.columns:
    data.insert(0,"new",1)
# np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
X=np.asmatrix(data.iloc[:,:-1].values)   # every column except the last: inputs
Y=np.asmatrix(data.iloc[:,-1].values).T  # last column as a column matrix: output
Theta = np.asmatrix(np.zeros((1,X.shape[1])))  # one zero start value per column of X
alpha=0.01
iters=1500
parameters,cost=GradientDescent(X,Y,Theta,alpha,iters)
print("Fitted Curve Parameters:",parameters)
print("cost:",cost)

Fitted Curve Parameters: [[-9.98551124e-17  8.84042349e-01 -5.24551809e-02]]
cost: 0.13068670606095903


## 4.Normal Equation
$\Theta=(X^TX)^{-1}X^TY$

In [15]:
def normalEqn(X,Y):
    """
    Closed-form least-squares solution Theta = pinv(X^T X) X^T Y.

    X: input matrix, one sample per row
    Y: output column matrix, one row per sample

    The arguments are expected to be np.matrix objects (as built in the run
    cell), so that `*` is a matrix product.
    Returns Theta as a column matrix with one row per column of X.
    """
    X_transposed=X.T
    # pinv instead of inv, so a singular X^T X still yields a solution
    gram_pinv=np.linalg.pinv(X_transposed*X)
    return gram_pinv*X_transposed*Y

In [20]:
# Fit the same X, Y in closed form for comparison with gradient descent.
# Note: this rebinds `parameters` and `cost` from the gradient-descent cell.
parameters=normalEqn(X,Y)
# normalEqn returns the parameters as a column; ComputeCost transposes its
# Theta argument itself, so pass the row form.
cost=ComputeCost(X,Y,parameters.T)
print("Fitted Curve Parameters:",parameters)
print("cost:",cost)

Fitted Curve Parameters: [[ 1.04083409e-17]
 [ 8.84765988e-01]
 [-5.31788197e-02]]
cost: 0.13068648053904197
