### Import required packages 

In [33]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (20.0, 10.0)
from mpl_toolkits.mplot3d import Axes3D

### Import housing data

In [34]:
# Fetching data
data = pd.read_csv('housing.csv')
print(data.shape)
data.head()
# Dropping unnecessary columns
data=data.drop(['Unnamed: 0'],axis=1)
data.head()

(546, 13)


Unnamed: 0,price,lotsize,bedrooms,bathrms,stories,driveway,recroom,fullbase,gashw,airco,garagepl,prefarea
0,42000.0,5850,3,1,2,yes,no,yes,no,no,1,no
1,38500.0,4000,2,1,1,yes,no,no,no,no,0,no
2,49500.0,3060,3,1,1,yes,no,no,no,no,0,no
3,60500.0,6650,3,1,2,yes,yes,no,no,no,0,no
4,61000.0,6360,2,1,1,yes,no,no,no,no,0,no


### Map categorical features to integer equivalent and mean normalizing

In [35]:
# Mapping Yes and No to1 and 0 respectively
di = {'no': 0, 'yes': 1}
data=data.replace({'driveway':di,'recroom':di,'fullbase':di,'gashw':di,'airco':di,'prefarea':di})
data.head()

# Normalize the features using mean normalization
data = (data - data.mean())/data.std()
data.head()

Unnamed: 0,price,lotsize,bedrooms,bathrms,stories,driveway,recroom,fullbase,gashw,airco,garagepl,prefarea
0,-0.978239,0.322732,0.047192,-0.568973,0.221501,0.404819,-0.464371,1.36207,-0.218853,-0.68041,0.357239,-0.552865
1,-1.109312,-0.530526,-1.308947,-0.568973,-0.930304,0.404819,-0.464371,-0.732832,-0.218853,-0.68041,-0.803788,-0.552865
2,-0.697368,-0.964074,0.047192,-0.568973,-0.930304,0.404819,-0.464371,-0.732832,-0.218853,-0.68041,-0.803788,-0.552865
3,-0.285425,0.691709,0.047192,-0.568973,0.221501,0.404819,2.149509,-0.732832,-0.218853,-0.68041,-0.803788,-0.552865
4,-0.2667,0.557955,-1.308947,-0.568973,-0.930304,0.404819,-0.464371,-0.732832,-0.218853,-0.68041,-0.803788,-0.552865


### Separate the features from target variable 

In [36]:
# Separate out the Feature and Target matrices
Y=data.iloc[:,0:1].values
#X=data[['lotsize','bedrooms','bathrms','stories','garagepl']].iloc[:,:]
X=data.drop(["price"],axis=1).iloc[:,:]
ones = np.ones([X.shape[0],1])
X = np.concatenate((ones,X),axis=1)
print(X.shape)
print(Y.shape)

(546, 12)
(546, 1)


### Defining weights and Locally Weighted Regression cost functions

In [37]:
# LWR Cost Function
def LWRCost(X, Y, theta, W, reg_lambda):
    reg_sum = (reg_lambda / (2 * X.shape[0])) * np.sum(np.square(theta[1:]))
    return np.dot(np.dot(np.transpose(np.dot(X, theta) - Y), W), (np.dot(X, theta) - Y)) + reg_sum

# Local Weights 
def get_weights(X, input_var, tau) :
    weights = np.zeros((X.shape[0], X.shape[0]))
    for i in range(X.shape[0]) :
        weights[i][i] = np.exp((float(-1/2) * np.dot(np.transpose(X[i] - input_var), (X[i] - input_var))) / (tau * tau))
    return weights

# Normal Equation Definition
def LWR_Normal(X, W, Y, reg_lambda) :  
    return np.dot(np.linalg.inv(np.dot(np.dot(np.transpose(X), W), X)+reg_lambda), np.dot(np.dot(np.transpose(X), W), Y))

### Initialize parameters and use whole batch as neighbourhood

In [38]:
# Initialize parameters
reg_lambda = 0.001
tau = 1

### Leaving last tuple for testing purposes

In [39]:
X=X[:-1]
Y=Y[:-1]
# To be predicted
input_var = X[-1]            

### Apply Locally Weighted Regression

In [40]:
# Apply LWR with Normal Equation 
W = get_weights(X, input_var, tau)
theta_final = LWR_Normal(X, W, Y, reg_lambda)
print("Final cost: ", LWRCost(X, Y, theta_final, W, reg_lambda))
print("Final theta: ", theta_final)
print("Predicted value : ", np.dot(np.transpose(input_var), theta_final))
print("Original value : ", Y[-1])

Final cost:  [[0.31795942]]
Final theta:  [[-0.42480668]
 [-0.09024056]
 [ 0.11276518]
 [ 0.56568635]
 [-0.02569115]
 [ 0.22438749]
 [ 0.21914763]
 [ 0.1072797 ]
 [-1.55721084]
 [ 0.15718895]
 [ 0.46929286]
 [ 0.25576549]]
Predicted value :  [1.42503106]
Original value :  [1.38107544]


### Now taking neighbourhood of 50, or tau=50/545

In [51]:
reg_lambda = 0.001
tau = float(50/545)
# Apply LWR with Normal Equation 
W = get_weights(X, input_var, tau)
theta_final = LWR_Normal(X, W, Y, reg_lambda)
print("Final cost: ", LWRCost(X, Y, theta_final, W, reg_lambda))
print("Final theta: ", theta_final)
print("Predicted value : ", np.dot(np.transpose(input_var), theta_final))
print("Original value : ", Y[-1])

Final cost:  [[135.97812411]]
Final theta:  [[ 176.]
 [-192.]
 [-512.]
 [ -24.]
 [ 224.]
 [ -96.]
 [  -4.]
 [  -8.]
 [ 256.]
 [ -16.]
 [ 192.]
 [  48.]]
Predicted value :  [13.02377364]
Original value :  [1.38107544]
