# Housing Price Prediction

load libraries

In [1]:
import pandas as pd
import numpy as np

Load Housoing data as dataframe

In [2]:
house_price_dataframe = pd.read_csv("./datasets/house-prices.csv")
df = house_price_dataframe.copy()

In [3]:
df.head(2)

Unnamed: 0,Home,Price,SqFt,Bedrooms,Bathrooms,Offers,Brick,Neighborhood
0,1,114300,1790,2,2,2,No,East
1,2,114200,2030,4,2,3,No,East


In [4]:
df.columns

Index(['Home', 'Price', 'SqFt', 'Bedrooms', 'Bathrooms', 'Offers', 'Brick',
       'Neighborhood'],
      dtype='object')

In [5]:
df.corr()

Unnamed: 0,Home,Price,SqFt,Bedrooms,Bathrooms,Offers
Home,1.0,0.10819,0.168553,-0.068568,0.127694,-0.053597
Price,0.10819,1.0,0.552982,0.525926,0.523258,-0.313636
SqFt,0.168553,0.552982,1.0,0.483807,0.522745,0.336923
Bedrooms,-0.068568,0.525926,0.483807,1.0,0.414556,0.114271
Bathrooms,0.127694,0.523258,0.522745,0.414556,1.0,0.143793
Offers,-0.053597,-0.313636,0.336923,0.114271,0.143793,1.0


In [6]:
df.isnull().sum()

Home            0
Price           0
SqFt            0
Bedrooms        0
Bathrooms       0
Offers          0
Brick           0
Neighborhood    0
dtype: int64

## Observatoin

### Required columns

1. Sqft 
2. Bedrooms 
3. Bathrooms
4. offers


In [7]:
df.drop(columns=["Home", "Brick", "Neighborhood"], axis=1, inplace=True)

In [8]:
df

Unnamed: 0,Price,SqFt,Bedrooms,Bathrooms,Offers
0,114300,1790,2,2,2
1,114200,2030,4,2,3
2,114800,1740,3,2,1
3,94700,1980,3,2,3
4,119800,2130,3,3,3
...,...,...,...,...,...
123,119700,1900,3,3,3
124,147900,2160,4,3,3
125,113500,2070,2,2,2
126,149900,2020,3,3,1


## Devide the target and independent datase

In [9]:
y = df["Price"]
X = df.drop(columns=["Price"], axis=1)

In [10]:
y.head(2)

0    114300
1    114200
Name: Price, dtype: int64

In [11]:
X.head()

Unnamed: 0,SqFt,Bedrooms,Bathrooms,Offers
0,1790,2,2,2
1,2030,4,2,3
2,1740,3,2,1
3,1980,3,2,3
4,2130,3,3,3


### Train Test split data

In [12]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=25, test_size=0.2)

scaler = StandardScaler()

X_train_scale = scaler.fit_transform(X_train)
X_test_scale = scaler.transform(X_test)

## Create pipeline for house prediction

Added features
1. StanderScaler

In [41]:
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor

from sklearn.model_selection import cross_validate
from sklearn.metrics import accuracy_score, mean_absolute_error

import seaborn as sns

models = [
          ('LinReg', LinearRegression()), 
          ('RF', RandomForestRegressor()),
          ('SVR', SVR()),
          ('XGB', GradientBoostingRegressor())
        ]

In [42]:
for name, model in models:
    regressor = cross_validate(model, X_train_scale, y_train, cv=5)
    model_regressor = model.fit(X_train_scale, y_train)
    y_predict = model.predict(X_test_scale)
    accuracy = model_regressor.score(X_test_scale, y_test)
    print(f"{name} : accuracy = {accuracy}")

LinReg : accuracy = 0.6126855892944286
RF : accuracy = 0.37353241491102074
SVR : accuracy = -0.005773023136815381
XGB : accuracy = 0.41531299188562953
