# Importing required Libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression

Loading the dataset

In [2]:
house_data = pd.read_csv("Housing_Data.csv")
house_data

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15540,4970000,4410,4,3,2,yes,no,yes,no,no,2,no,semi-furnished
15541,2240000,1950,3,1,1,no,no,no,yes,no,0,no,unfurnished
15542,7070000,8880,2,1,1,yes,no,no,no,yes,1,no,semi-furnished
15543,3990000,4100,4,1,1,no,no,yes,no,no,0,no,unfurnished


Getting Statistical measures of data

In [3]:
house_data.describe()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking
count,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0
mean,4769655.0,5144.372017,2.962239,1.283242,1.823223,0.689161
std,1859416.0,2159.714462,0.740388,0.500636,0.880765,0.853832
min,1750000.0,1650.0,1.0,1.0,1.0,0.0
25%,3430000.0,3600.0,2.0,1.0,1.0,0.0
50%,4340000.0,4560.0,3.0,1.0,2.0,0.0
75%,5803000.0,6360.0,3.0,2.0,2.0,1.0
max,13300000.0,16200.0,6.0,4.0,4.0,3.0


Checking null values

In [4]:
house_data.isnull().sum()

price               0
area                0
bedrooms            0
bathrooms           0
stories             0
mainroad            0
guestroom           0
basement            0
hotwaterheating     0
airconditioning     0
parking             0
prefarea            0
furnishingstatus    0
dtype: int64

Unique values of different attributes/columns

In [5]:
house_data['furnishingstatus'].unique()

array(['furnished', 'semi-furnished', 'unfurnished'], dtype=object)

In [6]:
house_data['mainroad'].unique()

array(['yes', 'no'], dtype=object)

In [7]:
house_data['guestroom'].unique()

array(['no', 'yes'], dtype=object)

In [8]:
house_data['basement'].unique()

array(['no', 'yes'], dtype=object)

In [9]:
house_data['hotwaterheating'].unique()

array(['no', 'yes'], dtype=object)

In [10]:
house_data['airconditioning'].unique()

array(['yes', 'no'], dtype=object)

In [11]:
house_data['prefarea'].unique()

array(['yes', 'no'], dtype=object)

Encoding string data into integers 0,1,2,etc i.e Data Preprocessing

In [12]:
encoder = LabelEncoder()

In [13]:
house_data['mainroad'] = encoder.fit_transform(house_data['mainroad'])
house_data['guestroom'] = encoder.fit_transform(house_data['guestroom'])
house_data['basement'] = encoder.fit_transform(house_data['basement'])
house_data['hotwaterheating'] = encoder.fit_transform(house_data['hotwaterheating'])
house_data['airconditioning'] = encoder.fit_transform(house_data['airconditioning'])
house_data['prefarea'] = encoder.fit_transform(house_data['prefarea'])
house_data['furnishingstatus'] = encoder.fit_transform(house_data['furnishingstatus'])

In [14]:
house_data['furnishingstatus'].unique()

array([0, 1, 2])

In [15]:
house_data['mainroad'].unique()

array([1, 0])

In [16]:
house_data['guestroom'].unique()

array([0, 1])

In [17]:
house_data['basement'].unique()

array([0, 1])

In [18]:
house_data['hotwaterheating'].unique()

array([0, 1])

In [19]:
house_data['airconditioning'].unique()

array([1, 0])

In [20]:
house_data['prefarea'].unique()

array([1, 0])

# Breakdown  of encoded fields

mainroad,guestroom,basement,hotwaterheating,airconditioning,prefarea => Yes :1 and No :0


furnishingstatus => furnished:0, semi-furnished:1, unfurnished:2

In [21]:
house_data

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,0
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,0
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,1
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,0
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15540,4970000,4410,4,3,2,1,0,1,0,0,2,0,1
15541,2240000,1950,3,1,1,0,0,0,1,0,0,0,2
15542,7070000,8880,2,1,1,1,0,0,0,1,1,0,1
15543,3990000,4100,4,1,1,0,0,1,0,0,0,0,2


Split data into dependent and indepndent variables

In [22]:
x = house_data.iloc[:,1:]
x

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,7420,4,2,3,1,0,0,0,1,2,1,0
1,8960,4,4,4,1,0,0,0,1,3,0,0
2,9960,3,2,2,1,0,1,0,0,2,1,1
3,7500,4,2,2,1,0,1,0,1,3,1,0
4,7420,4,1,2,1,1,1,0,1,2,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
15540,4410,4,3,2,1,0,1,0,0,2,0,1
15541,1950,3,1,1,0,0,0,1,0,0,0,2
15542,8880,2,1,1,1,0,0,0,1,1,0,1
15543,4100,4,1,1,0,0,1,0,0,0,0,2


In [23]:
y = house_data.iloc[:,0]
y

0        13300000
1        12250000
2        12250000
3        12215000
4        11410000
           ...   
15540     4970000
15541     2240000
15542     7070000
15543     3990000
15544     4270000
Name: price, Length: 15545, dtype: int64

Split in it into train and test data

In [24]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3)

# Implementing Regression Algorithms

XGBRegressor

In [25]:
xgb = XGBRegressor()

In [26]:
xgb.fit(x_train,y_train)

Training Accuracy and Testing Accuracy

In [27]:
tr = xgb.score(x_train,y_train)*100
te = xgb.score(x_test,y_test)*100
print(tr,te)

99.77159156808078 99.76681523976862


RandomForestRegressor

In [28]:
random = RandomForestRegressor(n_estimators=50)

In [29]:
random.fit(x_train,y_train)

Training Accuracy and Testing Accuracy

In [30]:
tr = random.score(x_train,y_train)*100
te = random.score(x_test,y_test)*100
print(tr,te)

99.90867602269395 99.90641743656249


ExtraTreesRegressor

In [31]:
ensemble = ExtraTreesRegressor(n_estimators=50)

In [32]:
ensemble.fit(x_train,y_train)

Training Accuracy and Testing Accuracy

In [33]:
tr = ensemble.score(x_train,y_train)*100
te = ensemble.score(x_test,y_test)*100
print(tr,te)

99.9087011879757 99.90551764391455


DecisionTreeRegressor

In [34]:
decision = DecisionTreeRegressor()

In [35]:
decision.fit(x_train,y_train)

Training Accuracy and Testing Accuracy

In [36]:
tr = decision.score(x_train,y_train)*100
te = decision.score(x_test,y_test)*100
print(tr,te)

99.9087011879757 99.90551764391455


LinearRegression

In [37]:
linear = LinearRegression()

In [38]:
linear.fit(x_train,y_train)

Training Accuracy and Testing Accuracy

In [39]:
tr = linear.score(x_train,y_train)*100
te = linear.score(x_test,y_test)*100
print(tr,te)

67.83362432125818 67.95731931476864


In [40]:
house_data.head(1)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,0


# House Price Prediction Function

In [41]:
def house_price_prediction():
    area = int(input("Enter the area of a house (in sq feet):"))
    bedrooms = int(input("Enter the number of bedrooms:"))
    bathrooms = int(input("Enter the number of bathrooms:"))
    stories = int(input("Enter the number of floors of the house:"))
    mainroad = input("Is there a main road? (Yes/No): ").strip().lower()
    mainroad = 1 if mainroad == "yes" else 0
    guestroom = input("Is there a guestroom? (Yes/No): ").strip().lower()
    guestroom = 1 if guestroom == "yes" else 0
    basement = input("Is there a basement? (Yes/No): ").strip().lower()
    basement = 1 if basement == "yes" else 0
    hotwaterheating = input("Is there a hot water heating facility? (Yes/No): ").strip().lower()
    hotwaterheating = 1 if hotwaterheating == "yes" else 0
    airconditioning = input("Is there an air conditioning facility? (Yes/No): ").strip().lower()
    airconditioning = 1 if airconditioning == "yes" else 0
    parking = int(input("Enter bnumber of parking:"))
    prefarea = input("Is there a prefarea? (Yes/No): ").strip().lower()
    prefarea = 1 if prefarea == "yes" else 0
    furnishingstatus = input("Enter Furnishing Status (furnished/semi-furnished/unfurnished): ").strip().lower()
    if furnishingstatus == "furnished":
        furnishingstatus = 0
    elif furnishingstatus == "semi-furnished":
        furnishingstatus = 1
    elif furnishingstatus == "unfurnished":
        furnishingstatus = 2
    else:
        print("Invalid input")
    inputed_data = [[area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus]]
    result = random.predict(inputed_data)
    print(f"House Price Predicted to be Rs{result}.")

Call the House Price Prediction Function

Here are some of the demo examples

In [42]:
house_price_prediction()

Enter the area of a house (in sq feet):100
Enter the number of bedrooms:4
Enter the number of bathrooms:2
Enter the number of floors of the house:4
Is there a main road? (Yes/No): No
Is there a guestroom? (Yes/No): No
Is there a basement? (Yes/No): 1
Is there a hot water heating facility? (Yes/No): No
Is there an air conditioning facility? (Yes/No): Yes
Enter bnumber of parking:0
Is there a prefarea? (Yes/No): No
Enter Furnishing Status (furnished/semi-furnished/unfurnished): unfurnished
House Price Predicted to be Rs[4984420.].




In [43]:
house_price_prediction()

Enter the area of a house (in sq feet):100
Enter the number of bedrooms:4
Enter the number of bathrooms:2
Enter the number of floors of the house:4
Is there a main road? (Yes/No): Yes
Is there a guestroom? (Yes/No): Yes
Is there a basement? (Yes/No): Yes
Is there a hot water heating facility? (Yes/No): Yes
Is there an air conditioning facility? (Yes/No): Yes
Enter bnumber of parking:3
Is there a prefarea? (Yes/No): Yes
Enter Furnishing Status (furnished/semi-furnished/unfurnished): furnished
House Price Predicted to be Rs[8693300.].




In [44]:
house_price_prediction()

Enter the area of a house (in sq feet):2000
Enter the number of bedrooms:4
Enter the number of bathrooms:2
Enter the number of floors of the house:2
Is there a main road? (Yes/No): Yes
Is there a guestroom? (Yes/No): Yes
Is there a basement? (Yes/No): No
Is there a hot water heating facility? (Yes/No): Yes
Is there an air conditioning facility? (Yes/No): Yes
Enter bnumber of parking:4
Is there a prefarea? (Yes/No): Yes
Enter Furnishing Status (furnished/semi-furnished/unfurnished): furnished
House Price Predicted to be Rs[8840300.].




In [45]:
house_price_prediction()

Enter the area of a house (in sq feet):2000
Enter the number of bedrooms:4
Enter the number of bathrooms:2
Enter the number of floors of the house:2
Is there a main road? (Yes/No): No
Is there a guestroom? (Yes/No): No
Is there a basement? (Yes/No): No
Is there a hot water heating facility? (Yes/No): No
Is there an air conditioning facility? (Yes/No): Yes
Enter bnumber of parking:1
Is there a prefarea? (Yes/No): Yes
Enter Furnishing Status (furnished/semi-furnished/unfurnished): semi-furnished
House Price Predicted to be Rs[5565000.].


