In [18]:
import numpy as np
import pandas as pd
import seaborn as sns 
import matplotlib.pyplot as plt

In [20]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import r2_score,mean_absolute_error
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor
from sklearn.linear_model import Lasso,Ridge
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score

%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [22]:


# Read the housing CSV from the working directory. `dataset` holds the parsed
# file and `df` is the frame the following cells work on.
dataset = pd.read_csv(filepath_or_buffer="Housing.csv")
df = pd.DataFrame(data=dataset)
# Show five randomly chosen rows as a quick sanity check of the load.
df.sample(n=5)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
35,8080940,7000,3,2,4,yes,no,no,no,yes,2,no,furnished
224,4760000,10240,2,1,1,yes,no,no,no,yes,2,yes,unfurnished
51,7420000,6325,3,1,4,yes,no,no,no,yes,1,no,unfurnished
320,4060000,4320,3,1,2,yes,no,no,no,no,2,yes,furnished
341,3850000,3180,2,2,1,yes,no,yes,no,no,2,no,semi-furnished


In [24]:
# Convert the categorical (yes/no and furnishing-status) columns into numeric
# dummy columns. drop_first=True drops one level per column, so each yes/no
# column becomes a single `<name>_yes` indicator.
categorical_cols=['furnishingstatus','mainroad','guestroom','basement','hotwaterheating','airconditioning','prefarea']

# Encode straight from the untouched `dataset` frame instead of concatenating
# and then dropping columns from `df` in place. The resulting columns and their
# order are the same (numeric columns first, then the dummies in list order),
# but the cell can now be re-run without a KeyError for already-dropped columns.
df=pd.get_dummies(dataset,columns=categorical_cols,drop_first=True)
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking,furnishingstatus_semi-furnished,furnishingstatus_unfurnished,mainroad_yes,guestroom_yes,basement_yes,hotwaterheating_yes,airconditioning_yes,prefarea_yes
0,13300000,7420,4,2,3,2,False,False,True,False,False,False,True,True
1,12250000,8960,4,4,4,3,False,False,True,False,False,False,True,False
2,12250000,9960,3,2,2,2,True,False,True,False,True,False,False,True
3,12215000,7500,4,2,2,3,False,False,True,False,True,False,True,True
4,11410000,7420,4,1,2,2,False,False,True,True,True,False,True,False


In [26]:
# Separate the target column (price) from the predictors, then hold out 20% of
# the rows as a test set. The fixed random_state makes the split repeatable.
Y = df['price']
X = df.drop(columns=['price'])
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=21
)

In [28]:
# Use MinMaxScaler to bring all feature columns onto the same scale.
# The scaler is fitted on the training split only and then applied to both
# splits, so the test data is scaled with the training-set statistics.
scaler=MinMaxScaler()
scaler.fit(X_train)
X_train_scaled=scaler.transform(X_train)
# Fix: the transformed test split used to be assigned to X_train_scaled,
# which overwrote the scaled training data and left X_test_scaled undefined.
X_test_scaled=scaler.transform(X_test)
# NOTE(review): the model-fitting cell in this notebook trains on the unscaled
# X_train / X_test - confirm whether the scaled arrays are meant to be used.

In [30]:
# Candidate regressors, keyed by the display name used in the printed results.
# The random forest and gradient boosting estimators get a fixed random_state
# (the same seed as the train/test split) so their scores are repeatable from
# run to run; without it every execution of the notebook gives different
# numbers for those two models. The remaining estimators keep their defaults.
Model={
    'Random Forest Regressor':RandomForestRegressor(random_state=21),
    'Gradient Boost Regressor':GradientBoostingRegressor(random_state=21),
    'XGRF Regressor':xgb.XGBRFRegressor(),
    'Lasso Reg': Lasso(),
    'Rigge Reg': Ridge(),
}

In [32]:
# Fit every candidate regressor on the training split and store its
# predictions for the test split under the model's display name.
pred={}

# The loop variable is deliberately NOT called `Model`: reusing that name
# rebinds the dict of estimators to a single estimator, so re-running this
# cell (or any later use of the dict) fails because an estimator has no
# .items() method.
for name,estimator in Model.items():
    estimator.fit(X_train,Y_train)
    y_pred=estimator.predict(X_test)
    pred[name]=y_pred

In [34]:
# Score each model's test-set predictions. The R2 score is kept per model in
# `acc`; both the mean squared error and R2 are printed.
acc = dict()

for name, y_pred in pred.items():
    r2 = r2_score(Y_test, y_pred)
    mse = mean_squared_error(Y_test, y_pred)
    acc[name] = r2
    # One print call with a newline separator emits exactly the same text as
    # printing the three result lines followed by print('\n').
    print(
        f"Results for {name}: ",
        f"Mean Square Error: {mse}",
        f"R2 Score: {r2}",
        '\n',
        sep='\n',
    )

Results for Random Forest Regressor: 
Mean Square Error: 1245360974878.247
R2 Score: 0.6591322700486851


Results for Gradient Boost Regressor: 
Mean Square Error: 1345170510563.1526
R2 Score: 0.6318134038382399


Results for XGRF Regressor: 
Mean Square Error: 1281594490880.0
R2 Score: 0.6492148041725159


Results for Lasso Reg: 
Mean Square Error: 1351485046095.2979
R2 Score: 0.6300850524317332


Results for Rigge Reg: 
Mean Square Error: 1353613587560.503
R2 Score: 0.6295024493857188




In [36]:
# Turn the {model name: R2 score} mapping into a one-column frame indexed by
# model name. The column is labelled 'Accuracy' but holds the R2 values.
Model_Accuracy = pd.DataFrame.from_dict(
    data=acc,
    orient='index',
    columns=['Accuracy'],
)

In [38]:
# Display the models ranked from highest to lowest score.
Model_Accuracy.sort_values('Accuracy', ascending=False)

Unnamed: 0,Accuracy
Random Forest Regressor,0.659132
XGRF Regressor,0.649215
Gradient Boost Regressor,0.631813
Lasso Reg,0.630085
Rigge Reg,0.629502
