In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import warnings
warnings.filterwarnings("ignore")


In [2]:
# Load the house-rent table from the CSV that sits next to the notebook.
DATA_FILE = "preprocessed_house_rent.csv"
dataset = pd.read_csv(DATA_FILE)

In [3]:
# Echo the loaded frame so the notebook renders it as this cell's output.
dataset

Unnamed: 0,BHK,Rent,Size,Area Type,City,Furnishing Status,Tenant Preferred,Bathroom,Floor Number,Total Floors
0,2,10000,1100,2,4,2,1,2,0,2
1,2,20000,800,2,4,1,1,1,1,3
2,2,17000,1000,2,4,1,1,1,1,3
3,2,10000,800,2,4,2,1,1,1,2
4,2,7500,850,1,4,2,0,1,1,2
...,...,...,...,...,...,...,...,...,...,...
4741,2,15000,1000,1,3,1,1,2,3,5
4742,3,29000,2000,2,3,1,1,3,1,4
4743,3,35000,1750,1,3,1,1,3,3,5
4744,3,45000,1500,1,3,1,2,2,23,34


In [4]:
# Count the missing entries in each column and print the per-column totals.
null_mask = dataset.isnull()
null_counts = null_mask.sum()
print(null_counts)

BHK                  0
Rent                 0
Size                 0
Area Type            0
City                 0
Furnishing Status    0
Tenant Preferred     0
Bathroom             0
Floor Number         0
Total Floors         0
dtype: int64


In [5]:
# Echo the column labels; the next cell picks feature columns from this list.
dataset.columns

Index(['BHK', 'Rent', 'Size', 'Area Type', 'City', 'Furnishing Status',
       'Tenant Preferred', 'Bathroom', 'Floor Number', 'Total Floors'],
      dtype='object')

In [6]:
# Feature matrix: every column except the target 'Rent', in the order the
# model will see them.
feature_columns = [
    'BHK',
    'Size',
    'Area Type',
    'Furnishing Status',
    'Tenant Preferred',
    'Bathroom',
    'City',
    'Floor Number',
    'Total Floors',
]
independent = dataset[feature_columns]

In [7]:
# Echo the feature frame to check the selected columns.
independent

Unnamed: 0,BHK,Size,Area Type,Furnishing Status,Tenant Preferred,Bathroom,City,Floor Number,Total Floors
0,2,1100,2,2,1,2,4,0,2
1,2,800,2,1,1,1,4,1,3
2,2,1000,2,1,1,1,4,1,3
3,2,800,2,2,1,1,4,1,2
4,2,850,1,2,0,1,4,1,2
...,...,...,...,...,...,...,...,...,...
4741,2,1000,1,1,1,2,3,3,5
4742,3,2000,2,1,1,3,3,1,4
4743,3,1750,1,1,1,3,3,3,5
4744,3,1500,1,1,2,2,3,23,34


In [8]:
# Target: 'Rent', selected with a list so the result stays a one-column
# DataFrame rather than a Series.
target_columns = ['Rent']
dependent = dataset[target_columns]

In [9]:
# Echo the target frame to check the selection.
dependent

Unnamed: 0,Rent
0,10000
1,20000
2,17000
3,10000
4,7500
...,...
4741,15000
4742,29000
4743,35000
4744,45000


In [10]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# identical across re-runs.
split_parts = train_test_split(
    independent,
    dependent,
    test_size=0.30,
    random_state=0,
)
x_train, x_test, y_train, y_test = split_parts

In [11]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
# Fit the scaler on the training features only, then apply that same fitted
# scaler to the test features (the right-hand side is evaluated left to right,
# so the fit happens before the test transform).
x_train, x_test = sc.fit_transform(x_train), sc.transform(x_test)

In [12]:
from sklearn.ensemble import RandomForestRegressor

In [13]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid: 4 split criteria x 3 max_features settings x 2 forest
# sizes = 24 candidates, each evaluated with 5-fold cross-validation.
param_grid = {
    'criterion': ["squared_error", "absolute_error", "friedman_mse", "poisson"],
    'max_features': ["sqrt", "log2", None],
    'n_estimators': [10, 100],
}

# Seed the forest so the cross-validation scores, the selected best
# parameters and the refit model are reproducible on Restart & Run All.
# The seed matches the one used for the train/test split.
grid = GridSearchCV(
    RandomForestRegressor(random_state=0),
    param_grid,
    refit=True,    # retrain the best candidate on the full training set
    verbose=3,
    n_jobs=-1,     # use all available CPU cores
)

# y_train is a single-column DataFrame; flatten it to the 1-D target that the
# regressor expects instead of relying on an implicit conversion.
grid.fit(x_train, np.ravel(y_train))

Fitting 5 folds for each of 24 candidates, totalling 120 fits


In [14]:
from sklearn.metrics import r2_score

# Keep the full cross-validation log; a later cell turns it into a table.
re = grid.cv_results_

# Score the refit best estimator on the held-out test split.
grid_predictions = grid.predict(x_test)
r_score = r2_score(y_test, grid_predictions)

summary = "The R_score value for best parameter {}:".format(grid.best_params_)
print(summary, r_score)

The R_score value for best parameter {'criterion': 'absolute_error', 'max_features': 'log2', 'n_estimators': 100}: 0.2608566874094119


In [15]:
# Tabulate the cross-validation log: one row per candidate, one column per
# key of the results dictionary.
table = pd.DataFrame(re)

In [16]:
# Echo the cross-validation results table (timings, per-split scores, rank).
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.105883,0.008823,0.008743,0.005251,squared_error,sqrt,10,"{'criterion': 'squared_error', 'max_features':...",0.737697,0.654741,0.641801,0.729065,0.616361,0.675933,0.048581,14
1,0.854695,0.016015,0.040892,0.005722,squared_error,sqrt,100,"{'criterion': 'squared_error', 'max_features':...",0.742697,0.716321,0.627382,0.725883,0.624778,0.687412,0.050791,10
2,0.101079,0.009622,0.006031,0.005813,squared_error,log2,10,"{'criterion': 'squared_error', 'max_features':...",0.719313,0.704784,0.606666,0.667014,0.63679,0.666913,0.041754,17
3,0.849757,0.011394,0.035637,0.00959,squared_error,log2,100,"{'criterion': 'squared_error', 'max_features':...",0.750534,0.716038,0.64629,0.732201,0.644245,0.697862,0.044313,6
4,0.152395,0.003451,0.004808,0.006413,squared_error,,10,"{'criterion': 'squared_error', 'max_features':...",0.705156,0.671411,0.651005,0.661189,0.595833,0.656919,0.035557,22
5,1.423783,0.01954,0.042496,0.006088,squared_error,,100,"{'criterion': 'squared_error', 'max_features':...",0.696108,0.707783,0.647077,0.725331,0.647011,0.684662,0.032093,11
6,0.850316,0.04828,0.009377,0.007656,absolute_error,sqrt,10,"{'criterion': 'absolute_error', 'max_features'...",0.720933,0.716313,0.629072,0.709074,0.672647,0.689608,0.034715,9
7,8.516732,0.170355,0.034879,0.006269,absolute_error,sqrt,100,"{'criterion': 'absolute_error', 'max_features'...",0.746819,0.750231,0.620908,0.727558,0.649844,0.699072,0.053369,3
8,0.884294,0.016584,0.011178,0.006149,absolute_error,log2,10,"{'criterion': 'absolute_error', 'max_features'...",0.708079,0.66574,0.605011,0.708626,0.632093,0.66391,0.041074,19
9,8.803843,0.317166,0.039961,0.007757,absolute_error,log2,100,"{'criterion': 'absolute_error', 'max_features'...",0.754303,0.736594,0.609486,0.754493,0.670092,0.704994,0.056956,1
