In [60]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import accuracy_score

In [61]:
# Load the training data from train.csv into a DataFrame, then preview
# the first five rows as the cell's displayed result.
housing_data = pd.read_csv(filepath_or_buffer='train.csv')
housing_data.head(n=5)

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [62]:
# Target: the SalePrice column, which is the value the model has to predict.
y = housing_data['SalePrice']

# Predictor columns fed to the model (order is preserved in X).
features = [
    'LotArea',
    'MSSubClass',
    'PoolArea',
    'MoSold',
    'YrSold',
    'MiscVal',
    'OverallQual',
    'OverallCond',
    'YearBuilt',
    'YearRemodAdd',
    '1stFlrSF',
    '2ndFlrSF',
    'LowQualFinSF',
    'GrLivArea',
    'FullBath',
    'HalfBath',
    'BedroomAbvGr',
    'KitchenAbvGr',
    'TotRmsAbvGrd',
    'Fireplaces',
    'WoodDeckSF',
    'OpenPorchSF',
    'EnclosedPorch',
    '3SsnPorch',
    'ScreenPorch',
]

# Feature matrix: only the columns listed above.
X = housing_data[features]

In [63]:
# Split features and target into training and validation parts.
# The fixed random_state makes the split repeatable; no test_size is given,
# so scikit-learn's default proportion applies.
train_X, val_X, train_y, val_y = train_test_split(
    X,
    y,
    random_state=1,
)

In [64]:
# Random forest regressor used for every fit/predict step below.
# A fixed random_state (same seed as the train/validation split) makes the
# fitted forest, and therefore the validation error and the submission,
# reproducible across kernel restarts.
rf_model = RandomForestRegressor(random_state=1)

In [65]:
# Sanity check: show the shape of each split, one per line, in the order
# train_X, val_X, train_y, val_y.
print(*(split.shape for split in (train_X, val_X, train_y, val_y)), sep='\n')

(1095, 25)
(365, 25)
(1095,)
(365,)


In [66]:
# Fit on the training split only. Fitting on the full (X, y) would expose the
# validation rows to the model, so any score computed on val_X / val_y from
# that fit would be optimistically biased.
rf_model.fit(train_X, train_y)

RandomForestRegressor()

In [67]:
# Preview the first rows of the validation features as plain text.
print(val_X.head())

      LotArea  MSSubClass  PoolArea  MoSold  YrSold  MiscVal  OverallQual  \
258     12435          60         0       5    2008        0            7   
267      8400          75         0       7    2008        0            5   
288      9819          20         0       2    2010        0            5   
649      1936         180         0      12    2007        0            4   
1233    12160          20         0       5    2010        0            5   

      OverallCond  YearBuilt  YearRemodAdd  ...  HalfBath  BedroomAbvGr  \
258             5       2001          2001  ...         1             3   
267             8       1939          1997  ...         1             4   
288             5       1967          1967  ...         0             3   
649             6       1970          1970  ...         0             1   
1233            5       1959          1959  ...         0             3   

      KitchenAbvGr  TotRmsAbvGrd  Fireplaces  WoodDeckSF  OpenPorchSF  \
258          

In [68]:
# Train the forest on the training split; the fitted estimator is the
# cell's displayed result.
rf_model.fit(X=train_X, y=train_y)

RandomForestRegressor()

In [69]:
# Predict sale prices for the validation rows and round them to whole numbers.
rf_val_pred = rf_model.predict(X=val_X).round(decimals=0)

In [70]:
# Show the rounded validation predictions.
print(rf_val_pred)

[209156. 167141. 116184.  80900. 143166. 297979. 293507. 145986. 222077.
 199907. 172190.  77785. 200646. 328524. 221756. 107628. 113067. 101263.
 184505. 139672. 142680. 121810. 269240. 321666.  90976. 178729. 127228.
 187941. 468272. 119412. 118697. 108981. 121936.  88921. 139692. 353079.
 122007.  99912. 257985. 122918. 155937. 146984. 100928. 120326. 182174.
 171384. 131927. 180946. 262022. 243854. 121332. 282313. 106360. 255618.
 195237. 103059. 123358. 174511. 129702. 177621. 163485. 298825.  97012.
 127527. 155259. 131000. 144758. 233017. 138920. 157985. 183110. 125882.
 301383. 157996. 150759. 215765. 172234. 126112. 372643. 188923. 207744.
 143799. 135581. 152628. 189357. 161672. 160030. 162134. 190092. 176722.
 197977. 163804. 120860. 117451. 116336. 114858. 117579. 140969. 140634.
 152221. 175674. 129373. 113730. 115860. 126820. 181457. 183949. 156218.
 149726. 313178. 147968. 162827. 147364. 199814. 226898. 173902. 244157.
 122822. 148737. 312452. 138153. 242613. 320064. 17

In [71]:
# Show the true validation sale prices for comparison with the predictions.
print(val_y)

258     231500
267     179500
288     122000
649      84500
1233    142000
         ...  
1017    187500
534     178000
1334    125000
1369    232000
628     135000
Name: SalePrice, Length: 365, dtype: int64


In [74]:
# Mean absolute error of the validation predictions.
# scikit-learn metrics take the ground truth first and the predictions second
# (y_true, y_pred); keeping that order avoids mistakes with metrics that are
# not symmetric in their arguments.
print(mean_absolute_error(val_y, rf_val_pred))

17974.70684931507


In [75]:
# Load the competition test set from test.csv.
test_data = pd.read_csv(filepath_or_buffer='test.csv')

In [76]:
# Keep only the model's feature columns, in the same order used for training.
test_X = test_data.loc[:, features]

In [77]:
# Predict sale prices for the test rows with the fitted forest.
test_pred = rf_model.predict(X=test_X)

In [78]:
# Assemble the submission table: one row per test Id with its predicted price.
output = pd.DataFrame(
    {
        'Id': test_data['Id'],
        'SalePrice': test_pred,
    }
)

In [79]:
# Show the submission table (Id and predicted SalePrice) before writing it out.
print(output)

        Id  SalePrice
0     1461  121510.66
1     1462  152109.50
2     1463  168545.78
3     1464  182204.85
4     1465  195369.68
...    ...        ...
1454  2915   87110.50
1455  2916   81682.50
1456  2917  157180.00
1457  2918  137234.60
1458  2919  234464.60

[1459 rows x 2 columns]


In [80]:
# Write the submission file with only the Id and SalePrice columns.
# `index` must be the boolean False: the string 'False' is truthy, so pandas
# would still write the row index as an extra unnamed first column.
output.to_csv('submission.csv', index=False)