# Predict house pricing using advanced regression

#### Importing Necessary Libraries

In [1]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score

pd.set_option("display.max_rows", 100)
pd.set_option("display.max_columns", 100)

#### Exploratory Data Analysis and Preparation of Train Data

In [2]:
df = pd.read_csv("train.csv")

In [3]:
df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,196.0,Gd,TA,PConc,Gd,TA,No,GLQ,706,Unf,0,150,856,GasA,Ex,Y,SBrkr,856,854,0,1710,1,0,2,1,3,1,Gd,8,Typ,0,,Attchd,2003.0,RFn,2,548,TA,TA,Y,0,61,0,0,0,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,Norm,1Fam,1Story,6,8,1976,1976,Gable,CompShg,MetalSd,MetalSd,,0.0,TA,TA,CBlock,Gd,TA,Gd,ALQ,978,Unf,0,284,1262,GasA,Ex,Y,SBrkr,1262,0,0,1262,0,1,2,0,3,1,TA,6,Typ,1,TA,Attchd,1976.0,RFn,2,460,TA,TA,Y,298,0,0,0,0,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2001,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,162.0,Gd,TA,PConc,Gd,TA,Mn,GLQ,486,Unf,0,434,920,GasA,Ex,Y,SBrkr,920,866,0,1786,1,0,2,1,3,1,Gd,6,Typ,1,TA,Attchd,2001.0,RFn,2,608,TA,TA,Y,0,42,0,0,0,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,2Story,7,5,1915,1970,Gable,CompShg,Wd Sdng,Wd Shng,,0.0,TA,TA,BrkTil,TA,Gd,No,ALQ,216,Unf,0,540,756,GasA,Gd,Y,SBrkr,961,756,0,1717,1,0,1,0,3,1,Gd,7,Typ,1,Gd,Detchd,1998.0,Unf,3,642,TA,TA,Y,0,35,272,0,0,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,BrkFace,350.0,Gd,TA,PConc,Gd,TA,Av,GLQ,655,Unf,0,490,1145,GasA,Ex,Y,SBrkr,1145,1053,0,2198,1,0,2,1,4,1,Gd,9,Typ,1,TA,Attchd,2000.0,RFn,3,836,TA,TA,Y,192,84,0,0,0,0,,,,0,12,2008,WD,Normal,250000


In [4]:
df.shape

(1460, 81)

In [5]:
df.isnull().sum()

Id                  0
MSSubClass          0
MSZoning            0
LotFrontage       259
LotArea             0
Street              0
Alley            1369
LotShape            0
LandContour         0
Utilities           0
LotConfig           0
LandSlope           0
Neighborhood        0
Condition1          0
Condition2          0
BldgType            0
HouseStyle          0
OverallQual         0
OverallCond         0
YearBuilt           0
YearRemodAdd        0
RoofStyle           0
RoofMatl            0
Exterior1st         0
Exterior2nd         0
MasVnrType        872
MasVnrArea          8
ExterQual           0
ExterCond           0
Foundation          0
BsmtQual           37
BsmtCond           37
BsmtExposure       38
BsmtFinType1       37
BsmtFinSF1          0
BsmtFinType2       38
BsmtFinSF2          0
BsmtUnfSF           0
TotalBsmtSF         0
Heating             0
HeatingQC           0
CentralAir          0
Electrical          1
1stFlrSF            0
2ndFlrSF            0
LowQualFin

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   int64  
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1201 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          91 non-null     object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   int64  
 18  OverallC

In [7]:
categoric_columns = [i for i in df.columns if df[i].dtype == "object"]
df_numeric = df.drop(columns=categoric_columns, axis=1)
df_categoric = df[categoric_columns]

In [8]:
df_numeric

Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SalePrice
0,1,60,65.0,8450,7,5,2003,2003,196.0,706,0,150,856,856,854,0,1710,1,0,2,1,3,1,8,0,2003.0,2,548,0,61,0,0,0,0,0,2,2008,208500
1,2,20,80.0,9600,6,8,1976,1976,0.0,978,0,284,1262,1262,0,0,1262,0,1,2,0,3,1,6,1,1976.0,2,460,298,0,0,0,0,0,0,5,2007,181500
2,3,60,68.0,11250,7,5,2001,2002,162.0,486,0,434,920,920,866,0,1786,1,0,2,1,3,1,6,1,2001.0,2,608,0,42,0,0,0,0,0,9,2008,223500
3,4,70,60.0,9550,7,5,1915,1970,0.0,216,0,540,756,961,756,0,1717,1,0,1,0,3,1,7,1,1998.0,3,642,0,35,272,0,0,0,0,2,2006,140000
4,5,60,84.0,14260,8,5,2000,2000,350.0,655,0,490,1145,1145,1053,0,2198,1,0,2,1,4,1,9,1,2000.0,3,836,192,84,0,0,0,0,0,12,2008,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,60,62.0,7917,6,5,1999,2000,0.0,0,0,953,953,953,694,0,1647,0,0,2,1,3,1,7,1,1999.0,2,460,0,40,0,0,0,0,0,8,2007,175000
1456,1457,20,85.0,13175,6,6,1978,1988,119.0,790,163,589,1542,2073,0,0,2073,1,0,2,0,3,1,7,2,1978.0,2,500,349,0,0,0,0,0,0,2,2010,210000
1457,1458,70,66.0,9042,7,9,1941,2006,0.0,275,0,877,1152,1188,1152,0,2340,0,0,2,0,4,1,9,2,1941.0,1,252,0,60,0,0,0,0,2500,5,2010,266500
1458,1459,20,68.0,9717,5,6,1950,1996,0.0,49,1029,0,1078,1078,0,0,1078,1,0,1,0,2,1,5,0,1950.0,1,240,366,0,112,0,0,0,0,4,2010,142125


In [9]:
df_categoric

Unnamed: 0,MSZoning,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,Heating,HeatingQC,CentralAir,Electrical,KitchenQual,Functional,FireplaceQu,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,PoolQC,Fence,MiscFeature,SaleType,SaleCondition
0,RL,Pave,,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,No,GLQ,Unf,GasA,Ex,Y,SBrkr,Gd,Typ,,Attchd,RFn,TA,TA,Y,,,,WD,Normal
1,RL,Pave,,Reg,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,Norm,1Fam,1Story,Gable,CompShg,MetalSd,MetalSd,,TA,TA,CBlock,Gd,TA,Gd,ALQ,Unf,GasA,Ex,Y,SBrkr,TA,Typ,TA,Attchd,RFn,TA,TA,Y,,,,WD,Normal
2,RL,Pave,,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,Mn,GLQ,Unf,GasA,Ex,Y,SBrkr,Gd,Typ,TA,Attchd,RFn,TA,TA,Y,,,,WD,Normal
3,RL,Pave,,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,2Story,Gable,CompShg,Wd Sdng,Wd Shng,,TA,TA,BrkTil,TA,Gd,No,ALQ,Unf,GasA,Gd,Y,SBrkr,Gd,Typ,Gd,Detchd,Unf,TA,TA,Y,,,,WD,Abnorml
4,RL,Pave,,IR1,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,Av,GLQ,Unf,GasA,Ex,Y,SBrkr,Gd,Typ,TA,Attchd,RFn,TA,TA,Y,,,,WD,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,RL,Pave,,Reg,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,Gable,CompShg,VinylSd,VinylSd,,TA,TA,PConc,Gd,TA,No,Unf,Unf,GasA,Ex,Y,SBrkr,TA,Typ,TA,Attchd,RFn,TA,TA,Y,,,,WD,Normal
1456,RL,Pave,,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,Gable,CompShg,Plywood,Plywood,Stone,TA,TA,CBlock,Gd,TA,No,ALQ,Rec,GasA,TA,Y,SBrkr,TA,Min1,TA,Attchd,Unf,TA,TA,Y,,MnPrv,,WD,Normal
1457,RL,Pave,,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,2Story,Gable,CompShg,CemntBd,CmentBd,,Ex,Gd,Stone,TA,Gd,No,GLQ,Unf,GasA,Ex,Y,SBrkr,Gd,Typ,Gd,Attchd,RFn,TA,TA,Y,,GdPrv,Shed,WD,Normal
1458,RL,Pave,,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,Hip,CompShg,MetalSd,MetalSd,,TA,TA,CBlock,TA,TA,Mn,GLQ,Rec,GasA,Gd,Y,FuseA,Gd,Typ,,Attchd,Unf,TA,TA,Y,,,,WD,Normal


In [10]:
df.corr(numeric_only=True)["SalePrice"]

Id              -0.021917
MSSubClass      -0.084284
LotFrontage      0.351799
LotArea          0.263843
OverallQual      0.790982
OverallCond     -0.077856
YearBuilt        0.522897
YearRemodAdd     0.507101
MasVnrArea       0.477493
BsmtFinSF1       0.386420
BsmtFinSF2      -0.011378
BsmtUnfSF        0.214479
TotalBsmtSF      0.613581
1stFlrSF         0.605852
2ndFlrSF         0.319334
LowQualFinSF    -0.025606
GrLivArea        0.708624
BsmtFullBath     0.227122
BsmtHalfBath    -0.016844
FullBath         0.560664
HalfBath         0.284108
BedroomAbvGr     0.168213
KitchenAbvGr    -0.135907
TotRmsAbvGrd     0.533723
Fireplaces       0.466929
GarageYrBlt      0.486362
GarageCars       0.640409
GarageArea       0.623431
WoodDeckSF       0.324413
OpenPorchSF      0.315856
EnclosedPorch   -0.128578
3SsnPorch        0.044584
ScreenPorch      0.111447
PoolArea         0.092404
MiscVal         -0.021190
MoSold           0.046432
YrSold          -0.028923
SalePrice        1.000000
Name: SalePr

In [11]:
correlation = df.corr(numeric_only=True)["SalePrice"]
important_numeric_columns = correlation[abs(correlation) > 0.20].index.tolist()

In [12]:
important_numeric_columns

['LotFrontage',
 'LotArea',
 'OverallQual',
 'YearBuilt',
 'YearRemodAdd',
 'MasVnrArea',
 'BsmtFinSF1',
 'BsmtUnfSF',
 'TotalBsmtSF',
 '1stFlrSF',
 '2ndFlrSF',
 'GrLivArea',
 'BsmtFullBath',
 'FullBath',
 'HalfBath',
 'TotRmsAbvGrd',
 'Fireplaces',
 'GarageYrBlt',
 'GarageCars',
 'GarageArea',
 'WoodDeckSF',
 'OpenPorchSF',
 'SalePrice']

In [13]:
df_important_numeric = df_numeric[important_numeric_columns]

In [14]:
df_important_numeric.head()

Unnamed: 0,LotFrontage,LotArea,OverallQual,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,GrLivArea,BsmtFullBath,FullBath,HalfBath,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,SalePrice
0,65.0,8450,7,2003,2003,196.0,706,150,856,856,854,1710,1,2,1,8,0,2003.0,2,548,0,61,208500
1,80.0,9600,6,1976,1976,0.0,978,284,1262,1262,0,1262,0,2,0,6,1,1976.0,2,460,298,0,181500
2,68.0,11250,7,2001,2002,162.0,486,434,920,920,866,1786,1,2,1,6,1,2001.0,2,608,0,42,223500
3,60.0,9550,7,1915,1970,0.0,216,540,756,961,756,1717,1,1,0,7,1,1998.0,3,642,0,35,140000
4,84.0,14260,8,2000,2000,350.0,655,490,1145,1145,1053,2198,1,2,1,9,1,2000.0,3,836,192,84,250000


In [15]:
df_important_numeric.isnull().sum()

LotFrontage     259
LotArea           0
OverallQual       0
YearBuilt         0
YearRemodAdd      0
MasVnrArea        8
BsmtFinSF1        0
BsmtUnfSF         0
TotalBsmtSF       0
1stFlrSF          0
2ndFlrSF          0
GrLivArea         0
BsmtFullBath      0
FullBath          0
HalfBath          0
TotRmsAbvGrd      0
Fireplaces        0
GarageYrBlt      81
GarageCars        0
GarageArea        0
WoodDeckSF        0
OpenPorchSF       0
SalePrice         0
dtype: int64

In [16]:
df_important_numeric["LotFrontage"]

0       65.0
1       80.0
2       68.0
3       60.0
4       84.0
        ... 
1455    62.0
1456    85.0
1457    66.0
1458    68.0
1459    75.0
Name: LotFrontage, Length: 1460, dtype: float64

In [17]:
df_important_numeric["GarageYrBlt"]

0       2003.0
1       1976.0
2       2001.0
3       1998.0
4       2000.0
         ...  
1455    1999.0
1456    1978.0
1457    1941.0
1458    1950.0
1459    1965.0
Name: GarageYrBlt, Length: 1460, dtype: float64

In [18]:
imputer = SimpleImputer(strategy="most_frequent")
df_important_numeric = pd.DataFrame(imputer.fit_transform(df_important_numeric), columns=df_important_numeric.columns)

In [19]:
df_important_numeric.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 23 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   LotFrontage   1460 non-null   float64
 1   LotArea       1460 non-null   float64
 2   OverallQual   1460 non-null   float64
 3   YearBuilt     1460 non-null   float64
 4   YearRemodAdd  1460 non-null   float64
 5   MasVnrArea    1460 non-null   float64
 6   BsmtFinSF1    1460 non-null   float64
 7   BsmtUnfSF     1460 non-null   float64
 8   TotalBsmtSF   1460 non-null   float64
 9   1stFlrSF      1460 non-null   float64
 10  2ndFlrSF      1460 non-null   float64
 11  GrLivArea     1460 non-null   float64
 12  BsmtFullBath  1460 non-null   float64
 13  FullBath      1460 non-null   float64
 14  HalfBath      1460 non-null   float64
 15  TotRmsAbvGrd  1460 non-null   float64
 16  Fireplaces    1460 non-null   float64
 17  GarageYrBlt   1460 non-null   float64
 18  GarageCars    1460 non-null 

In [20]:
df_categoric.head()

Unnamed: 0,MSZoning,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,Heating,HeatingQC,CentralAir,Electrical,KitchenQual,Functional,FireplaceQu,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,PoolQC,Fence,MiscFeature,SaleType,SaleCondition
0,RL,Pave,,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,No,GLQ,Unf,GasA,Ex,Y,SBrkr,Gd,Typ,,Attchd,RFn,TA,TA,Y,,,,WD,Normal
1,RL,Pave,,Reg,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,Norm,1Fam,1Story,Gable,CompShg,MetalSd,MetalSd,,TA,TA,CBlock,Gd,TA,Gd,ALQ,Unf,GasA,Ex,Y,SBrkr,TA,Typ,TA,Attchd,RFn,TA,TA,Y,,,,WD,Normal
2,RL,Pave,,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,Mn,GLQ,Unf,GasA,Ex,Y,SBrkr,Gd,Typ,TA,Attchd,RFn,TA,TA,Y,,,,WD,Normal
3,RL,Pave,,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,2Story,Gable,CompShg,Wd Sdng,Wd Shng,,TA,TA,BrkTil,TA,Gd,No,ALQ,Unf,GasA,Gd,Y,SBrkr,Gd,Typ,Gd,Detchd,Unf,TA,TA,Y,,,,WD,Abnorml
4,RL,Pave,,IR1,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,Av,GLQ,Unf,GasA,Ex,Y,SBrkr,Gd,Typ,TA,Attchd,RFn,TA,TA,Y,,,,WD,Normal


In [21]:
for i in df_categoric.columns:
    print(i, df[i].unique())

MSZoning ['RL' 'RM' 'C (all)' 'FV' 'RH']
Street ['Pave' 'Grvl']
Alley [nan 'Grvl' 'Pave']
LotShape ['Reg' 'IR1' 'IR2' 'IR3']
LandContour ['Lvl' 'Bnk' 'Low' 'HLS']
Utilities ['AllPub' 'NoSeWa']
LotConfig ['Inside' 'FR2' 'Corner' 'CulDSac' 'FR3']
LandSlope ['Gtl' 'Mod' 'Sev']
Neighborhood ['CollgCr' 'Veenker' 'Crawfor' 'NoRidge' 'Mitchel' 'Somerst' 'NWAmes'
 'OldTown' 'BrkSide' 'Sawyer' 'NridgHt' 'NAmes' 'SawyerW' 'IDOTRR'
 'MeadowV' 'Edwards' 'Timber' 'Gilbert' 'StoneBr' 'ClearCr' 'NPkVill'
 'Blmngtn' 'BrDale' 'SWISU' 'Blueste']
Condition1 ['Norm' 'Feedr' 'PosN' 'Artery' 'RRAe' 'RRNn' 'RRAn' 'PosA' 'RRNe']
Condition2 ['Norm' 'Artery' 'RRNn' 'Feedr' 'PosN' 'PosA' 'RRAn' 'RRAe']
BldgType ['1Fam' '2fmCon' 'Duplex' 'TwnhsE' 'Twnhs']
HouseStyle ['2Story' '1Story' '1.5Fin' '1.5Unf' 'SFoyer' 'SLvl' '2.5Unf' '2.5Fin']
RoofStyle ['Gable' 'Hip' 'Gambrel' 'Mansard' 'Flat' 'Shed']
RoofMatl ['CompShg' 'WdShngl' 'Metal' 'WdShake' 'Membran' 'Tar&Grv' 'Roll'
 'ClyTile']
Exterior1st ['VinylSd' 'MetalSd'

In [22]:
df_categoric.nunique()

MSZoning          5
Street            2
Alley             2
LotShape          4
LandContour       4
Utilities         2
LotConfig         5
LandSlope         3
Neighborhood     25
Condition1        9
Condition2        8
BldgType          5
HouseStyle        8
RoofStyle         6
RoofMatl          8
Exterior1st      15
Exterior2nd      16
MasVnrType        3
ExterQual         4
ExterCond         5
Foundation        6
BsmtQual          4
BsmtCond          4
BsmtExposure      4
BsmtFinType1      6
BsmtFinType2      6
Heating           6
HeatingQC         5
CentralAir        2
Electrical        5
KitchenQual       4
Functional        7
FireplaceQu       5
GarageType        6
GarageFinish      3
GarageQual        5
GarageCond        5
PavedDrive        3
PoolQC            3
Fence             4
MiscFeature       4
SaleType          9
SaleCondition     6
dtype: int64

In [23]:
df_categoric.isnull().sum()

MSZoning            0
Street              0
Alley            1369
LotShape            0
LandContour         0
Utilities           0
LotConfig           0
LandSlope           0
Neighborhood        0
Condition1          0
Condition2          0
BldgType            0
HouseStyle          0
RoofStyle           0
RoofMatl            0
Exterior1st         0
Exterior2nd         0
MasVnrType        872
ExterQual           0
ExterCond           0
Foundation          0
BsmtQual           37
BsmtCond           37
BsmtExposure       38
BsmtFinType1       37
BsmtFinType2       38
Heating             0
HeatingQC           0
CentralAir          0
Electrical          1
KitchenQual         0
Functional          0
FireplaceQu       690
GarageType         81
GarageFinish       81
GarageQual         81
GarageCond         81
PavedDrive          0
PoolQC           1453
Fence            1179
MiscFeature      1406
SaleType            0
SaleCondition       0
dtype: int64

In [24]:
df_categoric = pd.get_dummies(df_categoric, drop_first=True)

In [25]:
df_categoric.head()

Unnamed: 0,MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,Street_Pave,Alley_Pave,LotShape_IR2,LotShape_IR3,LotShape_Reg,LandContour_HLS,LandContour_Low,LandContour_Lvl,Utilities_NoSeWa,LotConfig_CulDSac,LotConfig_FR2,LotConfig_FR3,LotConfig_Inside,LandSlope_Mod,LandSlope_Sev,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Condition1_Feedr,Condition1_Norm,Condition1_PosA,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition1_RRNe,...,KitchenQual_Gd,KitchenQual_TA,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Mod,Functional_Sev,Functional_Typ,FireplaceQu_Fa,FireplaceQu_Gd,FireplaceQu_Po,FireplaceQu_TA,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_CarPort,GarageType_Detchd,GarageFinish_RFn,GarageFinish_Unf,GarageQual_Fa,GarageQual_Gd,GarageQual_Po,GarageQual_TA,GarageCond_Fa,GarageCond_Gd,GarageCond_Po,GarageCond_TA,PavedDrive_P,PavedDrive_Y,PoolQC_Fa,PoolQC_Gd,Fence_GdWo,Fence_MnPrv,Fence_MnWw,MiscFeature_Othr,MiscFeature_Shed,MiscFeature_TenC,SaleType_CWD,SaleType_Con,SaleType_ConLD,SaleType_ConLI,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
0,False,False,True,False,True,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,True,False,False,False,False,True,False,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False
1,False,False,True,False,True,False,False,False,True,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,...,False,True,False,False,False,False,False,True,False,False,False,True,True,False,False,False,False,True,False,False,False,False,True,False,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False
2,False,False,True,False,True,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,True,False,False,False,True,True,False,False,False,False,True,False,False,False,False,True,False,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False
3,False,False,True,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,True,False,True,False,False,False,False,False,False,True,False,True,False,False,False,True,False,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False
4,False,False,True,False,True,False,False,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,True,False,False,False,True,True,False,False,False,False,True,False,False,False,False,True,False,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False


In [26]:
df_categoric.shape

(1460, 208)

In [27]:
df_categoric.head()

Unnamed: 0,MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,Street_Pave,Alley_Pave,LotShape_IR2,LotShape_IR3,LotShape_Reg,LandContour_HLS,LandContour_Low,LandContour_Lvl,Utilities_NoSeWa,LotConfig_CulDSac,LotConfig_FR2,LotConfig_FR3,LotConfig_Inside,LandSlope_Mod,LandSlope_Sev,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Condition1_Feedr,Condition1_Norm,Condition1_PosA,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition1_RRNe,...,KitchenQual_Gd,KitchenQual_TA,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Mod,Functional_Sev,Functional_Typ,FireplaceQu_Fa,FireplaceQu_Gd,FireplaceQu_Po,FireplaceQu_TA,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_CarPort,GarageType_Detchd,GarageFinish_RFn,GarageFinish_Unf,GarageQual_Fa,GarageQual_Gd,GarageQual_Po,GarageQual_TA,GarageCond_Fa,GarageCond_Gd,GarageCond_Po,GarageCond_TA,PavedDrive_P,PavedDrive_Y,PoolQC_Fa,PoolQC_Gd,Fence_GdWo,Fence_MnPrv,Fence_MnWw,MiscFeature_Othr,MiscFeature_Shed,MiscFeature_TenC,SaleType_CWD,SaleType_Con,SaleType_ConLD,SaleType_ConLI,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
0,False,False,True,False,True,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,True,False,False,False,False,True,False,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False
1,False,False,True,False,True,False,False,False,True,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,...,False,True,False,False,False,False,False,True,False,False,False,True,True,False,False,False,False,True,False,False,False,False,True,False,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False
2,False,False,True,False,True,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,True,False,False,False,True,True,False,False,False,False,True,False,False,False,False,True,False,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False
3,False,False,True,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,True,False,True,False,False,False,False,False,False,True,False,True,False,False,False,True,False,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False
4,False,False,True,False,True,False,False,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,True,False,False,False,True,True,False,False,False,False,True,False,False,False,False,True,False,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False


In [28]:
df_categoric["SalePrice"] = df["SalePrice"]

In [29]:
df_categoric.corr()

Unnamed: 0,MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,Street_Pave,Alley_Pave,LotShape_IR2,LotShape_IR3,LotShape_Reg,LandContour_HLS,LandContour_Low,LandContour_Lvl,Utilities_NoSeWa,LotConfig_CulDSac,LotConfig_FR2,LotConfig_FR3,LotConfig_Inside,LandSlope_Mod,LandSlope_Sev,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Condition1_Feedr,Condition1_Norm,Condition1_PosA,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition1_RRNe,...,KitchenQual_TA,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Mod,Functional_Sev,Functional_Typ,FireplaceQu_Fa,FireplaceQu_Gd,FireplaceQu_Po,FireplaceQu_TA,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_CarPort,GarageType_Detchd,GarageFinish_RFn,GarageFinish_Unf,GarageQual_Fa,GarageQual_Gd,GarageQual_Po,GarageQual_TA,GarageCond_Fa,GarageCond_Gd,GarageCond_Po,GarageCond_TA,PavedDrive_P,PavedDrive_Y,PoolQC_Fa,PoolQC_Gd,Fence_GdWo,Fence_MnPrv,Fence_MnWw,MiscFeature_Othr,MiscFeature_Shed,MiscFeature_TenC,SaleType_CWD,SaleType_Con,SaleType_ConLD,SaleType_ConLI,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,SalePrice
MSZoning_FV,1.000000,-0.022722,-0.416608,-0.090435,0.013866,0.445741,0.023612,-0.017926,0.074564,-0.040649,-0.034321,0.072772,-0.005651,-0.043094,0.017075,0.052219,0.045621,-0.046595,-0.020460,-0.007995,-0.022722,-0.043905,-0.030184,-0.073043,-0.041068,-0.058533,-0.051628,-0.034807,-0.023429,-0.040226,-0.092136,-0.017000,-0.049521,-0.036692,-0.050934,-0.062521,-0.028491,-0.049877,-0.044297,0.862807,-0.028491,-0.035287,-0.018808,-0.052315,0.076342,-0.016023,-0.024786,-0.018808,-0.003956,-0.007995,...,-0.190774,-0.012654,-0.031793,-0.033331,-0.021993,-0.005651,0.058533,-0.010483,-0.006946,-0.025439,-0.056117,0.008575,-0.024786,-0.012807,-0.017000,0.035894,0.082139,-0.080458,-0.039799,-0.021240,-0.009795,0.072772,-0.033830,-0.017000,-0.014983,0.068620,-0.031265,0.064596,-0.007995,-0.009795,-0.024707,-0.074929,-0.018808,-0.007995,-0.040226,-0.005651,0.052219,0.081793,-0.017000,-0.012654,-0.012654,0.198831,-0.009795,-0.151069,-0.011314,-0.019651,-0.025439,-0.124064,0.195066,0.089950
MSZoning_RH,-0.022722,1.000000,-0.203158,-0.044101,0.006762,0.061752,0.021930,-0.008742,0.039092,0.052529,-0.016737,-0.094910,-0.002756,-0.027613,-0.019198,-0.005517,0.036231,0.009176,-0.009977,-0.003899,-0.011080,-0.021410,-0.014719,-0.035619,0.051637,0.023548,-0.025176,-0.016974,-0.011425,-0.019616,-0.008487,-0.008290,-0.024149,-0.017893,-0.024838,-0.030488,0.239668,-0.024323,0.145444,-0.026335,-0.013894,-0.017208,-0.009171,0.060708,-0.034599,-0.007813,-0.012087,-0.009171,-0.014174,-0.003899,...,0.051912,-0.006171,-0.015504,-0.016254,-0.010725,-0.002756,0.028543,-0.016007,-0.062439,-0.012405,-0.038957,-0.087603,-0.012087,-0.026659,0.075763,0.041124,-0.067117,0.045005,0.017487,-0.010358,-0.004776,-0.073177,0.026513,-0.008290,-0.007306,-0.080472,0.031128,-0.112220,-0.003899,-0.004776,-0.020629,0.005935,-0.009171,-0.003899,-0.019616,-0.002756,-0.005517,-0.003899,-0.008290,-0.006171,-0.006171,-0.031785,-0.004776,0.021659,-0.005517,-0.009583,-0.012405,-0.036497,-0.032210,-0.065429
MSZoning_RL,-0.416608,-0.203158,1.000000,-0.808585,0.045348,-0.267166,0.027175,0.022698,-0.268764,0.023808,0.060758,-0.002963,0.013565,0.129087,0.018499,-0.069080,-0.094730,0.022413,0.049111,-0.071482,-0.203158,-0.152169,0.072452,0.120101,0.052913,0.074114,0.123925,-0.311212,-0.209483,0.050003,0.211868,0.040806,0.118868,0.088073,0.114756,-0.452328,0.003762,0.104434,0.063753,-0.333293,0.068389,0.084700,0.045144,0.008374,0.017904,0.038459,0.059496,-0.013029,0.031731,0.019190,...,-0.075263,-0.027031,0.006525,0.002178,0.002904,0.013565,0.018824,0.056230,0.074231,-0.011066,0.176690,0.304561,0.000314,0.081903,-0.023463,-0.323280,0.156517,-0.197274,-0.148965,-0.000636,-0.050548,0.179828,-0.105154,0.019383,-0.061136,0.148910,-0.066790,0.229569,0.019190,0.023511,0.021579,0.001235,0.006362,-0.026146,0.022071,0.013565,-0.037001,-0.026146,-0.044886,0.001671,-0.027031,0.004972,0.023511,0.015608,0.027158,-0.008548,-0.025492,0.046098,0.002730,0.245063
MSZoning_RM,-0.090435,-0.044101,-0.808585,1.000000,-0.003127,0.010214,-0.047949,-0.011491,0.242838,-0.026057,-0.066614,0.007923,-0.010968,-0.109902,-0.021970,0.051574,0.068184,-0.025210,-0.039710,0.088403,0.251252,0.209974,-0.058583,-0.078471,-0.048306,-0.052738,-0.100204,0.274832,0.259074,-0.024719,-0.178824,-0.032996,-0.096115,-0.071214,-0.090258,0.561881,-0.055298,-0.079283,-0.085975,-0.104815,-0.055298,-0.068487,-0.036503,-0.009189,-0.051070,-0.031098,-0.048107,0.030171,-0.027351,-0.015517,...,0.170098,0.041233,0.018280,0.024508,0.014490,-0.010968,-0.068997,-0.050781,-0.055798,0.033294,-0.148613,-0.305097,0.002764,-0.073804,-0.008442,0.323147,-0.207779,0.256173,0.192197,0.017937,0.065869,-0.220623,0.122799,-0.008442,0.082207,-0.173012,0.088332,-0.238478,-0.015517,-0.019011,-0.010825,0.034477,0.007946,0.036443,-0.014048,-0.010968,0.014807,-0.015517,0.016111,0.008337,0.041233,-0.105674,-0.019011,0.078404,-0.021959,0.004432,0.049827,0.050686,-0.100724,-0.288065
Street_Pave,0.013866,0.006762,0.045348,-0.003127,1.000000,0.010919,-0.053886,0.005335,-0.004414,-0.046774,-0.058824,0.119812,0.001682,0.016851,0.011716,0.003367,0.007713,-0.141864,-0.107881,0.002379,0.006762,0.013066,0.008983,0.021737,0.012221,-0.024967,0.015364,-0.194010,0.006972,0.011971,0.027419,0.005059,0.014737,0.010919,0.015158,0.018606,0.008479,0.014843,0.013183,0.016071,0.008479,-0.123987,0.005597,0.015569,0.005545,0.004768,0.007376,0.005597,0.008650,0.002379,...,-0.042386,0.003766,0.009461,0.009919,0.006545,0.001682,-0.017419,0.009769,0.013704,0.007571,0.007469,0.056188,-0.087092,0.016269,-0.131726,0.014322,-0.006277,0.010569,0.011844,0.006321,0.002915,0.013711,0.010067,0.005059,0.004459,0.016662,0.009304,0.019757,0.002379,0.002915,0.012589,0.022298,0.005597,0.002379,-0.166372,0.001682,0.003367,0.002379,-0.131726,0.003766,0.003766,0.019397,0.002915,0.006539,0.003367,-0.112734,0.007571,-0.002140,0.019657,0.041036
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SaleCondition_Alloca,-0.019651,-0.009583,-0.008548,0.004432,-0.112734,-0.015474,-0.015474,-0.007560,0.037744,0.024571,-0.014474,-0.119671,-0.002383,0.037938,-0.016603,-0.004772,0.022881,0.053913,-0.008629,-0.003372,-0.009583,-0.018516,-0.012730,-0.030805,0.065316,0.035382,-0.021773,0.033590,-0.009881,0.025158,-0.038857,-0.007170,-0.020885,-0.015474,-0.021480,0.002022,-0.012016,0.013550,0.135415,-0.022775,-0.012016,-0.014882,-0.007932,0.077357,-0.029922,-0.006757,-0.010453,-0.007932,-0.012258,-0.003372,...,0.060068,-0.005337,-0.013408,-0.014057,-0.009275,-0.002383,-0.005348,-0.013844,-0.019421,-0.010729,-0.010585,-0.033248,0.123422,-0.023055,0.089753,-0.003108,-0.041310,0.062021,0.025760,0.068887,-0.004131,-0.044490,-0.014267,0.089753,-0.006319,-0.023612,-0.013186,-0.000378,-0.003372,0.163397,-0.017841,-0.007112,-0.007932,-0.003372,0.025158,-0.002383,-0.004772,-0.003372,-0.007170,-0.005337,-0.005337,-0.027489,-0.004131,0.035530,-0.004772,1.000000,-0.010729,-0.194663,-0.027856,-0.015525
SaleCondition_Family,-0.025439,-0.012405,-0.025492,0.049827,0.007571,-0.020032,-0.020032,0.061658,0.028480,0.042610,-0.018738,0.000800,-0.003085,-0.030915,-0.021494,-0.006177,-0.005397,0.003131,0.051556,-0.004365,0.044194,0.006199,-0.016479,-0.039879,0.041765,-0.031957,0.023906,-0.019003,-0.012792,0.043475,0.014979,-0.009282,0.054074,-0.020032,-0.027808,0.054071,-0.015555,-0.000368,-0.024185,-0.029484,-0.015555,-0.019265,-0.010268,-0.028562,0.012677,-0.008748,-0.013533,-0.010268,-0.015869,-0.004365,...,0.034549,-0.006909,0.023517,-0.018198,-0.012007,-0.003085,0.008628,-0.017922,0.010670,-0.013889,-0.004130,-0.011021,-0.013533,-0.005088,-0.009282,0.022678,-0.036147,0.044406,0.011317,-0.011596,-0.005348,0.020265,0.020053,-0.009282,-0.008180,0.017055,-0.017070,0.035267,-0.004365,-0.005348,0.039349,0.016155,0.057876,-0.004365,-0.021962,-0.003085,0.106555,-0.004365,-0.009282,-0.006909,-0.006909,-0.035587,-0.005348,0.028599,-0.006177,-0.010729,1.000000,-0.252006,-0.036062,-0.046480
SaleCondition_Normal,-0.124064,-0.036497,0.046098,0.050686,-0.002140,-0.060962,0.014667,-0.026089,-0.003730,-0.059157,0.028317,0.036921,-0.055982,0.013589,0.034728,0.024512,-0.004839,0.023058,-0.031676,0.017320,-0.019352,0.058563,0.013335,0.034790,0.011200,-0.000387,-0.006496,-0.015449,0.034120,0.017772,0.066130,0.014026,0.033578,0.036275,-0.145199,0.008537,0.020450,0.051097,0.014391,-0.163502,-0.062103,-0.035659,0.040746,0.004176,-0.015005,0.034712,0.022200,-0.021179,-0.004511,0.017320,...,0.128151,-0.003139,0.019353,0.048540,0.029946,-0.055982,-0.056145,0.035089,-0.109053,0.039757,0.126863,-0.006826,-0.056547,-0.054061,-0.031583,0.082693,-0.016818,0.118008,0.006143,0.009384,-0.018197,0.025128,0.038284,-0.008778,-0.019221,0.018259,0.055153,-0.035966,0.017320,-0.057614,0.006530,0.035573,-0.000537,0.017320,0.047504,0.012243,-0.043784,0.017320,-0.031583,-0.003139,0.027414,-0.645698,-0.097031,0.634322,-0.112080,-0.194663,-0.252006,1.000000,-0.654323,-0.153990
SaleCondition_Partial,0.195066,-0.032210,0.002730,-0.100724,0.019657,0.051709,0.007257,0.033950,-0.041636,0.090444,-0.032868,-0.010053,-0.008011,-0.020426,-0.014201,-0.016038,0.037814,-0.054183,-0.002945,-0.011333,-0.032210,-0.062238,-0.024939,0.041583,-0.044884,-0.044209,0.056659,-0.049342,-0.033213,-0.043430,-0.130609,-0.024099,-0.070200,-0.052013,0.278268,-0.070306,-0.040389,-0.070705,-0.050363,0.266552,0.110568,0.057604,-0.026661,-0.031386,0.036476,-0.022713,0.008063,0.001648,0.032835,-0.011333,...,-0.273826,-0.017938,-0.045069,-0.047249,-0.031176,-0.008011,0.082975,-0.046533,0.242494,-0.036062,-0.135988,0.117299,-0.013537,0.117935,-0.024099,-0.156037,0.085699,-0.232554,-0.056418,-0.004989,-0.013885,0.078899,-0.047956,-0.024099,-0.021239,0.080316,-0.044321,0.082657,-0.011333,0.040174,-0.059968,-0.106217,-0.026661,-0.011333,-0.057023,-0.008011,-0.016038,-0.011333,0.007176,-0.017938,-0.017938,0.986819,-0.013885,-0.769559,-0.016038,-0.027856,-0.036062,-0.654323,1.000000,0.352060


In [30]:
correlation2 = df_categoric.corr(numeric_only=True)["SalePrice"]
important_categoric_columns = correlation2[abs(correlation2) > 0.20].index.tolist()

In [31]:
important_categoric_columns

['MSZoning_RL',
 'MSZoning_RM',
 'LotShape_Reg',
 'Neighborhood_NoRidge',
 'Neighborhood_NridgHt',
 'Neighborhood_StoneBr',
 'HouseStyle_2Story',
 'RoofStyle_Gable',
 'RoofStyle_Hip',
 'Exterior1st_VinylSd',
 'Exterior2nd_VinylSd',
 'MasVnrType_Stone',
 'ExterQual_Gd',
 'ExterQual_TA',
 'Foundation_CBlock',
 'Foundation_PConc',
 'BsmtQual_Gd',
 'BsmtQual_TA',
 'BsmtExposure_Gd',
 'BsmtExposure_No',
 'BsmtFinType1_GLQ',
 'HeatingQC_TA',
 'CentralAir_Y',
 'Electrical_SBrkr',
 'KitchenQual_Gd',
 'KitchenQual_TA',
 'FireplaceQu_Gd',
 'GarageType_Attchd',
 'GarageType_BuiltIn',
 'GarageType_Detchd',
 'GarageFinish_Unf',
 'GarageQual_TA',
 'GarageCond_TA',
 'PavedDrive_Y',
 'SaleType_New',
 'SaleType_WD',
 'SaleCondition_Partial',
 'SalePrice']

In [32]:
df_important_categoric = df_categoric[important_categoric_columns]

In [33]:
df_important_categoric

Unnamed: 0,MSZoning_RL,MSZoning_RM,LotShape_Reg,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_StoneBr,HouseStyle_2Story,RoofStyle_Gable,RoofStyle_Hip,Exterior1st_VinylSd,Exterior2nd_VinylSd,MasVnrType_Stone,ExterQual_Gd,ExterQual_TA,Foundation_CBlock,Foundation_PConc,BsmtQual_Gd,BsmtQual_TA,BsmtExposure_Gd,BsmtExposure_No,BsmtFinType1_GLQ,HeatingQC_TA,CentralAir_Y,Electrical_SBrkr,KitchenQual_Gd,KitchenQual_TA,FireplaceQu_Gd,GarageType_Attchd,GarageType_BuiltIn,GarageType_Detchd,GarageFinish_Unf,GarageQual_TA,GarageCond_TA,PavedDrive_Y,SaleType_New,SaleType_WD,SaleCondition_Partial,SalePrice
0,True,False,True,False,False,False,True,True,False,True,True,False,True,False,False,True,True,False,False,True,True,False,True,True,True,False,False,True,False,False,False,True,True,True,False,True,False,208500
1,True,False,True,False,False,False,False,True,False,False,False,False,False,True,True,False,True,False,True,False,False,False,True,True,False,True,False,True,False,False,False,True,True,True,False,True,False,181500
2,True,False,False,False,False,False,True,True,False,True,True,False,True,False,False,True,True,False,False,False,True,False,True,True,True,False,False,True,False,False,False,True,True,True,False,True,False,223500
3,True,False,False,False,False,False,True,True,False,False,False,False,False,True,False,False,False,True,False,True,False,False,True,True,True,False,True,False,False,True,True,True,True,True,False,True,False,140000
4,True,False,False,True,False,False,True,True,False,True,True,False,True,False,False,True,True,False,False,False,True,False,True,True,True,False,False,True,False,False,False,True,True,True,False,True,False,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,True,False,True,False,False,False,True,True,False,True,True,False,False,True,False,True,True,False,False,True,False,False,True,True,False,True,False,True,False,False,False,True,True,True,False,True,False,175000
1456,True,False,True,False,False,False,False,True,False,False,False,True,False,True,True,False,True,False,False,True,False,True,True,True,False,True,False,True,False,False,True,True,True,True,False,True,False,210000
1457,True,False,True,False,False,False,True,True,False,False,False,False,False,False,False,False,False,True,False,True,True,False,True,True,True,False,True,True,False,False,False,True,True,True,False,True,False,266500
1458,True,False,True,False,False,False,False,False,True,False,False,False,False,True,True,False,False,True,False,False,True,False,True,False,True,False,False,True,False,False,True,True,True,True,False,True,False,142125


In [34]:
df_important_numeric = df_important_numeric.drop("SalePrice", axis=1)
last_df = df_important_numeric.join(df_important_categoric)
last_df.head()

Unnamed: 0,LotFrontage,LotArea,OverallQual,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,GrLivArea,BsmtFullBath,FullBath,HalfBath,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,MSZoning_RL,MSZoning_RM,LotShape_Reg,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_StoneBr,HouseStyle_2Story,RoofStyle_Gable,RoofStyle_Hip,Exterior1st_VinylSd,Exterior2nd_VinylSd,MasVnrType_Stone,ExterQual_Gd,ExterQual_TA,Foundation_CBlock,Foundation_PConc,BsmtQual_Gd,BsmtQual_TA,BsmtExposure_Gd,BsmtExposure_No,BsmtFinType1_GLQ,HeatingQC_TA,CentralAir_Y,Electrical_SBrkr,KitchenQual_Gd,KitchenQual_TA,FireplaceQu_Gd,GarageType_Attchd,GarageType_BuiltIn,GarageType_Detchd,GarageFinish_Unf,GarageQual_TA,GarageCond_TA,PavedDrive_Y,SaleType_New,SaleType_WD,SaleCondition_Partial,SalePrice
0,65.0,8450.0,7.0,2003.0,2003.0,196.0,706.0,150.0,856.0,856.0,854.0,1710.0,1.0,2.0,1.0,8.0,0.0,2003.0,2.0,548.0,0.0,61.0,True,False,True,False,False,False,True,True,False,True,True,False,True,False,False,True,True,False,False,True,True,False,True,True,True,False,False,True,False,False,False,True,True,True,False,True,False,208500
1,80.0,9600.0,6.0,1976.0,1976.0,0.0,978.0,284.0,1262.0,1262.0,0.0,1262.0,0.0,2.0,0.0,6.0,1.0,1976.0,2.0,460.0,298.0,0.0,True,False,True,False,False,False,False,True,False,False,False,False,False,True,True,False,True,False,True,False,False,False,True,True,False,True,False,True,False,False,False,True,True,True,False,True,False,181500
2,68.0,11250.0,7.0,2001.0,2002.0,162.0,486.0,434.0,920.0,920.0,866.0,1786.0,1.0,2.0,1.0,6.0,1.0,2001.0,2.0,608.0,0.0,42.0,True,False,False,False,False,False,True,True,False,True,True,False,True,False,False,True,True,False,False,False,True,False,True,True,True,False,False,True,False,False,False,True,True,True,False,True,False,223500
3,60.0,9550.0,7.0,1915.0,1970.0,0.0,216.0,540.0,756.0,961.0,756.0,1717.0,1.0,1.0,0.0,7.0,1.0,1998.0,3.0,642.0,0.0,35.0,True,False,False,False,False,False,True,True,False,False,False,False,False,True,False,False,False,True,False,True,False,False,True,True,True,False,True,False,False,True,True,True,True,True,False,True,False,140000
4,84.0,14260.0,8.0,2000.0,2000.0,350.0,655.0,490.0,1145.0,1145.0,1053.0,2198.0,1.0,2.0,1.0,9.0,1.0,2000.0,3.0,836.0,192.0,84.0,True,False,False,True,False,False,True,True,False,True,True,False,True,False,False,True,True,False,False,False,True,False,True,True,True,False,False,True,False,False,False,True,True,True,False,True,False,250000


In [35]:
last_df.isnull().sum()

LotFrontage              0
LotArea                  0
OverallQual              0
YearBuilt                0
YearRemodAdd             0
MasVnrArea               0
BsmtFinSF1               0
BsmtUnfSF                0
TotalBsmtSF              0
1stFlrSF                 0
2ndFlrSF                 0
GrLivArea                0
BsmtFullBath             0
FullBath                 0
HalfBath                 0
TotRmsAbvGrd             0
Fireplaces               0
GarageYrBlt              0
GarageCars               0
GarageArea               0
WoodDeckSF               0
OpenPorchSF              0
MSZoning_RL              0
MSZoning_RM              0
LotShape_Reg             0
Neighborhood_NoRidge     0
Neighborhood_NridgHt     0
Neighborhood_StoneBr     0
HouseStyle_2Story        0
RoofStyle_Gable          0
RoofStyle_Hip            0
Exterior1st_VinylSd      0
Exterior2nd_VinylSd      0
MasVnrType_Stone         0
ExterQual_Gd             0
ExterQual_TA             0
Foundation_CBlock        0
F

#### Separating Train Data into X and Y

In [36]:
x = last_df.drop("SalePrice", axis=1)
y = last_df[["SalePrice"]]

In [37]:
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=.20, random_state=42)

#### Creating a Linear Regression Model with Train Data

In [38]:
lr = LinearRegression()
lr.fit(x_train, y_train)
y_pred = lr.predict(x_test)

#### Testing Linear Regression Model

In [39]:
mse = mean_squared_error(y_test, y_pred) ** .5
print(mse)
r2 = r2_score(y_test, y_pred)
print(r2)

36767.74025261977
0.7944441857547401


#### Creating a Random Forest Regression Model with Train Data

In [40]:
rfr = RandomForestRegressor()
rfr.fit(x_train, y_train)
y_pred2 = rfr.predict(x_test)

  return fit_method(estimator, *args, **kwargs)


#### Testing Random Forest Regression Model

In [41]:
mse2 = mean_squared_error(y_test, y_pred2) ** .5
print(mse2)
r2_2 = r2_score(y_test, y_pred2)
print(r2_2)

34757.36863828463
0.8163082431565015


#### Boosting Random Forest Regression Model

In [42]:
rfr_model = xgb.XGBRFRegressor(
    n_estimators=200,  # Ağaç sayısı
    max_depth=10,       # Her bir ağacın maksimum derinliği
    learning_rate=1, # Öğrenme hızı (düşük, çünkü random forest mantığı kullanılıyor)
    subsample=0.8,     # Alt örnekleme oranı
    colsample_bynode=0.8,  # Her düğümde kullanılacak özelliklerin alt kümesi
    random_state=42
)
rfr_model.fit(x_train, y_train)
y_pred3 = rfr_model.predict(x_test)

In [43]:
mse3 = mean_squared_error(y_test, y_pred3) ** .5
print(mse3)
r2_3 = r2_score(y_test, y_pred3)
print(r2_3)

36049.663402753016
0.8023948073387146


#### Preparation of Test Data

In [44]:
df = pd.read_csv("test.csv")

In [45]:
df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,1461,20,RH,80.0,11622,Pave,,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Feedr,Norm,1Fam,1Story,5,6,1961,1961,Gable,CompShg,VinylSd,VinylSd,,0.0,TA,TA,CBlock,TA,TA,No,Rec,468.0,LwQ,144.0,270.0,882.0,GasA,TA,Y,SBrkr,896,0,0,896,0.0,0.0,1,0,2,1,TA,5,Typ,0,,Attchd,1961.0,Unf,1.0,730.0,TA,TA,Y,140,0,0,0,120,0,,MnPrv,,0,6,2010,WD,Normal
1,1462,20,RL,81.0,14267,Pave,,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1958,1958,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,108.0,TA,TA,CBlock,TA,TA,No,ALQ,923.0,Unf,0.0,406.0,1329.0,GasA,TA,Y,SBrkr,1329,0,0,1329,0.0,0.0,1,1,3,1,Gd,6,Typ,0,,Attchd,1958.0,Unf,1.0,312.0,TA,TA,Y,393,36,0,0,0,0,,,Gar2,12500,6,2010,WD,Normal
2,1463,60,RL,74.0,13830,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,5,5,1997,1998,Gable,CompShg,VinylSd,VinylSd,,0.0,TA,TA,PConc,Gd,TA,No,GLQ,791.0,Unf,0.0,137.0,928.0,GasA,Gd,Y,SBrkr,928,701,0,1629,0.0,0.0,2,1,3,1,TA,6,Typ,1,TA,Attchd,1997.0,Fin,2.0,482.0,TA,TA,Y,212,34,0,0,0,0,,MnPrv,,0,3,2010,WD,Normal
3,1464,60,RL,78.0,9978,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,6,1998,1998,Gable,CompShg,VinylSd,VinylSd,BrkFace,20.0,TA,TA,PConc,TA,TA,No,GLQ,602.0,Unf,0.0,324.0,926.0,GasA,Ex,Y,SBrkr,926,678,0,1604,0.0,0.0,2,1,3,1,Gd,7,Typ,1,Gd,Attchd,1998.0,Fin,2.0,470.0,TA,TA,Y,360,36,0,0,0,0,,,,0,6,2010,WD,Normal
4,1465,120,RL,43.0,5005,Pave,,IR1,HLS,AllPub,Inside,Gtl,StoneBr,Norm,Norm,TwnhsE,1Story,8,5,1992,1992,Gable,CompShg,HdBoard,HdBoard,,0.0,Gd,TA,PConc,Gd,TA,No,ALQ,263.0,Unf,0.0,1017.0,1280.0,GasA,Ex,Y,SBrkr,1280,0,0,1280,0.0,0.0,2,0,2,1,Gd,5,Typ,0,,Attchd,1992.0,RFn,2.0,506.0,TA,TA,Y,0,82,0,0,144,0,,,,0,1,2010,WD,Normal


In [46]:
df.shape

(1459, 80)

In [47]:
df.isnull().sum()

Id                  0
MSSubClass          0
MSZoning            4
LotFrontage       227
LotArea             0
Street              0
Alley            1352
LotShape            0
LandContour         0
Utilities           2
LotConfig           0
LandSlope           0
Neighborhood        0
Condition1          0
Condition2          0
BldgType            0
HouseStyle          0
OverallQual         0
OverallCond         0
YearBuilt           0
YearRemodAdd        0
RoofStyle           0
RoofMatl            0
Exterior1st         1
Exterior2nd         1
MasVnrType        894
MasVnrArea         15
ExterQual           0
ExterCond           0
Foundation          0
BsmtQual           44
BsmtCond           45
BsmtExposure       44
BsmtFinType1       42
BsmtFinSF1          1
BsmtFinType2       42
BsmtFinSF2          1
BsmtUnfSF           1
TotalBsmtSF         1
Heating             0
HeatingQC           0
CentralAir          0
Electrical          0
1stFlrSF            0
2ndFlrSF            0
LowQualFin

In [48]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1459 entries, 0 to 1458
Data columns (total 80 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1459 non-null   int64  
 1   MSSubClass     1459 non-null   int64  
 2   MSZoning       1455 non-null   object 
 3   LotFrontage    1232 non-null   float64
 4   LotArea        1459 non-null   int64  
 5   Street         1459 non-null   object 
 6   Alley          107 non-null    object 
 7   LotShape       1459 non-null   object 
 8   LandContour    1459 non-null   object 
 9   Utilities      1457 non-null   object 
 10  LotConfig      1459 non-null   object 
 11  LandSlope      1459 non-null   object 
 12  Neighborhood   1459 non-null   object 
 13  Condition1     1459 non-null   object 
 14  Condition2     1459 non-null   object 
 15  BldgType       1459 non-null   object 
 16  HouseStyle     1459 non-null   object 
 17  OverallQual    1459 non-null   int64  
 18  OverallC

In [49]:
df = pd.get_dummies(df, drop_first=True)

In [50]:
imputer = SimpleImputer(strategy="most_frequent")
df = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)
last_df = last_df.drop("SalePrice", axis=1)
df_important = df[last_df.columns]

In [51]:
df_important.head()

Unnamed: 0,LotFrontage,LotArea,OverallQual,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,GrLivArea,BsmtFullBath,FullBath,HalfBath,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,MSZoning_RL,MSZoning_RM,LotShape_Reg,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_StoneBr,HouseStyle_2Story,RoofStyle_Gable,RoofStyle_Hip,Exterior1st_VinylSd,Exterior2nd_VinylSd,MasVnrType_Stone,ExterQual_Gd,ExterQual_TA,Foundation_CBlock,Foundation_PConc,BsmtQual_Gd,BsmtQual_TA,BsmtExposure_Gd,BsmtExposure_No,BsmtFinType1_GLQ,HeatingQC_TA,CentralAir_Y,Electrical_SBrkr,KitchenQual_Gd,KitchenQual_TA,FireplaceQu_Gd,GarageType_Attchd,GarageType_BuiltIn,GarageType_Detchd,GarageFinish_Unf,GarageQual_TA,GarageCond_TA,PavedDrive_Y,SaleType_New,SaleType_WD,SaleCondition_Partial
0,80.0,11622.0,5.0,1961.0,1961.0,0.0,468.0,270.0,882.0,896.0,0.0,896.0,0.0,1.0,0.0,5.0,0.0,1961.0,1.0,730.0,140.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0
1,81.0,14267.0,6.0,1958.0,1958.0,108.0,923.0,406.0,1329.0,1329.0,0.0,1329.0,0.0,1.0,1.0,6.0,0.0,1958.0,1.0,312.0,393.0,36.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0
2,74.0,13830.0,5.0,1997.0,1998.0,0.0,791.0,137.0,928.0,928.0,701.0,1629.0,0.0,2.0,1.0,6.0,1.0,1997.0,2.0,482.0,212.0,34.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0
3,78.0,9978.0,6.0,1998.0,1998.0,20.0,602.0,324.0,926.0,926.0,678.0,1604.0,0.0,2.0,1.0,7.0,1.0,1998.0,2.0,470.0,360.0,36.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0
4,43.0,5005.0,8.0,1992.0,1992.0,0.0,263.0,1017.0,1280.0,1280.0,0.0,1280.0,0.0,2.0,0.0,5.0,0.0,1992.0,2.0,506.0,0.0,82.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0


#### Creating Models and Saving Predictions

In [52]:
lr = LinearRegression()
lr.fit(x, y)
y_pred = lr.predict(df_important)
son_df = pd.DataFrame(df["Id"].astype("int"))
son_df["SalePrice"] = pd.DataFrame(y_pred)
son_df.to_csv("lr_pred.csv", index=False)
son_df

Unnamed: 0,Id,SalePrice
0,1461,111420.461509
1,1462,156365.717652
2,1463,172522.552940
3,1464,194489.134964
4,1465,226975.106916
...,...,...
1454,2915,64709.952856
1455,2916,70488.573586
1456,2917,162492.007217
1457,2918,105200.493865


In [53]:
rfr = RandomForestRegressor()
rfr.fit(x, y)
y_pred2 = rfr.predict(df_important)
son_df2 = pd.DataFrame(df["Id"].astype("int"))
son_df2["SalePrice"] = pd.DataFrame(y_pred2)
son_df2.to_csv("rfr_pred.csv", index=False)
son_df2

  return fit_method(estimator, *args, **kwargs)


Unnamed: 0,Id,SalePrice
0,1461,128679.58
1,1462,153419.00
2,1463,181526.59
3,1464,180983.54
4,1465,196493.90
...,...,...
1454,2915,83419.71
1455,2916,87534.75
1456,2917,155336.00
1457,2918,108812.50
