In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
import seaborn as sns

In [28]:
# Load the imputed housing table. The first CSV column is used as the index and
# na_filter=False turns off pandas' missing-value detection for this read.
housingDataImputed = pd.read_csv(
    './imputeImpl_housingData.csv',
    index_col=0,
    na_filter=False,
)

# Names of every column whose dtype is not numeric; displayed as the cell output.
cat_features = [
    column_name
    for column_name in housingDataImputed.select_dtypes(exclude=[np.number]).columns
]
cat_features

['MSZoning',
 'Street',
 'LotShape',
 'LandContour',
 'Utilities',
 'LotConfig',
 'LandSlope',
 'Neighborhood',
 'Condition1',
 'Condition2',
 'BldgType',
 'HouseStyle',
 'RoofStyle',
 'RoofMatl',
 'Exterior1st',
 'Exterior2nd',
 'MasVnrType',
 'ExterQual',
 'ExterCond',
 'Foundation',
 'BsmtQual',
 'BsmtCond',
 'BsmtExposure',
 'BsmtFinType1',
 'BsmtFinType2',
 'Heating',
 'HeatingQC',
 'CentralAir',
 'Electrical',
 'KitchenQual',
 'Functional',
 'FireplaceQu',
 'GarageType',
 'GarageFinish',
 'GarageQual',
 'GarageCond',
 'PavedDrive',
 'PoolQC',
 'Fence',
 'SaleType',
 'SaleCondition']

In [20]:
# Mean SalePrice per Neighborhood, sorted ascending (lowest mean first).
(
    housingDataImputed
    .groupby("Neighborhood")['SalePrice']
    .mean()
    .sort_values()
)

Neighborhood
MeadowV     96836.470588
BrDale     106094.827586
IDOTRR     116537.084746
BrkSide    126029.854369
OldTown    127230.383886
Edwards    133152.248485
SWISU      133575.809524
Landmrk    137000.000000
Sawyer     137492.863309
NPkVill    140743.181818
Blueste    143590.000000
NAmes      145087.258537
Mitchel    163443.203883
SawyerW    188102.000000
Gilbert    189439.867133
NWAmes     190372.398374
Greens     193531.250000
Blmngtn    196237.173913
CollgCr    198133.169492
Crawfor    201988.946237
ClearCr    213980.800000
Timber     242909.518519
Veenker    251263.043478
Somerst    275539.333333
GrnHill    280000.000000
StoneBr    305307.930233
NridgHt    313662.495868
NoRidge    326114.298507
Name: SalePrice, dtype: float64

<h1>Review the mean SalePrice of every categorical variable and engineer new features from them</h1>

In [46]:
# For each categorical column, print the mean SalePrice of every category,
# ordered from the lowest mean to the highest.
for cat in cat_features:
    mean_price_by_category = housingDataImputed.groupby(cat)['SalePrice'].mean()
    print(mean_price_by_category.sort_values())

MSZoning
RM    127076.361244
RH    131639.714286
RL    187973.443227
Name: SalePrice, dtype: float64
Street
Grvl    148700.666667
Pave    177157.240066
Name: SalePrice, dtype: float64
LotShape
Reg    159380.613990
IR1    205566.576127
IR3    209374.230769
IR2    228367.173913
Name: SalePrice, dtype: float64
LandContour
Bnk    151976.709677
Lvl    174765.449977
Low    207514.285714
HLS    239313.687500
Name: SalePrice, dtype: float64
Utilities
NoSewr    115000.000000
AllPub    177112.848324
Name: SalePrice, dtype: float64
LotConfig
FR2        173091.267606
Inside     173325.078134
Corner     177373.200946
FR3        190116.666667
CulDSac    218444.253086
Name: SalePrice, dtype: float64
LandSlope
Gtl    175618.326311
Mod    203984.722222
Sev    216496.153846
Name: SalePrice, dtype: float64
Neighborhood
MeadowV     96836.470588
BrDale     106094.827586
IDOTRR     116537.084746
BrkSide    126029.854369
OldTown    127230.383886
Edwards    133152.248485
SWISU      133575.809524
Landmrk    13

<h1>Engineer binary indicator features</h1>

In [49]:
# Engineer binary (0/1) indicator columns on housingDataImputed.
#
# Every indicator is 1 when the source column holds one of the listed category
# values and 0 otherwise. The cell only adds (or overwrites) the indicator
# columns, so re-running it gives the same result.
desirable_neighborhoods=["StoneBr","NridgHt","NoRidge"]

# Houses built in or before this year are flagged as old.
_OLD_HOUSE_MAX_YEAR = 1954

# (new indicator column, source column, category values that map to 1),
# listed in the order in which the columns are appended to the frame.
_indicator_specs = [
    ('DesirableNeighborhood', 'Neighborhood', desirable_neighborhoods),
    ('BuiltInGarage', 'GarageType', ['Attchd','BuiltIn']),
    # FIX: previously read GarageType and compared against "Yes", so the flag
    # was always 0. CentralAir uses the code "Y" (as AirConditioning below does);
    # "Yes" is still accepted in case the data carries the spelled-out value.
    # Both central_air and AirConditioning are kept so existing column names
    # continue to work.
    ('central_air', 'CentralAir', ['Y','Yes']),
    # FIX: previously read GarageType, so the flag was always 0. Kitchen quality
    # lives in KitchenQual; the short codes used by the other quality columns
    # are matched, and the original spelled-out labels are still accepted.
    ('GoodKitchen', 'KitchenQual', ['Ex','Gd','Excellent','Good']),
    # FIX: the original only matched the spelled-out labels. The other quality
    # columns in this cell are compared against short codes, so 'Gd'/'Av' are
    # matched as well. NOTE(review): confirm the actual BsmtExposure values.
    ('WalkoutBasement', 'BsmtExposure', ['Gd','Av','Good Exposure','Average Exposure']),
    ('BasementQuality', 'BsmtQual', ['Ex','Gd']),
    ('StreetType', 'Street', ['Pave']),
    ('LandContourHLS', 'LandContour', ['HLS']),
    ('PubUtilities', 'Utilities', ['AllPub']),
    ('GreatLocation1', 'Condition1', ['PosN','PosA']),
    ('GreatLocation2', 'Condition2', ['PosN','PosA']),
    ('GoodRoofMat', 'RoofMatl', ['WdShngl','Membran']),
    ('GoodExterior1', 'Exterior1st', ['ImStucc','PreCast']),
    ('GoodExterior2', 'Exterior2nd', ['ImStucc','PreCast']),
    ('GoodExteriorQual', 'ExterQual', ['Ex']),
    ('LivableBasement1', 'BsmtFinType1', ['GLQ']),
    ('LivableBasement2', 'BsmtFinType2', ['GLQ']),
    ('GasHeating', 'Heating', ['GasA','GasW']),
    ('HeatingQuality', 'HeatingQC', ['Ex','Gd']),
    ('AirConditioning', 'CentralAir', ['Y']),
    # NOTE(review): this is 1 when Functional is "Typ", which reads as the
    # opposite of what the column name suggests - confirm the intended polarity.
    ('FunctionDeduction', 'Functional', ['Typ']),
    ('FireplaceQuality', 'FireplaceQu', ['Ex','Gd']),
    ('GarageFinished', 'GarageFinish', ['Fin']),
    ('GoodGarage', 'GarageQual', ['Ex','Gd']),
    ('GoodPool', 'PoolQC', ['Ex','Gd']),
    # FIX: previously read GarageQual, which is compared against quality codes
    # elsewhere in this cell and never equals "New", so the flag was always 0.
    # SaleType is the sale-related column that can carry "New".
    ('NewHome', 'SaleType', ['New']),
]

# OldHouse is the only indicator derived from a numeric threshold.
housingDataImputed['OldHouse'] = (
    housingDataImputed['YearBuilt'] <= _OLD_HOUSE_MAX_YEAR
).astype('int64')

# Membership test per source column; the boolean mask is stored as int64 0/1,
# matching the dtype the previous map(lambda ...) implementation produced.
for _new_column, _source_column, _positive_values in _indicator_specs:
    housingDataImputed[_new_column] = (
        housingDataImputed[_source_column].isin(_positive_values).astype('int64')
    )