In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker #For changing ticker location and formatting
import seaborn as sns

from sklearn.linear_model import LinearRegression, Ridge, RidgeCV, Lasso, LassoCV
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import metrics
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler, PolynomialFeatures

In [2]:
# Load the cleaned, recoded test CSV, using its 'id' column as the DataFrame index.
# Note to self: I'm at the end of the workflow, but I just realized I should always have
# been reading in the data files with 'id' as the index column.
test_csv_path = './datasets/clean_recoded_test_ames.csv'
test = pd.read_csv(test_csv_path, index_col = 'id')
test.head(n = 5)

Unnamed: 0_level_0,pid,ms_subclass,ms_zoning,lot_frontage,lot_area,street,alley,lot_shape,land_contour,utilities,...,misc_val,mo_sold,yr_sold,sale_type,age_at_sale,age_since_remod,bsmnt_fin_sf,sold_in_crisis,has_wood_deck,has_pool
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2658,902301120,190,RM,69.0,9142,Pave,Grvl,4,Lvl,AllPub,...,0,4,2006,WD,96,56,0,0,0,0
2718,905108090,90,RL,0.0,9662,Pave,,3,Lvl,AllPub,...,0,8,2006,WD,29,29,0,0,1,0
2414,528218130,60,RL,58.0,17104,Pave,,3,Lvl,AllPub,...,0,9,2006,New,0,0,554,0,1,0
1989,902207150,30,RM,60.0,8520,Pave,,4,Lvl,AllPub,...,0,7,2007,WD,84,1,0,0,0,0
625,535105100,20,RL,0.0,9500,Pave,,3,Lvl,AllPub,...,0,7,2009,WD,46,46,609,1,0,0


In [3]:
# The categorical columns I want to use, pulled out of the test data as their own frame.
categorical_cols = [
    'ms_zoning',
    'neighborhood',
    'bldg_type',
    'house_style',
    'foundation',
    'garage_type',
    'mo_sold',
]
fewer_cats = test[categorical_cols]
fewer_cats

Unnamed: 0_level_0,ms_zoning,neighborhood,bldg_type,house_style,foundation,garage_type,mo_sold
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2658,RM,OldTown,2fmCon,2Story,Stone,Detchd,4
2718,RL,Sawyer,Duplex,1Story,CBlock,Attchd,8
2414,RL,Gilbert,1Fam,2Story,PConc,Attchd,9
1989,RM,OldTown,1Fam,1Story,CBlock,Detchd,7
625,RL,NAmes,1Fam,1Story,CBlock,Attchd,7
...,...,...,...,...,...,...,...
1662,RL,NWAmes,1Fam,2Story,CBlock,Attchd,11
1234,RL,NAmes,1Fam,2Story,CBlock,Attchd,8
1373,RL,Sawyer,1Fam,1Story,CBlock,Attchd,8
1672,RL,NAmes,1Fam,1Story,PConc,Detchd,5


In [4]:
# Keep every column of the test data whose dtype is not 'object', then preview the first rows.
numeric_test = test.select_dtypes(exclude = ['object'])
numeric_test.head()

Unnamed: 0_level_0,pid,ms_subclass,lot_frontage,lot_area,lot_shape,land_slope,overall_qual,overall_cond,year_built,year_remod/add,...,fence,misc_val,mo_sold,yr_sold,age_at_sale,age_since_remod,bsmnt_fin_sf,sold_in_crisis,has_wood_deck,has_pool
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2658,902301120,190,69.0,9142,4,3,6,8,1910,1950,...,0,0,4,2006,96,56,0,0,0,0
2718,905108090,90,0.0,9662,3,3,5,4,1977,1977,...,0,0,8,2006,29,29,0,0,1,0
2414,528218130,60,58.0,17104,3,3,7,5,2006,2006,...,0,0,9,2006,0,0,554,0,1,0
1989,902207150,30,60.0,8520,4,3,5,6,1923,2006,...,0,0,7,2007,84,1,0,0,0,0
625,535105100,20,0.0,9500,3,3,6,5,1963,1963,...,0,0,7,2009,46,46,609,1,0,0


In [5]:
# Non-object columns that should not be kept in the numeric feature frame.
# mo_sold is removed here and comes back in through fewer_cats, alongside the categorical columns.
cols_to_drop = ['pid', 'garage_yr_blt', 'sold_in_crisis', 'mo_sold']

# Rebuild numeric_test from `test` rather than dropping from numeric_test itself.
# The original `numeric_test = numeric_test.drop(...)` raised a KeyError when the cell was
# run a second time, because the columns were already gone. Deriving from `test` gives the
# same result on a fresh run and makes the cell safe to re-run.
numeric_test = test.select_dtypes(exclude = 'object').drop(columns = cols_to_drop)

In [10]:
# Preview the remaining numeric columns I want to use (numeric_test after cols_to_drop was removed).
numeric_test.head()

Unnamed: 0_level_0,ms_subclass,lot_frontage,lot_area,lot_shape,land_slope,overall_qual,overall_cond,year_built,year_remod/add,mas_vnr_area,...,screen_porch,pool_area,fence,misc_val,yr_sold,age_at_sale,age_since_remod,bsmnt_fin_sf,has_wood_deck,has_pool
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2658,190,69.0,9142,4,3,6,8,1910,1950,0.0,...,0,0,0,0,2006,96,56,0,0,0
2718,90,0.0,9662,3,3,5,4,1977,1977,0.0,...,0,0,0,0,2006,29,29,0,1,0
2414,60,58.0,17104,3,3,7,5,2006,2006,0.0,...,0,0,0,0,2006,0,0,554,1,0
1989,30,60.0,8520,4,3,5,6,1923,2006,0.0,...,0,0,0,0,2007,84,1,0,0,0
625,20,0.0,9500,3,3,6,5,1963,1963,247.0,...,185,0,0,0,2009,46,46,609,0,0


In [11]:
# List the names of the columns kept in numeric_test.
numeric_test.columns

Index(['ms_subclass', 'lot_frontage', 'lot_area', 'lot_shape', 'land_slope',
       'overall_qual', 'overall_cond', 'year_built', 'year_remod/add',
       'mas_vnr_area', 'exter_qual', 'exter_cond', 'bsmt_qual', 'bsmt_cond',
       'bsmt_exposure', 'bsmtfin_type_1', 'bsmtfin_sf_1', 'bsmtfin_type_2',
       'bsmtfin_sf_2', 'bsmt_unf_sf', 'total_bsmt_sf', 'heating_qc',
       'central_air', 'electrical', '1st_flr_sf', '2nd_flr_sf',
       'low_qual_fin_sf', 'gr_liv_area', 'bsmt_full_bath', 'bsmt_half_bath',
       'full_bath', 'half_bath', 'bedroom_abvgr', 'kitchen_abvgr',
       'kitchen_qual', 'totrms_abvgrd', 'functional', 'fireplaces',
       'fireplace_qu', 'garage_finish', 'garage_cars', 'garage_area',
       'garage_qual', 'garage_cond', 'paved_drive', 'wood_deck_sf',
       'open_porch_sf', 'enclosed_porch', '3ssn_porch', 'screen_porch',
       'pool_area', 'fence', 'misc_val', 'yr_sold', 'age_at_sale',
       'age_since_remod', 'bsmnt_fin_sf', 'has_wood_deck', 'has_pool'],
     

In [9]:
# Summarize numeric_test: number of rows, and the non-null count and dtype of each column.
numeric_test.info()

<class 'pandas.core.frame.DataFrame'>
Index: 878 entries, 2658 to 1939
Data columns (total 59 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   ms_subclass      878 non-null    int64  
 1   lot_frontage     878 non-null    float64
 2   lot_area         878 non-null    int64  
 3   lot_shape        878 non-null    int64  
 4   land_slope       878 non-null    int64  
 5   overall_qual     878 non-null    int64  
 6   overall_cond     878 non-null    int64  
 7   year_built       878 non-null    int64  
 8   year_remod/add   878 non-null    int64  
 9   mas_vnr_area     878 non-null    float64
 10  exter_qual       878 non-null    int64  
 11  exter_cond       878 non-null    int64  
 12  bsmt_qual        878 non-null    int64  
 13  bsmt_cond        878 non-null    int64  
 14  bsmt_exposure    878 non-null    int64  
 15  bsmtfin_type_1   878 non-null    int64  
 16  bsmtfin_sf_1     878 non-null    int64  
 17  bsmtfin_type_2   

In [16]:
# Put the numeric columns and the selected categorical columns back together.
# Both frames come from `test`, so rows are matched on the shared 'id' index.
comb_test_data = numeric_test.join(
    other = fewer_cats,
    how = 'inner',
)
comb_test_data.head(n = 5)

Unnamed: 0_level_0,ms_subclass,lot_frontage,lot_area,lot_shape,land_slope,overall_qual,overall_cond,year_built,year_remod/add,mas_vnr_area,...,bsmnt_fin_sf,has_wood_deck,has_pool,ms_zoning,neighborhood,bldg_type,house_style,foundation,garage_type,mo_sold
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2658,190,69.0,9142,4,3,6,8,1910,1950,0.0,...,0,0,0,RM,OldTown,2fmCon,2Story,Stone,Detchd,4
2718,90,0.0,9662,3,3,5,4,1977,1977,0.0,...,0,1,0,RL,Sawyer,Duplex,1Story,CBlock,Attchd,8
2414,60,58.0,17104,3,3,7,5,2006,2006,0.0,...,554,1,0,RL,Gilbert,1Fam,2Story,PConc,Attchd,9
1989,30,60.0,8520,4,3,5,6,1923,2006,0.0,...,0,0,0,RM,OldTown,1Fam,1Story,CBlock,Detchd,7
625,20,0.0,9500,3,3,6,5,1963,1963,247.0,...,609,0,0,RL,NAmes,1Fam,1Story,CBlock,Attchd,7


In [17]:
# List the column names of the combined frame (numeric columns followed by the joined categorical columns).
comb_test_data.columns

Index(['ms_subclass', 'lot_frontage', 'lot_area', 'lot_shape', 'land_slope',
       'overall_qual', 'overall_cond', 'year_built', 'year_remod/add',
       'mas_vnr_area', 'exter_qual', 'exter_cond', 'bsmt_qual', 'bsmt_cond',
       'bsmt_exposure', 'bsmtfin_type_1', 'bsmtfin_sf_1', 'bsmtfin_type_2',
       'bsmtfin_sf_2', 'bsmt_unf_sf', 'total_bsmt_sf', 'heating_qc',
       'central_air', 'electrical', '1st_flr_sf', '2nd_flr_sf',
       'low_qual_fin_sf', 'gr_liv_area', 'bsmt_full_bath', 'bsmt_half_bath',
       'full_bath', 'half_bath', 'bedroom_abvgr', 'kitchen_abvgr',
       'kitchen_qual', 'totrms_abvgrd', 'functional', 'fireplaces',
       'fireplace_qu', 'garage_finish', 'garage_cars', 'garage_area',
       'garage_qual', 'garage_cond', 'paved_drive', 'wood_deck_sf',
       'open_porch_sf', 'enclosed_porch', '3ssn_porch', 'screen_porch',
       'pool_area', 'fence', 'misc_val', 'yr_sold', 'age_at_sale',
       'age_since_remod', 'bsmnt_fin_sf', 'has_wood_deck', 'has_pool',
      

In [20]:
# Summarize comb_test_data: number of rows, and the non-null count and dtype of each column.
comb_test_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 878 entries, 2658 to 1939
Data columns (total 66 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   ms_subclass      878 non-null    int64  
 1   lot_frontage     878 non-null    float64
 2   lot_area         878 non-null    int64  
 3   lot_shape        878 non-null    int64  
 4   land_slope       878 non-null    int64  
 5   overall_qual     878 non-null    int64  
 6   overall_cond     878 non-null    int64  
 7   year_built       878 non-null    int64  
 8   year_remod/add   878 non-null    int64  
 9   mas_vnr_area     878 non-null    float64
 10  exter_qual       878 non-null    int64  
 11  exter_cond       878 non-null    int64  
 12  bsmt_qual        878 non-null    int64  
 13  bsmt_cond        878 non-null    int64  
 14  bsmt_exposure    878 non-null    int64  
 15  bsmtfin_type_1   878 non-null    int64  
 16  bsmtfin_sf_1     878 non-null    int64  
 17  bsmtfin_type_2   

In [41]:
# Count how many test rows fall in each neighborhood.
comb_test_data['neighborhood'].value_counts()
    #Some neighborhoods that appear in training data, like Landmark and GreenHill, don't appear in test data.
    #Correction: Blueste was originally listed here too, but it IS present in the test data (4 rows in the output below).
    #NOTE(review): the training-data neighborhoods are not visible in this notebook - confirm against the training notebook.

neighborhood
NAmes      133
CollgCr     87
OldTown     76
Somerst     52
Edwards     50
Gilbert     49
NridgHt     44
NWAmes      44
Sawyer      40
SawyerW     38
Mitchel     32
Crawfor     32
BrkSide     32
Timber      24
IDOTRR      24
NoRidge     23
ClearCr     17
SWISU       16
MeadowV     13
StoneBr     13
BrDale      11
Veenker      7
Blmngtn      6
NPkVill      6
Greens       5
Blueste      4
Name: count, dtype: int64

In [40]:
# One-hot encode the categorical columns of the combined test data.
#
# mo_sold is loaded from the CSV as a number (it is listed in numeric_test above), and
# pd.get_dummies only encodes object/string/category columns when `columns` is not given.
# The mo_sold_* dummy columns shown in the output therefore depended on a cast that is not
# in the notebook any more. Casting to str here makes a fresh Restart & Run All reproduce
# those columns instead of silently leaving mo_sold as one numeric column. The cast is a
# no-op if mo_sold already holds strings.
#
# NOTE(review): drop_first = True drops the first category *present in this frame* for each
# column. If the training data contains categories that are missing here, the dropped
# baseline and the resulting column set can differ from training - confirm and align the
# columns with the training features before predicting.
test_dum = pd.get_dummies(
    comb_test_data.astype({'mo_sold': str}),
    dtype = int,
    sparse = False,
    drop_first = True,
)
test_dum

Unnamed: 0_level_0,ms_subclass,lot_frontage,lot_area,lot_shape,land_slope,overall_qual,overall_cond,year_built,year_remod/add,mas_vnr_area,exter_qual,exter_cond,bsmt_qual,bsmt_cond,bsmt_exposure,bsmtfin_type_1,bsmtfin_sf_1,bsmtfin_type_2,bsmtfin_sf_2,bsmt_unf_sf,total_bsmt_sf,heating_qc,central_air,electrical,1st_flr_sf,2nd_flr_sf,low_qual_fin_sf,gr_liv_area,bsmt_full_bath,bsmt_half_bath,full_bath,half_bath,bedroom_abvgr,kitchen_abvgr,kitchen_qual,totrms_abvgrd,functional,fireplaces,fireplace_qu,garage_finish,garage_cars,garage_area,garage_qual,garage_cond,paved_drive,wood_deck_sf,open_porch_sf,enclosed_porch,3ssn_porch,screen_porch,pool_area,fence,misc_val,yr_sold,age_at_sale,age_since_remod,bsmnt_fin_sf,has_wood_deck,has_pool,ms_zoning_FV,ms_zoning_I (all),ms_zoning_RH,ms_zoning_RL,ms_zoning_RM,neighborhood_Blueste,neighborhood_BrDale,neighborhood_BrkSide,neighborhood_ClearCr,neighborhood_CollgCr,neighborhood_Crawfor,neighborhood_Edwards,neighborhood_Gilbert,neighborhood_Greens,neighborhood_IDOTRR,neighborhood_MeadowV,neighborhood_Mitchel,neighborhood_NAmes,neighborhood_NPkVill,neighborhood_NWAmes,neighborhood_NoRidge,neighborhood_NridgHt,neighborhood_OldTown,neighborhood_SWISU,neighborhood_Sawyer,neighborhood_SawyerW,neighborhood_Somerst,neighborhood_StoneBr,neighborhood_Timber,neighborhood_Veenker,bldg_type_2fmCon,bldg_type_Duplex,bldg_type_Twnhs,bldg_type_TwnhsE,house_style_1.5Unf,house_style_1Story,house_style_2.5Fin,house_style_2.5Unf,house_style_2Story,house_style_SFoyer,house_style_SLvl,foundation_CBlock,foundation_PConc,foundation_Slab,foundation_Stone,foundation_Wood,garage_type_Attchd,garage_type_Basment,garage_type_BuiltIn,garage_type_CarPort,garage_type_Detchd,mo_sold_10,mo_sold_11,mo_sold_12,mo_sold_2,mo_sold_3,mo_sold_4,mo_sold_5,mo_sold_6,mo_sold_7,mo_sold_8,mo_sold_9
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1
2658,190,69.0,9142,4,3,6,8,1910,1950,0.0,3,2,2,3,1,1,0,1,0,1020,1020,4,0,2.0,908,1020,0,1928,0,0,2,0,4,2,2,9,8,0,0,1,1,440,1,1,3,0,60,112,0,0,0,0,0,2006,96,56,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0
2718,90,0.0,9662,3,3,5,4,1977,1977,0.0,3,3,4,3,1,1,0,1,0,1967,1967,3,1,5.0,1967,0,0,1967,0,0,2,0,6,2,3,10,8,0,0,3,2,580,3,3,3,170,0,0,0,0,0,0,0,2006,29,29,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
2414,60,58.0,17104,3,3,7,5,2006,2006,0.0,4,3,4,4,3,6,554,1,0,100,654,5,1,5.0,664,832,0,1496,1,0,2,1,3,1,4,7,8,1,4,2,2,426,3,3,3,100,24,0,0,0,0,0,0,2006,0,0,554,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1989,30,60.0,8520,4,3,5,6,1923,2006,0.0,4,3,3,3,1,1,0,1,0,968,968,3,1,5.0,968,0,0,968,0,0,1,0,2,1,3,5,8,0,0,1,2,480,2,3,1,0,0,184,0,0,0,0,0,2007,84,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0
625,20,0.0,9500,3,3,6,5,1963,1963,247.0,3,3,4,3,1,4,609,1,0,785,1394,4,1,5.0,1394,0,0,1394,1,0,1,1,3,1,3,6,8,2,4,2,2,514,3,3,3,0,76,0,0,185,0,0,0,2009,46,46,609,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
333,160,21.0,1890,4,3,4,6,1972,1972,0.0,3,3,3,3,1,3,294,1,0,252,546,3,1,5.0,546,546,0,1092,0,0,1,1,3,1,3,5,8,0,0,1,1,286,3,3,3,0,0,64,0,0,0,0,0,2010,38,38,294,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
1327,20,52.0,8516,4,3,4,6,1958,2006,0.0,3,3,3,3,1,1,0,1,0,869,869,3,1,5.0,1093,0,0,1093,0,0,1,0,2,1,3,5,8,0,0,1,1,308,3,3,3,0,0,0,0,0,0,0,0,2008,50,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
858,20,0.0,9286,3,2,5,7,1977,1989,0.0,3,3,4,4,3,5,196,1,0,1072,1268,3,1,5.0,1268,0,0,1268,0,0,1,1,3,1,4,5,8,0,0,1,1,252,3,3,3,173,0,0,0,0,0,0,0,2009,32,20,196,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0
95,160,39.0,3515,4,3,7,5,2004,2004,0.0,4,3,4,3,1,1,0,1,0,840,840,5,1,5.0,840,840,0,1680,0,0,2,1,2,1,4,3,8,0,0,2,2,588,3,3,3,0,111,0,0,0,0,0,0,2010,6,6,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1568,20,75.0,10125,4,3,6,6,1977,1977,0.0,3,3,3,3,1,5,641,2,279,276,1196,3,1,5.0,1279,0,0,1279,0,1,2,0,3,1,3,6,8,2,2,1,2,473,3,3,3,238,83,0,0,0,0,3,0,2008,31,31,920,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0


In [None]:
#### test_dum has 121 columns, but the final test data had 125 columns. The 4-column mismatch needs to be fixed before the model can be run.

In [37]:
# Turn off pandas' display truncation so every row and every column is rendered.
# This is a session-wide setting and affects all DataFrame displays that run after it.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

In [38]:
# Preview the first rows of the dummy-encoded test data; every column is shown because
# display.max_columns was set to None in the cell above.
test_dum.head()

Unnamed: 0_level_0,ms_subclass,lot_frontage,lot_area,lot_shape,land_slope,overall_qual,overall_cond,year_built,year_remod/add,mas_vnr_area,exter_qual,exter_cond,bsmt_qual,bsmt_cond,bsmt_exposure,bsmtfin_type_1,bsmtfin_sf_1,bsmtfin_type_2,bsmtfin_sf_2,bsmt_unf_sf,total_bsmt_sf,heating_qc,central_air,electrical,1st_flr_sf,2nd_flr_sf,low_qual_fin_sf,gr_liv_area,bsmt_full_bath,bsmt_half_bath,full_bath,half_bath,bedroom_abvgr,kitchen_abvgr,kitchen_qual,totrms_abvgrd,functional,fireplaces,fireplace_qu,garage_finish,garage_cars,garage_area,garage_qual,garage_cond,paved_drive,wood_deck_sf,open_porch_sf,enclosed_porch,3ssn_porch,screen_porch,pool_area,fence,misc_val,yr_sold,age_at_sale,age_since_remod,bsmnt_fin_sf,has_wood_deck,has_pool,ms_zoning_FV,ms_zoning_I (all),ms_zoning_RH,ms_zoning_RL,ms_zoning_RM,neighborhood_Blueste,neighborhood_BrDale,neighborhood_BrkSide,neighborhood_ClearCr,neighborhood_CollgCr,neighborhood_Crawfor,neighborhood_Edwards,neighborhood_Gilbert,neighborhood_Greens,neighborhood_IDOTRR,neighborhood_MeadowV,neighborhood_Mitchel,neighborhood_NAmes,neighborhood_NPkVill,neighborhood_NWAmes,neighborhood_NoRidge,neighborhood_NridgHt,neighborhood_OldTown,neighborhood_SWISU,neighborhood_Sawyer,neighborhood_SawyerW,neighborhood_Somerst,neighborhood_StoneBr,neighborhood_Timber,neighborhood_Veenker,bldg_type_2fmCon,bldg_type_Duplex,bldg_type_Twnhs,bldg_type_TwnhsE,house_style_1.5Unf,house_style_1Story,house_style_2.5Fin,house_style_2.5Unf,house_style_2Story,house_style_SFoyer,house_style_SLvl,foundation_CBlock,foundation_PConc,foundation_Slab,foundation_Stone,foundation_Wood,garage_type_Attchd,garage_type_Basment,garage_type_BuiltIn,garage_type_CarPort,garage_type_Detchd,mo_sold_10,mo_sold_11,mo_sold_12,mo_sold_2,mo_sold_3,mo_sold_4,mo_sold_5,mo_sold_6,mo_sold_7,mo_sold_8,mo_sold_9
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1
2658,190,69.0,9142,4,3,6,8,1910,1950,0.0,3,2,2,3,1,1,0,1,0,1020,1020,4,0,2.0,908,1020,0,1928,0,0,2,0,4,2,2,9,8,0,0,1,1,440,1,1,3,0,60,112,0,0,0,0,0,2006,96,56,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0
2718,90,0.0,9662,3,3,5,4,1977,1977,0.0,3,3,4,3,1,1,0,1,0,1967,1967,3,1,5.0,1967,0,0,1967,0,0,2,0,6,2,3,10,8,0,0,3,2,580,3,3,3,170,0,0,0,0,0,0,0,2006,29,29,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
2414,60,58.0,17104,3,3,7,5,2006,2006,0.0,4,3,4,4,3,6,554,1,0,100,654,5,1,5.0,664,832,0,1496,1,0,2,1,3,1,4,7,8,1,4,2,2,426,3,3,3,100,24,0,0,0,0,0,0,2006,0,0,554,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1989,30,60.0,8520,4,3,5,6,1923,2006,0.0,4,3,3,3,1,1,0,1,0,968,968,3,1,5.0,968,0,0,968,0,0,1,0,2,1,3,5,8,0,0,1,2,480,2,3,1,0,0,184,0,0,0,0,0,2007,84,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0
625,20,0.0,9500,3,3,6,5,1963,1963,247.0,3,3,4,3,1,4,609,1,0,785,1394,4,1,5.0,1394,0,0,1394,1,0,1,1,3,1,3,6,8,2,4,2,2,514,3,3,3,0,76,0,0,185,0,0,0,2009,46,46,609,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
