## Importing libraries

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

---

## Importing data

In [2]:
train_path = os.path.join('data', 'train.csv')
test_path = os.path.join('data', 'test.csv')

train_data = pd.read_csv(train_path, index_col = 'Id')
test_data = pd.read_csv(test_path, index_col = 'Id')

---

## Data overview

In [3]:
print(f'Train data shape: {train_data.shape}\nTest data shape: {test_data.shape}')

Train data shape: (1460, 80)
Test data shape: (1459, 79)


In [4]:
train_data.head()

Unnamed: 0_level_0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,...,0,,,,0,2,2008,WD,Normal,208500
2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,...,0,,,,0,5,2007,WD,Normal,181500
3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,Inside,...,0,,,,0,9,2008,WD,Normal,223500
4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,Corner,...,0,,,,0,2,2006,WD,Abnorml,140000
5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,FR2,...,0,,,,0,12,2008,WD,Normal,250000


In [5]:
test_data.head()

Unnamed: 0_level_0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1461,20,RH,80.0,11622,Pave,,Reg,Lvl,AllPub,Inside,...,120,0,,MnPrv,,0,6,2010,WD,Normal
1462,20,RL,81.0,14267,Pave,,IR1,Lvl,AllPub,Corner,...,0,0,,,Gar2,12500,6,2010,WD,Normal
1463,60,RL,74.0,13830,Pave,,IR1,Lvl,AllPub,Inside,...,0,0,,MnPrv,,0,3,2010,WD,Normal
1464,60,RL,78.0,9978,Pave,,IR1,Lvl,AllPub,Inside,...,0,0,,,,0,6,2010,WD,Normal
1465,120,RL,43.0,5005,Pave,,IR1,HLS,AllPub,Inside,...,144,0,,,,0,1,2010,WD,Normal


In [6]:
train_data['MSSubClass'] = train_data['MSSubClass'].apply(str)
test_data['MSSubClass'] = test_data['MSSubClass'].apply(str)

Columns descriptions (from Kaggle):

- SalePrice: the property's sale price in dollars. This is the target variable that you're trying to predict.

- MSSubClass: The building class

- MSZoning: The general zoning classification

- LotFrontage: Linear feet of street connected to property

- LotArea: Lot size in square feet

- Street: Type of road access

- Alley: Type of alley access

- LotShape: General shape of property

- LandContour: Flatness of the property

- Utilities: Type of utilities available

- LotConfig: Lot configuration

- LandSlope: Slope of property

- Neighborhood: Physical locations within Ames city limits

- Condition1: Proximity to main road or railroad

- Condition2: Proximity to main road or railroad (if a second is present)

- BldgType: Type of dwelling

- HouseStyle: Style of dwelling

- OverallQual: Overall material and finish quality

- OverallCond: Overall condition rating

- YearBuilt: Original construction date

- YearRemodAdd: Remodel date

- RoofStyle: Type of roof

- RoofMatl: Roof material

- Exterior1st: Exterior covering on house

- Exterior2nd: Exterior covering on house (if more than one material)

- MasVnrType: Masonry veneer type

- MasVnrArea: Masonry veneer area in square feet

- ExterQual: Exterior material quality

- ExterCond: Present condition of the material on the exterior

- Foundation: Type of foundation

- BsmtQual: Height of the basement

- BsmtCond: General condition of the basement

- BsmtExposure: Walkout or garden level basement walls

- BsmtFinType1: Quality of basement finished area

- BsmtFinSF1: Type 1 finished square feet

- BsmtFinType2: Quality of second finished area (if present)

- BsmtFinSF2: Type 2 finished square feet

- BsmtUnfSF: Unfinished square feet of basement area

- TotalBsmtSF: Total square feet of basement area

- Heating: Type of heating

- HeatingQC: Heating quality and condition

- CentralAir: Central air conditioning

- Electrical: Electrical system

- 1stFlrSF: First Floor square feet

- 2ndFlrSF: Second floor square feet

- LowQualFinSF: Low quality finished square feet (all floors)

- GrLivArea: Above grade (ground) living area square feet

- BsmtFullBath: Basement full bathrooms

- BsmtHalfBath: Basement half bathrooms

- FullBath: Full bathrooms above grade

- HalfBath: Half baths above grade

- Bedroom: Number of bedrooms above basement level

- Kitchen: Number of kitchens

- KitchenQual: Kitchen quality

- TotRmsAbvGrd: Total rooms above grade (does not include bathrooms)

- Functional: Home functionality rating

- Fireplaces: Number of fireplaces

- FireplaceQu: Fireplace quality

- GarageType: Garage location

- GarageYrBlt: Year garage was built

- GarageFinish: Interior finish of the garage

- GarageCars: Size of garage in car capacity

- GarageArea: Size of garage in square feet

- GarageQual: Garage quality

- GarageCond: Garage condition

- PavedDrive: Paved driveway

- WoodDeckSF: Wood deck area in square feet

- OpenPorchSF: Open porch area in square feet

- EnclosedPorch: Enclosed porch area in square feet

- 3SsnPorch: Three season porch area in square feet

- ScreenPorch: Screen porch area in square feet

- PoolArea: Pool area in square feet

- PoolQC: Pool quality

- Fence: Fence quality

- MiscFeature: Miscellaneous feature not covered in other categories

- MiscVal: $Value of miscellaneous feature

- MoSold: Month Sold

- YrSold: Year Sold

- SaleType: Type of sale

- SaleCondition: Condition of sale

Now we will analyse how many and which columns we need to transform from categorical to numerical:

In [7]:
col_types = train_data.dtypes

In [8]:
col_types

MSSubClass        object
MSZoning          object
LotFrontage      float64
LotArea            int64
Street            object
                  ...   
MoSold             int64
YrSold             int64
SaleType          object
SaleCondition     object
SalePrice          int64
Length: 80, dtype: object

In [9]:
col_types.value_counts()

object     44
int64      33
float64     3
Name: count, dtype: int64

There are 44 categorical columns and 36 numerical ones (one of them is the label).

We have many columns to detail and to analyse. So, we will only focus in the multivariate analysis, specifically in the correlation with the label __'SalePrice'__.

---

## Multivariate Analysis

In [10]:
train_data['YrSold'].value_counts()

YrSold
2009    338
2007    329
2006    314
2008    304
2010    175
Name: count, dtype: int64

In [11]:
train_data.corr(numeric_only=True)['SalePrice'].sort_values()

KitchenAbvGr    -0.135907
EnclosedPorch   -0.128578
OverallCond     -0.077856
YrSold          -0.028923
LowQualFinSF    -0.025606
MiscVal         -0.021190
BsmtHalfBath    -0.016844
BsmtFinSF2      -0.011378
3SsnPorch        0.044584
MoSold           0.046432
PoolArea         0.092404
ScreenPorch      0.111447
BedroomAbvGr     0.168213
BsmtUnfSF        0.214479
BsmtFullBath     0.227122
LotArea          0.263843
HalfBath         0.284108
OpenPorchSF      0.315856
2ndFlrSF         0.319334
WoodDeckSF       0.324413
LotFrontage      0.351799
BsmtFinSF1       0.386420
Fireplaces       0.466929
MasVnrArea       0.477493
GarageYrBlt      0.486362
YearRemodAdd     0.507101
YearBuilt        0.522897
TotRmsAbvGrd     0.533723
FullBath         0.560664
1stFlrSF         0.605852
TotalBsmtSF      0.613581
GarageArea       0.623431
GarageCars       0.640409
GrLivArea        0.708624
OverallQual      0.790982
SalePrice        1.000000
Name: SalePrice, dtype: float64

In [12]:
train_data.corr(numeric_only=True)['GarageArea'].sort_values()

OverallCond     -0.151521
EnclosedPorch   -0.121777
LowQualFinSF    -0.067601
KitchenAbvGr    -0.064433
MiscVal         -0.027400
YrSold          -0.027378
BsmtHalfBath    -0.024536
BsmtFinSF2      -0.018227
MoSold           0.027974
3SsnPorch        0.035087
ScreenPorch      0.051412
PoolArea         0.061047
BedroomAbvGr     0.065253
2ndFlrSF         0.138347
HalfBath         0.163549
BsmtFullBath     0.179189
LotArea          0.180403
BsmtUnfSF        0.183303
WoodDeckSF       0.224666
OpenPorchSF      0.241435
Fireplaces       0.269141
BsmtFinSF1       0.296970
TotRmsAbvGrd     0.337822
LotFrontage      0.344997
YearRemodAdd     0.371600
MasVnrArea       0.373066
FullBath         0.405656
GrLivArea        0.468997
YearBuilt        0.478954
TotalBsmtSF      0.486665
1stFlrSF         0.489782
OverallQual      0.562022
GarageYrBlt      0.564567
SalePrice        0.623431
GarageCars       0.882475
GarageArea       1.000000
Name: GarageArea, dtype: float64

The values shows that majority of the numerical features are relevant, in a first view, to predict a house's sale price.

Now, it's time to preprocess the data.

---

## Preparing data for Machine Learning 

Initially, to make our preprocessing easy, we will separate the data into:

**X_train**: (possible) training features.

**y_train**: labels.

**X_test**: (possible) test features.

In [13]:
features = test_data.columns
label = 'SalePrice'

X_train = train_data[features].copy()
y_train = train_data[label].copy()
X_test = test_data[features].copy()

In [14]:
categorical = list(X_train.select_dtypes(['object']).columns)
numerical = list(set(features).difference(set(categorical)))

### Treating numerical data

For numerical data, we will do the standard scaling to improve our model's training speed and to avoid exploding/vanishing gradients during gradient descent process, then avoiding model's divergence. To handle the missing values, we will do the mean imputation.

In [15]:
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

In [16]:
X_train['LotArea'] = np.log(X_train['LotArea'])
X_test['LotArea'] = np.log(X_test['LotArea'])

In [17]:
scaler = StandardScaler()

X_train[numerical] = scaler.fit_transform(X_train[numerical])
X_test[numerical] = scaler.transform(X_test[numerical])

In [18]:
imputer = SimpleImputer(strategy = 'mean')

X_train[numerical] = pd.DataFrame(imputer.fit_transform(X_train[numerical]), columns = numerical, index = X_train.index)
X_test[numerical] = pd.DataFrame(imputer.transform(X_test[numerical]), columns = numerical, index = X_test.index)

In [19]:
X_train[numerical].isnull().sum().sum()

0

### Treating categorical data

In [20]:
categorical_description = X_train[categorical].describe()
categorical_description

Unnamed: 0,MSSubClass,MSZoning,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,...,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,PoolQC,Fence,MiscFeature,SaleType,SaleCondition
count,1460,1460,1460,91,1460,1460,1460,1460,1460,1460,...,1379,1379,1379,1379,1460,7,281,54,1460,1460
unique,15,5,2,2,4,4,2,5,3,25,...,6,3,5,5,3,3,4,4,9,6
top,20,RL,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,NAmes,...,Attchd,Unf,TA,TA,Y,Gd,MnPrv,Shed,WD,Normal
freq,536,1151,1454,50,925,1311,1459,1052,1382,225,...,870,605,1311,1326,1340,3,157,49,1267,1198


For missing values, we are going to use the mode imputation.

Then, we will separate in three groups:

- **unique = 2**: we will just transform to a boolean (0 or 1) numerical variable.

- **unique > 2 and non-ordered classes**: we will do one-hot encoding.

- **unique > 2 and ordered classes**: we will transform in ordered numerical variable.

Besides, we will scale the data using MinMaxScaler.

In [21]:
X_train.isnull().sum().sum()

7481

Mode imputation:

In [22]:
X_train.isnull().sum().sort_values(ascending=False).head(20)

PoolQC          1453
MiscFeature     1406
Alley           1369
Fence           1179
MasVnrType       872
FireplaceQu      690
GarageType        81
GarageQual        81
GarageCond        81
GarageFinish      81
BsmtExposure      38
BsmtFinType2      38
BsmtFinType1      37
BsmtCond          37
BsmtQual          37
Electrical         1
HalfBath           0
BedroomAbvGr       0
KitchenAbvGr       0
Fireplaces         0
dtype: int64

In [23]:
X_train = X_train.fillna('NA')
X_test = X_test.fillna('NA')

In [24]:
imputer = SimpleImputer(strategy = 'most_frequent')

X_train[categorical] = pd.DataFrame(imputer.fit_transform(X_train[categorical]), columns = categorical, index = X_train.index)
X_test[categorical] = pd.DataFrame(imputer.transform(X_test[categorical]), columns = categorical, index = X_test.index)

- #### unique = 2

In [25]:
from sklearn.preprocessing import OrdinalEncoder

In [26]:
unique2 = categorical_description.loc['unique'] == 2
unique2 = unique2[unique2 == True].index

unique2

Index(['Street', 'Alley', 'Utilities', 'CentralAir'], dtype='object')

In [27]:
ordinal_encoder = OrdinalEncoder(handle_unknown = 'use_encoded_value', unknown_value = -1)
X_train[unique2] = ordinal_encoder.fit_transform(X_train[unique2])
X_test[unique2] = ordinal_encoder.transform(X_test[unique2])

We will drop __'Utilities'__ and __'Street'__ because they are almost all of the same value, so it's correlation with the target variable won't be good.

In [28]:
X_train['Utilities'].value_counts()

Utilities
0.0    1459
1.0       1
Name: count, dtype: int64

In [29]:
X_train['Street'].value_counts()

Street
1.0    1454
0.0       6
Name: count, dtype: int64

In [30]:
X_train = X_train.drop(['Utilities', 'Street'], axis = 1)
X_test = X_test.drop(['Utilities', 'Street'], axis = 1)

- **unique > 2 and non-ordered classes**

In [31]:
uniquegr2 = categorical_description.loc['unique'] > 2
uniquegr2 = uniquegr2[uniquegr2 == True].index

uniquegr2

Index(['MSSubClass', 'MSZoning', 'LotShape', 'LandContour', 'LotConfig',
       'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType',
       'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd',
       'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual',
       'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating',
       'HeatingQC', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu',
       'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive',
       'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition'],
      dtype='object')

In [32]:
# To see all the columns
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

In [33]:
X_train[uniquegr2]

Unnamed: 0_level_0,MSSubClass,MSZoning,LotShape,LandContour,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,Heating,HeatingQC,Electrical,KitchenQual,Functional,FireplaceQu,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,PoolQC,Fence,MiscFeature,SaleType,SaleCondition
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1
1,60,RL,Reg,Lvl,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,No,GLQ,Unf,GasA,Ex,SBrkr,Gd,Typ,,Attchd,RFn,TA,TA,Y,,,,WD,Normal
2,20,RL,Reg,Lvl,FR2,Gtl,Veenker,Feedr,Norm,1Fam,1Story,Gable,CompShg,MetalSd,MetalSd,,TA,TA,CBlock,Gd,TA,Gd,ALQ,Unf,GasA,Ex,SBrkr,TA,Typ,TA,Attchd,RFn,TA,TA,Y,,,,WD,Normal
3,60,RL,IR1,Lvl,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,Mn,GLQ,Unf,GasA,Ex,SBrkr,Gd,Typ,TA,Attchd,RFn,TA,TA,Y,,,,WD,Normal
4,70,RL,IR1,Lvl,Corner,Gtl,Crawfor,Norm,Norm,1Fam,2Story,Gable,CompShg,Wd Sdng,Wd Shng,,TA,TA,BrkTil,TA,Gd,No,ALQ,Unf,GasA,Gd,SBrkr,Gd,Typ,Gd,Detchd,Unf,TA,TA,Y,,,,WD,Abnorml
5,60,RL,IR1,Lvl,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,Av,GLQ,Unf,GasA,Ex,SBrkr,Gd,Typ,TA,Attchd,RFn,TA,TA,Y,,,,WD,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1456,60,RL,Reg,Lvl,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,Gable,CompShg,VinylSd,VinylSd,,TA,TA,PConc,Gd,TA,No,Unf,Unf,GasA,Ex,SBrkr,TA,Typ,TA,Attchd,RFn,TA,TA,Y,,,,WD,Normal
1457,20,RL,Reg,Lvl,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,Gable,CompShg,Plywood,Plywood,Stone,TA,TA,CBlock,Gd,TA,No,ALQ,Rec,GasA,TA,SBrkr,TA,Min1,TA,Attchd,Unf,TA,TA,Y,,MnPrv,,WD,Normal
1458,70,RL,Reg,Lvl,Inside,Gtl,Crawfor,Norm,Norm,1Fam,2Story,Gable,CompShg,CemntBd,CmentBd,,Ex,Gd,Stone,TA,Gd,No,GLQ,Unf,GasA,Ex,SBrkr,Gd,Typ,Gd,Attchd,RFn,TA,TA,Y,,GdPrv,Shed,WD,Normal
1459,20,RL,Reg,Lvl,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,Hip,CompShg,MetalSd,MetalSd,,TA,TA,CBlock,TA,TA,Mn,GLQ,Rec,GasA,Gd,FuseA,Gd,Typ,,Attchd,Unf,TA,TA,Y,,,,WD,Normal


In [34]:
X_train['GarageFinish'].value_counts()

GarageFinish
Unf    605
RFn    422
Fin    352
NA      81
Name: count, dtype: int64

In [35]:
ordered_classes = ['Fence', 'PoolQC', 'GarageCond', 'GarageQual', 'GarageFinish', 'FireplaceQu', 'Functional',
                  'KitchenQual', 'HeatingQC', 'BsmtCond', 'BsmtQual', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',
                  'ExterCond', 'ExterQual', 'LotShape', 'LandSlope', 'PavedDrive']

unordered_classes = list(set(uniquegr2).difference(ordered_classes))

Now, we can do the transformations:

In [36]:
from sklearn.preprocessing import OneHotEncoder

In [37]:
onehot_encoder = OneHotEncoder(handle_unknown = 'infrequent_if_exist', min_frequency = 10)
onehot_encoder.fit(X_train[unordered_classes])

train_encoded = onehot_encoder.transform(X_train[unordered_classes]).toarray()
train_encoded = pd.DataFrame(train_encoded, columns = onehot_encoder.get_feature_names_out(), index = X_train.index)

test_encoded = onehot_encoder.transform(X_test[unordered_classes]).toarray()
test_encoded = pd.DataFrame(test_encoded, columns = onehot_encoder.get_feature_names_out(), index = X_test.index)

X_train = pd.concat([X_train, train_encoded], axis = 1)
X_test = pd.concat([X_test, test_encoded], axis = 1)

X_train = X_train.drop(columns = unordered_classes)
X_test = X_test.drop(columns = unordered_classes)

- **unique > 2 and ordered classes**

In [38]:
categories = {'Fence':['GdPrv', 'MnPrv', 'GdWo', 'MnWw', 'NA'],
              'PoolQC':['Ex', 'Gd', 'TA', 'Fa', 'NA'],
              'GarageCond':['Ex', 'Gd', 'TA', 'Fa', 'Po', 'NA'],
              'GarageQual':['Ex', 'Gd', 'TA', 'Fa', 'Po', 'NA'],
              'GarageFinish':['Fin', 'RFn', 'Unf', 'NA'],
              'FireplaceQu':['Ex', 'Gd', 'TA', 'Fa', 'Po', 'NA'],
              'Functional':['Typ', 'Min1', 'Min2', 'Mod', 'Maj1', 'Maj2', 'Sev', 'Sal'],
              'KitchenQual':['Ex', 'Gd', 'TA', 'Fa', 'Po'],
              'HeatingQC':['Ex', 'Gd', 'TA', 'Fa', 'Po'],
              'BsmtCond':['Ex', 'Gd', 'TA', 'Fa', 'Po', 'NA'],
              'BsmtQual':['Ex', 'Gd', 'TA', 'Fa', 'Po', 'NA'],
              'BsmtExposure':['Gd','Av','Mn','No','NA'],
              'BsmtFinType1':['GLQ','ALQ','BLQ','Rec','LwQ','Unf','NA'],
              'BsmtFinType2':['GLQ','ALQ','BLQ','Rec','LwQ','Unf','NA'],
              'ExterCond':['Ex', 'Gd', 'TA', 'Fa', 'Po'],
              'ExterQual':['Ex', 'Gd', 'TA', 'Fa', 'Po'],
              'LotShape':['Reg', 'IR1', 'IR2', 'IR3'],
              'LandSlope':['Gtl', 'Mod', 'Sev'],
              'PavedDrive':['Y','P','N']
              }


In [39]:
for key in categories.keys():
    ordinal_encoder = OrdinalEncoder(categories = [categories[key]], handle_unknown = 'use_encoded_value', unknown_value = -1)
    X_train[key] = ordinal_encoder.fit_transform(X_train[key].values.reshape(-1, 1))
    X_test[key] = ordinal_encoder.transform(X_test[key].values.reshape(-1, 1))

Finally, the scaling:

In [40]:
X_train

Unnamed: 0_level_0,LotFrontage,LotArea,Alley,LotShape,LandSlope,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,ExterQual,ExterCond,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,HeatingQC,CentralAir,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscVal,MoSold,YrSold,Foundation_BrkTil,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_infrequent_sklearn,MiscFeature_NA,MiscFeature_Shed,MiscFeature_infrequent_sklearn,Condition1_Artery,Condition1_Feedr,Condition1_Norm,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition1_infrequent_sklearn,RoofStyle_Flat,RoofStyle_Gable,RoofStyle_Gambrel,RoofStyle_Hip,RoofStyle_infrequent_sklearn,Exterior2nd_AsbShng,Exterior2nd_BrkFace,Exterior2nd_CmentBd,Exterior2nd_HdBoard,Exterior2nd_ImStucc,Exterior2nd_MetalSd,Exterior2nd_Plywood,Exterior2nd_Stucco,Exterior2nd_VinylSd,Exterior2nd_Wd Sdng,Exterior2nd_Wd Shng,Exterior2nd_infrequent_sklearn,HouseStyle_1.5Fin,HouseStyle_1.5Unf,HouseStyle_1Story,HouseStyle_2.5Unf,HouseStyle_2Story,HouseStyle_SFoyer,HouseStyle_SLvl,HouseStyle_infrequent_sklearn,BldgType_1Fam,BldgType_2fmCon,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,LotConfig_Corner,LotConfig_CulDSac,LotConfig_FR2,LotConfig_Inside,LotConfig_infrequent_sklearn,Heating_GasA,Heating_GasW,Heating_infrequent_sklearn,SaleCondition_Abnorml,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,SaleCondition_infrequent_sklearn,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_Detchd,GarageType_NA,GarageType_infrequent_sklearn,Electrical_FuseA,Electrical_FuseF,Electrical_SBrkr,Electrical_infrequent_sklearn,Exterior1st_AsbShng,Exterior1st_BrkFace,Exterior1st_CemntBd,Exterior1st_HdBoard,Exterior1st_MetalSd,Exterior1st_Plywood,Exterior1st_Stucco,Exterior1st_VinylSd,Exterior1st_Wd Sdng,Exterior1st_WdShing,Exterior1st_infrequent_sklearn,Condition2_Norm,Condition2_infrequent_sklearn,MSZoning_C (all),MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,LandContour_Bnk,LandContour_HLS,LandContour_Low,LandContour_Lvl,SaleType_COD,SaleType_New,SaleType_WD,SaleType_infrequent_sklearn,Neighborhood_Blmngtn,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Neighborhood_infrequent_sklearn,MSSubClass_120,MSSubClass_160,MSSubClass_180,MSSubClass_190,MSSubClass_20,MSSubClass_30,MSSubClass_45,MSSubClass_50,MSSubClass_60,MSSubClass_70,MSSubClass_75,MSSubClass_80,MSSubClass_85,MSSubClass_90,MSSubClass_infrequent_sklearn,MasVnrType_BrkCmn,MasVnrType_BrkFace,MasVnrType_NA,MasVnrType_Stone,RoofMatl_CompShg,RoofMatl_Tar&Grv,RoofMatl_infrequent_sklearn
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1
1,-0.208034,-0.133231,1.0,0.0,0.0,0.651479,-0.517200,1.050994,0.878668,0.510015,1.0,2.0,1.0,2.0,3.0,0.0,0.575425,5.0,-0.288653,-0.944591,-0.459303,0.0,1.0,-0.793434,1.161852,-0.120242,0.370333,1.107810,-0.241061,0.789741,1.227585,0.163779,-0.211454,1.0,0.912210,0.0,-0.951226,5.0,0.992426,1.0,0.311725,0.351000,2.0,2.0,0.0,-0.752176,0.216503,-0.359325,-0.116339,-0.270208,-0.068692,4.0,4.0,-0.087688,-1.599111,0.138777,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
2,0.409895,0.113442,1.0,0.0,0.0,-0.071836,2.179628,0.156734,-0.429577,-0.572835,2.0,2.0,1.0,2.0,0.0,1.0,1.171992,5.0,-0.288653,-0.641228,0.466465,0.0,1.0,0.257140,-0.795163,-0.120242,-0.482512,-0.819964,3.948809,0.789741,-0.761621,0.163779,-0.211454,2.0,-0.318683,0.0,0.600495,2.0,-0.101543,1.0,0.311725,-0.060731,2.0,2.0,0.0,1.626195,-0.704483,-0.359325,-0.116339,-0.270208,-0.068692,4.0,4.0,-0.087688,-0.489110,-0.614439,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
3,-0.084449,0.420061,1.0,1.0,0.0,0.651479,-0.517200,0.984752,0.830215,0.322174,1.0,2.0,1.0,2.0,2.0,0.0,0.092907,5.0,-0.288653,-0.301643,-0.313369,0.0,1.0,-0.627826,1.189351,-0.120242,0.515013,1.107810,-0.241061,0.789741,1.227585,0.163779,-0.211454,1.0,-0.318683,0.0,0.600495,2.0,0.911391,1.0,0.311725,0.631726,2.0,2.0,0.0,-0.752176,-0.070361,-0.359325,-0.116339,-0.270208,-0.068692,4.0,4.0,-0.087688,0.990891,0.138777,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
4,-0.414011,0.103347,1.0,1.0,0.0,0.651479,-0.517200,-1.863632,-0.720298,-0.572835,2.0,2.0,2.0,1.0,3.0,1.0,-0.499274,5.0,-0.288653,-0.061670,-0.687324,1.0,1.0,-0.521734,0.937276,-0.120242,0.383659,1.107810,-0.241061,-1.026041,-0.761621,0.163779,-0.211454,1.0,0.296763,0.0,0.600495,1.0,0.789839,2.0,1.650307,0.790804,2.0,2.0,0.0,-0.752176,-0.176048,4.092524,-0.116339,-0.270208,-0.068692,4.0,4.0,-0.087688,-1.599111,-1.367655,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
5,0.574676,0.878409,1.0,1.0,0.0,1.374795,-0.517200,0.951632,0.733308,1.360826,1.0,2.0,1.0,2.0,1.0,0.0,0.463568,5.0,-0.288653,-0.174865,0.199680,0.0,1.0,-0.045611,1.617877,-0.120242,1.299326,1.107810,-0.241061,0.789741,1.227585,1.390023,-0.211454,1.0,1.527656,0.0,0.600495,2.0,0.870874,1.0,1.650307,1.698485,2.0,2.0,0.0,0.780197,0.563760,-0.359325,-0.116339,-0.270208,-0.068692,4.0,4.0,-0.087688,2.100892,0.138777,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1456,-0.331620,-0.259188,1.0,0.0,0.0,-0.071836,-0.517200,0.918511,0.733308,-0.572835,2.0,2.0,1.0,2.0,3.0,5.0,-0.973018,5.0,-0.288653,0.873321,-0.238122,0.0,1.0,-0.542435,0.795198,-0.120242,0.250402,-0.819964,-0.241061,0.789741,1.227585,0.163779,-0.211454,2.0,0.296763,0.0,0.600495,2.0,0.830356,1.0,0.311725,-0.060731,2.0,2.0,0.0,-0.752176,-0.100558,-0.359325,-0.116339,-0.270208,-0.068692,4.0,4.0,-0.087688,0.620891,-0.614439,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
1457,0.615871,0.725419,1.0,0.0,0.0,-0.071836,0.381743,0.222975,0.151865,0.084610,2.0,2.0,1.0,2.0,3.0,1.0,0.759659,3.0,0.722112,0.049262,1.104925,2.0,1.0,2.355701,-0.795163,-0.120242,1.061367,1.107810,-0.241061,0.789741,-0.761621,0.163779,-0.211454,2.0,0.296763,1.0,2.152216,2.0,-0.020508,2.0,0.311725,0.126420,2.0,2.0,0.0,2.033231,-0.704483,-0.359325,-0.116339,-0.270208,-0.068692,4.0,1.0,-0.087688,-1.599111,1.645210,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
1458,-0.166839,-0.002325,1.0,0.0,0.0,0.651479,3.078570,-1.002492,1.024029,-0.572835,0.0,1.0,2.0,1.0,3.0,0.0,-0.369871,5.0,-0.288653,0.701265,0.215641,0.0,1.0,0.065656,1.844744,-0.120242,1.569647,-0.819964,-0.241061,0.789741,-0.761621,1.390023,-0.211454,1.0,1.527656,0.0,2.152216,1.0,-1.519651,1.0,-1.026858,-1.033914,2.0,2.0,0.0,-0.752176,0.201405,-0.359325,-0.116339,-0.270208,-0.068692,4.0,0.0,4.953112,-0.489110,1.645210,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
1459,-0.084449,0.136861,1.0,0.0,0.0,-0.795151,0.381743,-0.704406,0.539493,-0.572835,2.0,2.0,2.0,2.0,2.0,0.0,-0.865548,3.0,6.092188,-1.284176,0.046905,1.0,1.0,-0.218982,-0.795163,-0.120242,-0.832788,1.107810,-0.241061,-1.026041,-0.761621,-1.062465,-0.211454,1.0,-0.934130,0.0,-0.951226,5.0,-1.154995,2.0,-1.026858,-1.090059,2.0,2.0,0.0,2.168910,-0.704483,1.473789,-0.116339,-0.270208,-0.068692,4.0,4.0,-0.087688,-0.859110,1.645210,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0


In [41]:
from sklearn.preprocessing import MinMaxScaler

In [42]:
to_scale = set(X_train.columns).difference(numerical)

In [43]:
len(X_train.columns)

197

In [44]:
scaler = MinMaxScaler()
to_scale = list(set(X_train.columns).difference(numerical))

X_train[to_scale] = scaler.fit_transform(X_train[to_scale])
X_test[to_scale] = scaler.transform(X_test[to_scale])

In [45]:
X_train.head()

Unnamed: 0_level_0,LotFrontage,LotArea,Alley,LotShape,LandSlope,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,ExterQual,ExterCond,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,HeatingQC,CentralAir,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscVal,MoSold,YrSold,Foundation_BrkTil,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_infrequent_sklearn,MiscFeature_NA,MiscFeature_Shed,MiscFeature_infrequent_sklearn,Condition1_Artery,Condition1_Feedr,Condition1_Norm,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition1_infrequent_sklearn,RoofStyle_Flat,RoofStyle_Gable,RoofStyle_Gambrel,RoofStyle_Hip,RoofStyle_infrequent_sklearn,Exterior2nd_AsbShng,Exterior2nd_BrkFace,Exterior2nd_CmentBd,Exterior2nd_HdBoard,Exterior2nd_ImStucc,Exterior2nd_MetalSd,Exterior2nd_Plywood,Exterior2nd_Stucco,Exterior2nd_VinylSd,Exterior2nd_Wd Sdng,Exterior2nd_Wd Shng,Exterior2nd_infrequent_sklearn,HouseStyle_1.5Fin,HouseStyle_1.5Unf,HouseStyle_1Story,HouseStyle_2.5Unf,HouseStyle_2Story,HouseStyle_SFoyer,HouseStyle_SLvl,HouseStyle_infrequent_sklearn,BldgType_1Fam,BldgType_2fmCon,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,LotConfig_Corner,LotConfig_CulDSac,LotConfig_FR2,LotConfig_Inside,LotConfig_infrequent_sklearn,Heating_GasA,Heating_GasW,Heating_infrequent_sklearn,SaleCondition_Abnorml,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,SaleCondition_infrequent_sklearn,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_Detchd,GarageType_NA,GarageType_infrequent_sklearn,Electrical_FuseA,Electrical_FuseF,Electrical_SBrkr,Electrical_infrequent_sklearn,Exterior1st_AsbShng,Exterior1st_BrkFace,Exterior1st_CemntBd,Exterior1st_HdBoard,Exterior1st_MetalSd,Exterior1st_Plywood,Exterior1st_Stucco,Exterior1st_VinylSd,Exterior1st_Wd Sdng,Exterior1st_WdShing,Exterior1st_infrequent_sklearn,Condition2_Norm,Condition2_infrequent_sklearn,MSZoning_C (all),MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,LandContour_Bnk,LandContour_HLS,LandContour_Low,LandContour_Lvl,SaleType_COD,SaleType_New,SaleType_WD,SaleType_infrequent_sklearn,Neighborhood_Blmngtn,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Neighborhood_infrequent_sklearn,MSSubClass_120,MSSubClass_160,MSSubClass_180,MSSubClass_190,MSSubClass_20,MSSubClass_30,MSSubClass_45,MSSubClass_50,MSSubClass_60,MSSubClass_70,MSSubClass_75,MSSubClass_80,MSSubClass_85,MSSubClass_90,MSSubClass_infrequent_sklearn,MasVnrType_BrkCmn,MasVnrType_BrkFace,MasVnrType_NA,MasVnrType_Stone,RoofMatl_CompShg,RoofMatl_Tar&Grv,RoofMatl_infrequent_sklearn
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1
1,-0.208034,-0.133231,0.5,0.0,0.0,0.651479,-0.5172,1.050994,0.878668,0.510015,0.333333,0.5,0.2,0.25,0.75,0.0,0.575425,0.833333,-0.288653,-0.944591,-0.459303,0.0,1.0,-0.793434,1.161852,-0.120242,0.370333,1.10781,-0.241061,0.789741,1.227585,0.163779,-0.211454,0.333333,0.91221,0.0,-0.951226,1.0,0.992426,0.333333,0.311725,0.351,0.4,0.4,0.0,-0.752176,0.216503,-0.359325,-0.116339,-0.270208,-0.068692,1.0,1.0,-0.087688,-1.599111,0.138777,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
2,0.409895,0.113442,0.5,0.0,0.0,-0.071836,2.179628,0.156734,-0.429577,-0.572835,0.666667,0.5,0.2,0.25,0.0,0.166667,1.171992,0.833333,-0.288653,-0.641228,0.466465,0.0,1.0,0.25714,-0.795163,-0.120242,-0.482512,-0.819964,3.948809,0.789741,-0.761621,0.163779,-0.211454,0.666667,-0.318683,0.0,0.600495,0.4,-0.101543,0.333333,0.311725,-0.060731,0.4,0.4,0.0,1.626195,-0.704483,-0.359325,-0.116339,-0.270208,-0.068692,1.0,1.0,-0.087688,-0.48911,-0.614439,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
3,-0.084449,0.420061,0.5,0.333333,0.0,0.651479,-0.5172,0.984752,0.830215,0.322174,0.333333,0.5,0.2,0.25,0.5,0.0,0.092907,0.833333,-0.288653,-0.301643,-0.313369,0.0,1.0,-0.627826,1.189351,-0.120242,0.515013,1.10781,-0.241061,0.789741,1.227585,0.163779,-0.211454,0.333333,-0.318683,0.0,0.600495,0.4,0.911391,0.333333,0.311725,0.631726,0.4,0.4,0.0,-0.752176,-0.070361,-0.359325,-0.116339,-0.270208,-0.068692,1.0,1.0,-0.087688,0.990891,0.138777,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
4,-0.414011,0.103347,0.5,0.333333,0.0,0.651479,-0.5172,-1.863632,-0.720298,-0.572835,0.666667,0.5,0.4,0.0,0.75,0.166667,-0.499274,0.833333,-0.288653,-0.06167,-0.687324,0.25,1.0,-0.521734,0.937276,-0.120242,0.383659,1.10781,-0.241061,-1.026041,-0.761621,0.163779,-0.211454,0.333333,0.296763,0.0,0.600495,0.2,0.789839,0.666667,1.650307,0.790804,0.4,0.4,0.0,-0.752176,-0.176048,4.092524,-0.116339,-0.270208,-0.068692,1.0,1.0,-0.087688,-1.599111,-1.367655,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
5,0.574676,0.878409,0.5,0.333333,0.0,1.374795,-0.5172,0.951632,0.733308,1.360826,0.333333,0.5,0.2,0.25,0.25,0.0,0.463568,0.833333,-0.288653,-0.174865,0.19968,0.0,1.0,-0.045611,1.617877,-0.120242,1.299326,1.10781,-0.241061,0.789741,1.227585,1.390023,-0.211454,0.333333,1.527656,0.0,0.600495,0.4,0.870874,0.333333,1.650307,1.698485,0.4,0.4,0.0,0.780197,0.56376,-0.359325,-0.116339,-0.270208,-0.068692,1.0,1.0,-0.087688,2.100892,0.138777,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0


## Transforming the labels

In [50]:
y_train = np.log(y_train)

### To make this easy, we will save the preprocessed data in .csv files.

In [46]:
y_train_path = os.path.join('data', 'preprocessed_y_train.csv')
X_train_path = os.path.join('data', 'preprocessed_X_train.csv')
X_test_path = os.path.join('data', 'preprocessed_X_test.csv')

y_train.to_csv(y_train_path)
X_train.to_csv(X_train_path)
X_test.to_csv(X_test_path)

---

### Final correlations

In [47]:
train_data = pd.concat([X_train, y_train], axis = 1)

In [48]:
train_data.corr()

Unnamed: 0,LotFrontage,LotArea,Alley,LotShape,LandSlope,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,ExterQual,ExterCond,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,HeatingQC,CentralAir,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscVal,MoSold,YrSold,Foundation_BrkTil,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_infrequent_sklearn,MiscFeature_NA,MiscFeature_Shed,MiscFeature_infrequent_sklearn,Condition1_Artery,Condition1_Feedr,Condition1_Norm,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition1_infrequent_sklearn,RoofStyle_Flat,RoofStyle_Gable,RoofStyle_Gambrel,RoofStyle_Hip,RoofStyle_infrequent_sklearn,Exterior2nd_AsbShng,Exterior2nd_BrkFace,Exterior2nd_CmentBd,Exterior2nd_HdBoard,Exterior2nd_ImStucc,Exterior2nd_MetalSd,Exterior2nd_Plywood,Exterior2nd_Stucco,Exterior2nd_VinylSd,Exterior2nd_Wd Sdng,Exterior2nd_Wd Shng,Exterior2nd_infrequent_sklearn,HouseStyle_1.5Fin,HouseStyle_1.5Unf,HouseStyle_1Story,HouseStyle_2.5Unf,HouseStyle_2Story,HouseStyle_SFoyer,HouseStyle_SLvl,HouseStyle_infrequent_sklearn,BldgType_1Fam,BldgType_2fmCon,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,LotConfig_Corner,LotConfig_CulDSac,LotConfig_FR2,LotConfig_Inside,LotConfig_infrequent_sklearn,Heating_GasA,Heating_GasW,Heating_infrequent_sklearn,SaleCondition_Abnorml,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,SaleCondition_infrequent_sklearn,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_Detchd,GarageType_NA,GarageType_infrequent_sklearn,Electrical_FuseA,Electrical_FuseF,Electrical_SBrkr,Electrical_infrequent_sklearn,Exterior1st_AsbShng,Exterior1st_BrkFace,Exterior1st_CemntBd,Exterior1st_HdBoard,Exterior1st_MetalSd,Exterior1st_Plywood,Exterior1st_Stucco,Exterior1st_VinylSd,Exterior1st_Wd Sdng,Exterior1st_WdShing,Exterior1st_infrequent_sklearn,Condition2_Norm,Condition2_infrequent_sklearn,MSZoning_C (all),MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,LandContour_Bnk,LandContour_HLS,LandContour_Low,LandContour_Lvl,SaleType_COD,SaleType_New,SaleType_WD,SaleType_infrequent_sklearn,Neighborhood_Blmngtn,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Neighborhood_infrequent_sklearn,MSSubClass_120,MSSubClass_160,MSSubClass_180,MSSubClass_190,MSSubClass_20,MSSubClass_30,MSSubClass_45,MSSubClass_50,MSSubClass_60,MSSubClass_70,MSSubClass_75,MSSubClass_80,MSSubClass_85,MSSubClass_90,MSSubClass_infrequent_sklearn,MasVnrType_BrkCmn,MasVnrType_BrkFace,MasVnrType_NA,MasVnrType_Stone,RoofMatl_CompShg,RoofMatl_Tar&Grv,RoofMatl_infrequent_sklearn,SalePrice
LotFrontage,1.000000,0.568206,-0.065017,0.197078,0.067608,0.234196,-0.052820,0.117598,0.082746,0.179283,-0.165052,0.015562,-0.141798,-0.047242,-0.162264,-0.074168,0.215828,-0.018695,0.043340,0.122156,0.363358,-0.088956,0.068666,0.414266,0.072483,0.036849,0.368392,0.091481,-0.006419,0.180424,0.048258,0.237023,-0.005805,-0.169389,0.320146,-0.023432,0.235755,-0.225856,0.064324,-0.217695,0.269729,0.323663,-0.108565,-0.103817,-0.089073,0.077106,0.137454,0.009790,0.062335,0.037684,0.180868,-0.234893,-0.030812,0.001168,0.010158,0.006768,-0.130029,0.023369,0.056171,-0.022497,0.030446,-0.007426,0.007808,-0.000080,0.015476,0.055129,-0.077071,0.027601,-0.028583,-0.021217,0.131596,0.082676,-0.175395,-0.043370,0.165704,0.034440,-0.042274,-0.000503,-0.070788,0.002734,0.043585,-0.121578,0.068381,0.039966,0.083830,0.025756,-0.066365,-0.057361,-0.085661,-0.064053,0.065962,-0.029681,0.007361,-0.076110,0.041450,0.029332,0.371545,-0.021882,0.012195,-0.333971,-0.299456,0.227633,-0.057831,-0.038008,-0.148562,0.001667,0.005407,0.010178,-0.019648,-0.021846,-0.018090,0.015818,-0.072074,0.124842,-0.037020,0.214428,0.041244,0.076699,-0.243313,-0.096993,0.014300,-0.049307,-0.028922,0.062871,-0.028357,-0.037992,0.021888,-0.076761,0.015076,-0.119003,0.010989,0.048668,0.074592,0.029545,-0.031814,0.029589,-0.028183,0.028183,-0.001320,-0.090781,-0.043224,0.325129,-0.307167,0.038613,0.063071,0.045367,-0.087050,-0.014095,0.126580,-0.089928,-0.015946,-0.093002,-0.231824,-0.101868,0.039548,0.021077,0.012191,-0.020765,0.066219,-0.050813,-0.183786,0.000208,0.102769,0.072192,0.135648,0.125157,-0.092140,-0.061426,0.029869,0.011454,-0.055482,-0.035251,0.059107,-0.026029,-0.128239,-0.214652,-0.354548,-0.169173,-0.020032,0.209123,-0.089600,-0.062229,-0.080799,0.195155,-0.037886,0.033543,0.060384,0.005701,0.012195,-0.035799,0.021149,0.070696,-0.120436,0.086247,-0.155745,0.099889,0.118608,0.334901
LotArea,0.568206,1.000000,-0.102316,0.360950,0.255881,0.178215,-0.006305,0.021937,0.027670,0.123308,-0.103355,-0.038401,-0.081702,-0.026879,-0.212134,-0.043052,0.230969,-0.059631,0.092214,0.077247,0.351838,-0.047097,0.057671,0.443869,0.080465,0.012810,0.394577,0.138273,0.045218,0.179187,0.038717,0.279176,0.001678,-0.118496,0.360129,0.022416,0.327754,-0.275547,-0.019684,-0.190834,0.272007,0.322045,-0.145972,-0.137060,-0.023293,0.199846,0.147224,0.010951,0.055700,0.097381,0.091791,-0.100568,-0.003270,0.047791,0.010521,-0.027164,-0.047922,0.034122,-0.004257,-0.012064,0.014211,-0.079816,0.072575,0.034112,0.005031,0.021346,-0.082990,0.054224,0.035433,0.057726,0.058582,0.143136,-0.185677,0.001585,0.148774,0.052337,-0.032791,0.055465,-0.152360,0.017084,-0.010516,-0.132271,0.160777,0.005303,0.035862,0.050250,-0.044417,-0.064661,0.005033,-0.051499,0.063116,-0.002677,-0.047072,-0.103630,0.038198,0.030117,0.494274,0.043622,0.012114,-0.469607,-0.418659,0.096141,0.209135,0.007723,-0.197532,-0.019146,-0.024728,0.060766,-0.031656,-0.041526,0.016973,-0.004322,-0.018264,0.061605,-0.013795,0.206957,0.007468,0.076297,-0.214002,-0.140278,0.059532,-0.018726,-0.027047,0.033494,-0.019947,-0.046878,0.065189,-0.138938,0.003688,-0.128593,0.100719,0.006645,0.022207,0.078631,-0.003889,0.019974,-0.033856,0.033856,0.001886,-0.186757,-0.052386,0.460918,-0.405321,0.016084,0.071478,0.246582,-0.180058,-0.020645,0.058056,-0.036543,-0.001446,-0.206738,-0.329596,-0.099719,0.216391,0.018230,0.063025,0.000603,0.078170,-0.047513,-0.299686,0.051593,0.061974,0.109268,0.123028,0.048653,-0.095835,-0.038008,0.044773,0.026881,-0.137662,-0.014213,0.160589,0.067460,-0.201678,-0.284807,-0.528867,-0.236357,0.048166,0.225843,-0.090755,-0.052343,-0.009399,0.210105,0.013581,0.028329,0.059665,0.001361,0.012114,0.018751,0.067873,0.006192,-0.044357,0.042644,-0.201957,0.113926,0.167179,0.388520
Alley,-0.065017,-0.102316,1.000000,0.036179,0.015513,0.116905,-0.098240,0.223182,0.130684,0.083449,-0.103079,0.009759,-0.101486,-0.031892,-0.019730,-0.062404,0.038292,0.011613,-0.015963,0.022355,0.056456,-0.092434,0.104749,0.023936,0.004484,0.036846,0.024743,0.025543,-0.028541,0.095105,0.073401,-0.057889,-0.032155,-0.101764,-0.052932,-0.018815,0.002202,-0.008229,0.171163,-0.105901,0.065771,0.056943,-0.088455,-0.084988,-0.097880,-0.015284,0.037247,-0.065148,0.002873,0.003621,0.001697,-0.001576,0.052213,-0.013881,-0.011274,0.015176,-0.147279,-0.050339,0.132523,0.003193,0.037007,0.053324,-0.056349,0.001448,-0.164741,0.017974,0.061994,0.027051,0.002152,0.003326,0.002516,0.002341,-0.019701,0.065625,0.005276,0.001945,-0.091528,0.024414,0.005113,0.002172,0.002051,0.072310,0.008107,-0.058926,0.052567,-0.094669,-0.047672,0.002681,-0.089795,-0.025730,0.030053,-0.029584,0.034239,-0.013479,0.005331,0.039009,-0.129429,-0.015399,-0.024869,0.085462,0.150387,0.004436,0.006479,0.004505,-0.009266,0.001295,0.071276,-0.071849,-0.025730,0.028363,0.002248,-0.067919,-0.054461,0.076216,0.001295,0.057955,0.002836,0.006255,-0.034909,-0.053956,-0.024699,-0.038247,-0.078088,0.090139,-0.092503,-0.115138,0.019741,0.005157,0.002816,0.071773,0.006981,-0.081356,0.064180,-0.108238,-0.017425,0.001714,-0.002516,0.002516,0.068599,0.324673,0.055320,-0.006079,-0.212957,-0.021767,-0.010439,0.003927,0.018870,-0.011929,0.076877,-0.042050,-0.036565,0.002681,0.002600,0.075277,0.003454,0.008358,-0.010248,-0.058491,0.005907,-0.048402,0.002681,0.004603,0.002941,0.005666,0.004198,0.005828,-0.290673,0.045568,0.005707,0.005069,0.285921,0.003260,0.004038,0.002152,0.002152,0.052589,0.207832,0.002051,-0.035112,0.047278,-0.123829,-0.028147,-0.102278,0.012534,-0.077834,-0.023760,0.005024,0.002911,-0.024869,0.053797,0.002516,0.040201,-0.059451,0.036767,-0.003326,0.002152,0.002516,0.083121
LotShape,0.197078,0.360950,0.036179,1.000000,0.144248,0.198994,-0.033747,0.229365,0.175488,0.089025,-0.188668,-0.014073,-0.211557,-0.108252,-0.183540,-0.111492,0.157718,-0.061513,0.060419,0.014179,0.200469,-0.119556,0.099138,0.189251,0.089380,-0.007973,0.212741,0.064638,0.057729,0.184213,0.116576,0.060028,-0.094870,-0.157933,0.137148,-0.023444,0.202019,-0.193112,0.196807,-0.236527,0.194984,0.173472,-0.109218,-0.102864,-0.103643,0.161717,0.093135,-0.095092,0.033801,0.065182,0.047100,-0.053321,0.061751,0.028638,0.026617,-0.037391,-0.112175,-0.087387,0.163160,-0.044393,0.019934,-0.012196,0.006516,0.019314,-0.076510,-0.010618,-0.025174,0.064826,-0.020281,0.074613,0.091912,0.083836,-0.024996,-0.047495,0.021492,-0.025165,-0.072525,0.025346,-0.044415,0.035414,-0.001173,-0.091020,0.087493,0.012334,0.094684,-0.077213,-0.048128,-0.043213,-0.106744,-0.056930,-0.000864,-0.020281,0.087786,-0.023240,0.019772,-0.004236,0.142780,-0.078804,-0.058584,-0.108246,-0.046211,0.066253,0.348190,0.038756,-0.260769,-0.014247,0.048725,-0.025037,-0.044856,-0.028889,-0.037777,-0.011787,-0.000143,0.046144,-0.014247,0.143509,-0.018233,0.133870,-0.178567,-0.082581,-0.024776,-0.083288,-0.070061,0.102392,0.019314,-0.062402,0.010281,-0.040585,0.043830,-0.098043,0.058051,-0.010934,0.100391,-0.064536,-0.041048,-0.014607,-0.056905,0.056905,-0.043972,-0.060099,-0.028611,0.236614,-0.217874,0.030588,0.146153,0.138882,-0.179482,-0.024730,0.047611,-0.021587,-0.012270,-0.076118,-0.073820,-0.052272,0.099262,0.057229,0.026793,-0.096997,0.243155,-0.083135,-0.076118,-0.000018,-0.067947,0.017276,0.108708,0.018779,-0.132667,-0.065354,0.014976,-0.006483,-0.045512,0.097907,0.099678,0.074971,-0.033888,-0.007531,-0.114181,-0.058239,-0.076693,0.054179,-0.078559,-0.050809,-0.101751,0.215599,-0.068125,-0.028611,0.032070,0.018582,-0.058584,-0.036758,0.033568,0.005988,-0.047904,0.061359,-0.110201,0.088578,0.068574,0.267759
LandSlope,0.067608,0.255881,0.015513,0.144248,1.000000,-0.066450,0.010355,-0.073639,-0.059105,-0.021812,0.086525,0.039610,0.007085,-0.026608,-0.261883,-0.051000,0.113834,-0.076465,0.085518,-0.116456,0.032498,0.057444,-0.010849,0.068323,-0.009255,0.013888,0.043860,0.116165,0.070299,-0.046936,0.000781,-0.048196,-0.036467,0.039474,-0.033768,0.095140,0.123995,-0.051922,-0.072285,0.025383,-0.002683,0.004826,0.005955,0.015895,0.024538,0.095156,-0.032622,-0.008843,0.008694,0.052976,-0.015505,0.014400,0.042196,-0.003518,0.007072,-0.002305,-0.009095,0.067475,-0.066543,-0.009673,0.045603,-0.047764,0.054345,-0.013232,0.000114,-0.011362,0.025002,-0.025918,-0.019666,-0.030393,0.026198,0.269049,-0.054726,0.009018,-0.017662,0.045603,-0.026601,0.065805,-0.021738,0.007807,0.011329,0.025679,0.084959,0.025871,-0.132586,0.070577,-0.005741,-0.024499,0.011316,0.003243,0.023561,-0.019666,-0.030910,-0.036397,0.011408,0.050447,0.039880,0.052784,-0.029994,-0.039320,-0.037954,-0.021896,0.082275,-0.041166,-0.008678,-0.011831,-0.017034,0.019737,0.003243,0.026439,0.034397,0.037417,-0.004330,-0.042473,-0.011831,-0.021361,0.061625,-0.025898,-0.006301,0.010309,0.099991,0.021638,-0.012572,-0.010128,-0.013232,-0.026601,0.052968,0.002454,-0.005782,0.022795,0.116269,0.027566,-0.135483,0.065268,-0.030393,-0.015667,-0.001601,0.001601,0.071477,-0.048723,0.000065,0.050155,-0.045848,0.159584,0.230273,0.539909,-0.522045,-0.024649,-0.059194,0.073449,-0.031562,-0.024499,-0.023760,-0.020511,0.348227,-0.035532,0.119169,0.007533,-0.043022,0.042515,-0.024499,0.068117,-0.041384,-0.051783,-0.023354,-0.053259,-0.018965,-0.010673,-0.040847,-0.008532,-0.056470,0.027566,0.087732,-0.019666,-0.019666,-0.004428,-0.047933,-0.018745,0.037244,0.008191,0.031553,0.006925,0.000205,-0.028497,0.040737,0.000065,0.017588,-0.026601,-0.029994,0.083075,0.050796,-0.068628,0.053858,0.000192,-0.232174,0.181123,0.149187,0.051152
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
MasVnrType_Stone,0.086247,0.042644,0.036767,0.061359,0.000192,0.331584,-0.125495,0.289626,0.260178,0.232906,-0.347300,0.060011,-0.283049,-0.062869,-0.180083,-0.108187,0.188412,0.056062,-0.070979,0.130075,0.300793,-0.213137,0.081780,0.267445,-0.045165,-0.037274,0.155787,0.114646,-0.034139,0.205237,-0.043397,-0.035352,-0.043559,-0.312101,0.144162,0.002714,0.129780,-0.185098,0.274247,-0.220831,0.284658,0.285959,-0.081386,-0.082340,-0.084940,0.077735,0.123900,-0.089746,0.001700,-0.003641,0.007650,-0.015841,0.107440,-0.025474,0.019530,0.061262,-0.103331,-0.173864,0.249988,-0.040076,-0.024414,0.047919,-0.044321,-0.018172,-0.043573,-0.011652,0.031937,0.007143,-0.027009,-0.005117,0.016450,-0.029383,-0.082244,-0.027009,0.103284,-0.024414,-0.015698,-0.040916,0.143223,-0.077398,0.003620,-0.094232,-0.060885,-0.023429,0.207901,-0.115340,0.025379,-0.033647,-0.106449,-0.030502,0.108261,-0.027009,-0.000072,-0.019167,-0.055173,-0.023010,-0.038938,-0.028858,-0.033438,0.089243,0.036156,-0.006664,-0.012246,0.012067,0.004155,0.030084,0.046405,-0.034634,-0.030502,-0.055878,-0.001396,-0.015698,-0.195840,0.329271,-0.016248,0.112159,-0.014227,0.104660,-0.136786,-0.075130,-0.007567,-0.061584,-0.042551,0.078022,-0.018172,-0.015698,-0.045058,0.141034,-0.084059,-0.096724,-0.059855,-0.022248,0.232386,-0.118686,-0.023429,-0.021516,0.007567,-0.007567,-0.025744,0.062250,-0.032631,0.042039,-0.068716,-0.030074,0.061476,-0.002439,-0.015496,-0.039677,0.326471,-0.250831,0.009627,-0.033647,-0.032631,-0.063051,-0.025689,0.006775,-0.019406,0.011821,-0.052733,-0.049986,-0.033647,-0.030873,-0.071947,-0.037781,-0.052693,0.371100,-0.080723,-0.040916,-0.060588,-0.039018,0.200171,0.071092,0.040590,-0.027009,-0.027009,0.075426,0.041438,-0.025744,-0.027829,0.110576,-0.057629,-0.028220,-0.102543,0.010720,-0.051975,-0.032631,-0.050652,-0.015698,-0.033438,-0.016248,-0.031584,-0.205258,-0.377505,1.000000,0.005117,-0.027009,0.016450,0.330476
RoofMatl_CompShg,-0.155745,-0.201957,-0.003326,-0.110201,-0.232174,-0.072739,-0.023469,0.016804,0.028735,-0.049268,0.024419,0.056483,0.025327,0.025256,0.172868,0.026562,-0.105754,0.049763,-0.093091,0.022127,-0.121891,-0.041982,-0.014526,-0.190345,-0.010648,-0.044755,-0.153020,-0.069298,-0.076036,-0.002898,-0.021070,0.022401,0.028473,0.028857,-0.052709,-0.068949,-0.113003,0.070209,0.039653,0.019673,-0.035042,-0.062674,0.035352,0.006972,0.039015,-0.101542,-0.070672,0.033635,-0.007307,-0.091002,-0.147806,0.143739,0.009851,0.011807,-0.024202,0.028123,0.027621,-0.070106,0.057568,-0.023322,0.010605,-0.026389,0.025093,0.007893,-0.004217,-0.035237,0.066852,-0.075931,0.011732,0.018131,-0.088998,-0.648794,0.166929,0.011732,0.014264,-0.187894,-0.028686,-0.022148,0.027876,0.025036,0.011182,0.055803,-0.183015,-0.021028,0.097768,-0.037772,-0.010516,-0.033661,0.046238,0.013249,-0.000738,0.011732,0.021654,-0.011240,-0.096485,-0.060161,-0.031775,0.019832,-0.030011,0.023456,0.039187,-0.058169,-0.091283,0.024558,0.089263,0.007058,0.015215,-0.031888,0.013249,-0.024519,-0.045103,0.015869,0.031504,0.004184,-0.092021,-0.037010,-0.030235,0.012341,0.045665,0.010010,-0.037639,0.014221,-0.019957,-0.004497,0.007893,-0.028686,-0.003121,0.028117,0.042597,0.056717,-0.199393,-0.022148,0.099403,-0.019809,-0.099346,-0.140602,-0.013719,0.013719,0.011182,0.029066,0.014174,-0.057089,0.041882,0.003107,-0.003121,-0.245753,0.125685,-0.007175,0.021945,-0.021972,0.018829,0.014615,0.014174,0.027387,-0.245497,0.045564,0.025618,-0.045500,0.032205,0.021713,0.014615,0.025093,-0.057277,-0.016634,-0.008460,0.031772,0.019620,0.017773,0.031113,0.027632,0.033687,0.017773,-0.043043,-0.048159,0.011732,0.033895,0.028595,0.011182,0.019503,-0.026375,0.029990,0.012258,0.044542,0.004166,0.027876,-0.035571,-0.105191,0.015869,-0.030011,-0.092021,-0.037639,0.021654,-0.015536,0.005117,1.000000,-0.647068,-0.756658,-0.104505
RoofMatl_Tar&Grv,0.099889,0.113926,0.002152,0.088578,0.181123,-0.023447,0.004779,-0.019662,-0.033974,-0.020735,0.032490,-0.069536,0.021511,-0.024621,-0.134127,-0.018778,0.015044,-0.038006,0.088310,-0.047352,0.000420,0.037964,-0.009128,0.071021,-0.023777,-0.010477,0.031528,0.020175,0.111745,-0.003103,-0.034847,0.004557,-0.054384,0.007498,-0.013142,0.098016,0.040030,-0.021533,-0.048563,0.025499,-0.015250,-0.006663,-0.000953,0.009886,-0.025245,0.025559,0.058567,-0.031308,-0.010137,0.073800,0.139559,-0.110930,0.001469,-0.007640,0.045305,-0.011773,-0.029043,0.051509,-0.045836,0.051029,-0.006862,0.017075,-0.016237,-0.005108,-0.016064,-0.021117,0.011676,-0.010005,-0.007591,-0.011732,0.069672,0.834914,-0.145614,-0.007591,-0.043004,-0.006862,-0.010268,-0.011500,-0.018037,-0.035414,-0.007236,-0.036109,0.158520,-0.011732,-0.063263,0.035141,-0.014243,0.064378,-0.029919,-0.008573,0.008398,-0.007591,-0.023277,0.036349,0.058001,-0.006467,0.017273,-0.012833,0.025994,-0.015178,-0.025357,0.000381,0.106234,-0.015891,-0.051649,-0.004567,-0.041055,0.062045,-0.008573,0.038676,0.079798,-0.010268,-0.062462,0.001648,0.146968,0.007186,0.059885,-0.022066,-0.034380,0.013485,0.069672,-0.022856,0.046832,-0.001430,-0.005108,-0.010268,-0.016407,-0.018194,-0.036896,-0.036700,0.156957,-0.011500,-0.064321,0.032945,0.048159,0.108620,0.008877,-0.008877,-0.007236,-0.018808,-0.009171,0.045144,-0.036503,-0.018503,0.027147,0.139374,-0.075288,-0.015178,-0.026310,0.034006,-0.012183,-0.009457,-0.009171,-0.017722,0.161074,-0.029483,-0.016576,0.070449,-0.020839,-0.014049,-0.009457,-0.016237,0.072501,-0.019989,-0.014810,-0.020559,-0.025236,-0.011500,-0.020132,-0.017880,-0.021798,-0.011500,-0.014243,-0.007591,-0.007591,-0.021932,-0.018503,-0.007236,-0.012620,0.032235,-0.019405,-0.007932,-0.028821,-0.004961,-0.018037,-0.009171,0.063387,-0.010268,0.025994,-0.004567,0.069672,-0.023277,0.023097,-0.027009,-0.647068,1.000000,-0.008877,0.004921
RoofMatl_infrequent_sklearn,0.118608,0.167179,0.002516,0.068574,0.149187,0.115506,0.026682,-0.005178,-0.008552,0.082398,-0.059888,-0.014450,-0.051664,-0.012011,-0.111705,-0.018735,0.125800,-0.032675,0.046363,0.011585,0.159504,0.022506,0.026879,0.188742,0.034354,0.067682,0.173656,0.073587,0.003899,0.006462,0.057516,-0.033288,0.009293,-0.044277,0.080400,0.006378,0.113881,-0.073617,-0.010362,-0.047668,0.059037,0.087913,-0.045548,-0.017621,-0.029521,0.111258,0.042466,-0.017266,0.018276,0.056067,0.074178,-0.093393,-0.014180,-0.008934,-0.007109,-0.026790,-0.011321,0.047775,-0.036197,-0.013172,-0.008024,0.019967,-0.018987,-0.005973,0.019307,0.064323,-0.097692,0.108166,-0.008877,-0.013719,0.056978,0.134953,-0.094065,-0.008877,0.018169,0.252315,0.046428,0.038909,-0.021092,-0.002467,-0.008461,-0.042224,0.104096,0.037639,-0.073977,0.019404,0.026006,-0.011059,-0.034986,-0.010025,-0.006234,-0.008877,-0.008439,-0.016429,0.076806,0.084449,0.026863,-0.015006,0.017070,-0.017748,-0.029651,0.075964,0.028623,-0.018582,-0.072781,-0.005340,0.015252,-0.011383,-0.010025,-0.001008,-0.009275,-0.012007,0.012245,-0.006900,-0.005340,0.042378,-0.011699,0.002737,-0.030410,-0.024693,-0.010381,0.000948,-0.013985,0.007124,-0.005973,0.046428,0.018163,-0.021275,-0.024228,-0.042915,0.126917,0.038909,-0.075214,-0.002272,0.088998,0.091259,0.010381,-0.010381,-0.008461,-0.021993,-0.010725,0.036162,-0.023627,0.011791,-0.019186,0.202797,-0.100278,0.022426,-0.006220,-0.000343,-0.014247,-0.011059,-0.010725,-0.020723,0.183852,-0.034476,-0.019384,-0.000737,-0.024368,-0.016429,-0.011059,-0.018987,0.012950,0.038957,0.023795,-0.024041,-0.004091,-0.013448,-0.023542,-0.020908,-0.025490,-0.013448,0.068667,0.069672,-0.008877,-0.025647,-0.021636,-0.008461,-0.014757,0.006949,-0.022692,-0.009275,-0.033703,-0.001210,-0.021092,0.054518,0.083606,-0.012007,0.017070,0.124606,-0.010381,-0.008439,0.000569,0.016450,-0.756658,-0.008877,1.000000,0.132843
