## Importing libraries

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

---

## Importing data

In [2]:
train_path = os.path.join('data', 'train.csv')
test_path = os.path.join('data', 'test.csv')

train_data = pd.read_csv(train_path, index_col = 'Id')
test_data = pd.read_csv(test_path, index_col = 'Id')

---

## Data overview

In [3]:
print(f'Train data shape: {train_data.shape}\nTest data shape: {test_data.shape}')

Train data shape: (1460, 80)
Test data shape: (1459, 79)


In [4]:
train_data.head()

Unnamed: 0_level_0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,...,0,,,,0,2,2008,WD,Normal,208500
2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,...,0,,,,0,5,2007,WD,Normal,181500
3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,Inside,...,0,,,,0,9,2008,WD,Normal,223500
4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,Corner,...,0,,,,0,2,2006,WD,Abnorml,140000
5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,FR2,...,0,,,,0,12,2008,WD,Normal,250000


In [5]:
test_data.head()

Unnamed: 0_level_0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1461,20,RH,80.0,11622,Pave,,Reg,Lvl,AllPub,Inside,...,120,0,,MnPrv,,0,6,2010,WD,Normal
1462,20,RL,81.0,14267,Pave,,IR1,Lvl,AllPub,Corner,...,0,0,,,Gar2,12500,6,2010,WD,Normal
1463,60,RL,74.0,13830,Pave,,IR1,Lvl,AllPub,Inside,...,0,0,,MnPrv,,0,3,2010,WD,Normal
1464,60,RL,78.0,9978,Pave,,IR1,Lvl,AllPub,Inside,...,0,0,,,,0,6,2010,WD,Normal
1465,120,RL,43.0,5005,Pave,,IR1,HLS,AllPub,Inside,...,144,0,,,,0,1,2010,WD,Normal


In [6]:
train_data['MSSubClass'] = train_data['MSSubClass'].apply(str)
test_data['MSSubClass'] = test_data['MSSubClass'].apply(str)

Columns descriptions (from Kaggle):

- SalePrice: the property's sale price in dollars. This is the target variable that you're trying to predict.

- MSSubClass: The building class

- MSZoning: The general zoning classification

- LotFrontage: Linear feet of street connected to property

- LotArea: Lot size in square feet

- Street: Type of road access

- Alley: Type of alley access

- LotShape: General shape of property

- LandContour: Flatness of the property

- Utilities: Type of utilities available

- LotConfig: Lot configuration

- LandSlope: Slope of property

- Neighborhood: Physical locations within Ames city limits

- Condition1: Proximity to main road or railroad

- Condition2: Proximity to main road or railroad (if a second is present)

- BldgType: Type of dwelling

- HouseStyle: Style of dwelling

- OverallQual: Overall material and finish quality

- OverallCond: Overall condition rating

- YearBuilt: Original construction date

- YearRemodAdd: Remodel date

- RoofStyle: Type of roof

- RoofMatl: Roof material

- Exterior1st: Exterior covering on house

- Exterior2nd: Exterior covering on house (if more than one material)

- MasVnrType: Masonry veneer type

- MasVnrArea: Masonry veneer area in square feet

- ExterQual: Exterior material quality

- ExterCond: Present condition of the material on the exterior

- Foundation: Type of foundation

- BsmtQual: Height of the basement

- BsmtCond: General condition of the basement

- BsmtExposure: Walkout or garden level basement walls

- BsmtFinType1: Quality of basement finished area

- BsmtFinSF1: Type 1 finished square feet

- BsmtFinType2: Quality of second finished area (if present)

- BsmtFinSF2: Type 2 finished square feet

- BsmtUnfSF: Unfinished square feet of basement area

- TotalBsmtSF: Total square feet of basement area

- Heating: Type of heating

- HeatingQC: Heating quality and condition

- CentralAir: Central air conditioning

- Electrical: Electrical system

- 1stFlrSF: First Floor square feet

- 2ndFlrSF: Second floor square feet

- LowQualFinSF: Low quality finished square feet (all floors)

- GrLivArea: Above grade (ground) living area square feet

- BsmtFullBath: Basement full bathrooms

- BsmtHalfBath: Basement half bathrooms

- FullBath: Full bathrooms above grade

- HalfBath: Half baths above grade

- Bedroom: Number of bedrooms above basement level

- Kitchen: Number of kitchens

- KitchenQual: Kitchen quality

- TotRmsAbvGrd: Total rooms above grade (does not include bathrooms)

- Functional: Home functionality rating

- Fireplaces: Number of fireplaces

- FireplaceQu: Fireplace quality

- GarageType: Garage location

- GarageYrBlt: Year garage was built

- GarageFinish: Interior finish of the garage

- GarageCars: Size of garage in car capacity

- GarageArea: Size of garage in square feet

- GarageQual: Garage quality

- GarageCond: Garage condition

- PavedDrive: Paved driveway

- WoodDeckSF: Wood deck area in square feet

- OpenPorchSF: Open porch area in square feet

- EnclosedPorch: Enclosed porch area in square feet

- 3SsnPorch: Three season porch area in square feet

- ScreenPorch: Screen porch area in square feet

- PoolArea: Pool area in square feet

- PoolQC: Pool quality

- Fence: Fence quality

- MiscFeature: Miscellaneous feature not covered in other categories

- MiscVal: $Value of miscellaneous feature

- MoSold: Month Sold

- YrSold: Year Sold

- SaleType: Type of sale

- SaleCondition: Condition of sale

Now we will analyse how many and which columns we need to transform from categorical to numerical:

In [7]:
col_types = train_data.dtypes

In [8]:
col_types

MSSubClass        object
MSZoning          object
LotFrontage      float64
LotArea            int64
Street            object
                  ...   
MoSold             int64
YrSold             int64
SaleType          object
SaleCondition     object
SalePrice          int64
Length: 80, dtype: object

In [9]:
col_types.value_counts()

object     44
int64      33
float64     3
Name: count, dtype: int64

There are 44 categorical columns and 36 numerical ones (one of them is the label).

We have many columns to detail and to analyse. So, we will only focus in the multivariate analysis, specifically in the correlation with the label __'SalePrice'__.

---

## Multivariate Analysis

In [10]:
train_data['YrSold'].value_counts()

YrSold
2009    338
2007    329
2006    314
2008    304
2010    175
Name: count, dtype: int64

In [11]:
train_data.corr(numeric_only=True)['SalePrice'].sort_values()

KitchenAbvGr    -0.135907
EnclosedPorch   -0.128578
OverallCond     -0.077856
YrSold          -0.028923
LowQualFinSF    -0.025606
MiscVal         -0.021190
BsmtHalfBath    -0.016844
BsmtFinSF2      -0.011378
3SsnPorch        0.044584
MoSold           0.046432
PoolArea         0.092404
ScreenPorch      0.111447
BedroomAbvGr     0.168213
BsmtUnfSF        0.214479
BsmtFullBath     0.227122
LotArea          0.263843
HalfBath         0.284108
OpenPorchSF      0.315856
2ndFlrSF         0.319334
WoodDeckSF       0.324413
LotFrontage      0.351799
BsmtFinSF1       0.386420
Fireplaces       0.466929
MasVnrArea       0.477493
GarageYrBlt      0.486362
YearRemodAdd     0.507101
YearBuilt        0.522897
TotRmsAbvGrd     0.533723
FullBath         0.560664
1stFlrSF         0.605852
TotalBsmtSF      0.613581
GarageArea       0.623431
GarageCars       0.640409
GrLivArea        0.708624
OverallQual      0.790982
SalePrice        1.000000
Name: SalePrice, dtype: float64

In [12]:
train_data.corr(numeric_only=True)['GarageArea'].sort_values()

OverallCond     -0.151521
EnclosedPorch   -0.121777
LowQualFinSF    -0.067601
KitchenAbvGr    -0.064433
MiscVal         -0.027400
YrSold          -0.027378
BsmtHalfBath    -0.024536
BsmtFinSF2      -0.018227
MoSold           0.027974
3SsnPorch        0.035087
ScreenPorch      0.051412
PoolArea         0.061047
BedroomAbvGr     0.065253
2ndFlrSF         0.138347
HalfBath         0.163549
BsmtFullBath     0.179189
LotArea          0.180403
BsmtUnfSF        0.183303
WoodDeckSF       0.224666
OpenPorchSF      0.241435
Fireplaces       0.269141
BsmtFinSF1       0.296970
TotRmsAbvGrd     0.337822
LotFrontage      0.344997
YearRemodAdd     0.371600
MasVnrArea       0.373066
FullBath         0.405656
GrLivArea        0.468997
YearBuilt        0.478954
TotalBsmtSF      0.486665
1stFlrSF         0.489782
OverallQual      0.562022
GarageYrBlt      0.564567
SalePrice        0.623431
GarageCars       0.882475
GarageArea       1.000000
Name: GarageArea, dtype: float64

The values shows that majority of the numerical features are relevant, in a first view, to predict a house's sale price.

Now, it's time to preprocess the data.

---

## Preparing data for Machine Learning 

Initially, to make our preprocessing easy, we will separate the data into:

**X_train**: (possible) training features.

**y_train**: labels.

**X_test**: (possible) test features.

In [13]:
features = test_data.columns
label = 'SalePrice'

X_train = train_data[features].copy()
y_train = train_data[label].copy()
X_test = test_data[features].copy()

In [14]:
categorical = list(X_train.select_dtypes(['object']).columns)
numerical = list(set(features).difference(set(categorical)))

### Treating numerical data

For numerical data, we will do the standard scaling to improve our model's training speed and to avoid exploding/vanishing gradients during gradient descent process, then avoiding model's divergence. To handle the missing values, we will do the mean imputation.

In [15]:
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

In [16]:
scaler = StandardScaler()

X_train[numerical] = scaler.fit_transform(X_train[numerical])
X_test[numerical] = scaler.transform(X_test[numerical])

In [17]:
imputer = SimpleImputer(strategy = 'mean')

X_train[numerical] = pd.DataFrame(imputer.fit_transform(X_train[numerical]), columns = numerical, index = X_train.index)
X_test[numerical] = pd.DataFrame(imputer.transform(X_test[numerical]), columns = numerical, index = X_test.index)

In [18]:
X_train[numerical].isnull().sum().sum()

0

### Treating categorical data

In [19]:
categorical_description = X_train[categorical].describe()
categorical_description

Unnamed: 0,MSSubClass,MSZoning,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,...,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,PoolQC,Fence,MiscFeature,SaleType,SaleCondition
count,1460,1460,1460,91,1460,1460,1460,1460,1460,1460,...,1379,1379,1379,1379,1460,7,281,54,1460,1460
unique,15,5,2,2,4,4,2,5,3,25,...,6,3,5,5,3,3,4,4,9,6
top,20,RL,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,NAmes,...,Attchd,Unf,TA,TA,Y,Gd,MnPrv,Shed,WD,Normal
freq,536,1151,1454,50,925,1311,1459,1052,1382,225,...,870,605,1311,1326,1340,3,157,49,1267,1198


For missing values, we are going to use the mode imputation.

Then, we will separate in three groups:

- **unique = 2**: we will just transform to a boolean (0 or 1) numerical variable.

- **unique > 2 and non-ordered classes**: we will do one-hot encoding.

- **unique > 2 and ordered classes**: we will transform in ordered numerical variable.

Besides, we will scale the data using MinMaxScaler.

In [20]:
X_train.isnull().sum().sum()

7481

Mode imputation:

In [21]:
X_train.isnull().sum().sort_values(ascending=False).head(20)

PoolQC          1453
MiscFeature     1406
Alley           1369
Fence           1179
MasVnrType       872
FireplaceQu      690
GarageType        81
GarageQual        81
GarageCond        81
GarageFinish      81
BsmtExposure      38
BsmtFinType2      38
BsmtFinType1      37
BsmtCond          37
BsmtQual          37
Electrical         1
HalfBath           0
BedroomAbvGr       0
KitchenAbvGr       0
Fireplaces         0
dtype: int64

In [22]:
X_train = X_train.fillna('NA')
X_test = X_test.fillna('NA')

In [23]:
imputer = SimpleImputer(strategy = 'most_frequent')

X_train[categorical] = pd.DataFrame(imputer.fit_transform(X_train[categorical]), columns = categorical, index = X_train.index)
X_test[categorical] = pd.DataFrame(imputer.transform(X_test[categorical]), columns = categorical, index = X_test.index)

- #### unique = 2

In [24]:
from sklearn.preprocessing import OrdinalEncoder

In [25]:
unique2 = categorical_description.loc['unique'] == 2
unique2 = unique2[unique2 == True].index

unique2

Index(['Street', 'Alley', 'Utilities', 'CentralAir'], dtype='object')

In [26]:
ordinal_encoder = OrdinalEncoder(handle_unknown = 'use_encoded_value', unknown_value = -1)
X_train[unique2] = ordinal_encoder.fit_transform(X_train[unique2])
X_test[unique2] = ordinal_encoder.transform(X_test[unique2])

We will drop __'Utilities'__ and __'Street'__ because they are almost all of the same value, so it's correlation with the target variable won't be good.

In [27]:
X_train['Utilities'].value_counts()

Utilities
0.0    1459
1.0       1
Name: count, dtype: int64

In [28]:
X_train['Street'].value_counts()

Street
1.0    1454
0.0       6
Name: count, dtype: int64

In [29]:
X_train = X_train.drop('Utilities', axis = 1)
X_train = X_train.drop('Street', axis = 1)

X_test = X_test.drop('Utilities', axis = 1)
X_test = X_test.drop('Street', axis = 1)

- **unique > 2 and non-ordered classes**

In [30]:
uniquegr2 = categorical_description.loc['unique'] > 2
uniquegr2 = uniquegr2[uniquegr2 == True].index

uniquegr2

Index(['MSSubClass', 'MSZoning', 'LotShape', 'LandContour', 'LotConfig',
       'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType',
       'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd',
       'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual',
       'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating',
       'HeatingQC', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu',
       'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive',
       'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition'],
      dtype='object')

In [31]:
# To see all the columns
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

In [32]:
X_train[uniquegr2]

Unnamed: 0_level_0,MSSubClass,MSZoning,LotShape,LandContour,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,Heating,HeatingQC,Electrical,KitchenQual,Functional,FireplaceQu,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,PoolQC,Fence,MiscFeature,SaleType,SaleCondition
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1
1,60,RL,Reg,Lvl,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,No,GLQ,Unf,GasA,Ex,SBrkr,Gd,Typ,,Attchd,RFn,TA,TA,Y,,,,WD,Normal
2,20,RL,Reg,Lvl,FR2,Gtl,Veenker,Feedr,Norm,1Fam,1Story,Gable,CompShg,MetalSd,MetalSd,,TA,TA,CBlock,Gd,TA,Gd,ALQ,Unf,GasA,Ex,SBrkr,TA,Typ,TA,Attchd,RFn,TA,TA,Y,,,,WD,Normal
3,60,RL,IR1,Lvl,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,Mn,GLQ,Unf,GasA,Ex,SBrkr,Gd,Typ,TA,Attchd,RFn,TA,TA,Y,,,,WD,Normal
4,70,RL,IR1,Lvl,Corner,Gtl,Crawfor,Norm,Norm,1Fam,2Story,Gable,CompShg,Wd Sdng,Wd Shng,,TA,TA,BrkTil,TA,Gd,No,ALQ,Unf,GasA,Gd,SBrkr,Gd,Typ,Gd,Detchd,Unf,TA,TA,Y,,,,WD,Abnorml
5,60,RL,IR1,Lvl,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story,Gable,CompShg,VinylSd,VinylSd,BrkFace,Gd,TA,PConc,Gd,TA,Av,GLQ,Unf,GasA,Ex,SBrkr,Gd,Typ,TA,Attchd,RFn,TA,TA,Y,,,,WD,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1456,60,RL,Reg,Lvl,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,Gable,CompShg,VinylSd,VinylSd,,TA,TA,PConc,Gd,TA,No,Unf,Unf,GasA,Ex,SBrkr,TA,Typ,TA,Attchd,RFn,TA,TA,Y,,,,WD,Normal
1457,20,RL,Reg,Lvl,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,Gable,CompShg,Plywood,Plywood,Stone,TA,TA,CBlock,Gd,TA,No,ALQ,Rec,GasA,TA,SBrkr,TA,Min1,TA,Attchd,Unf,TA,TA,Y,,MnPrv,,WD,Normal
1458,70,RL,Reg,Lvl,Inside,Gtl,Crawfor,Norm,Norm,1Fam,2Story,Gable,CompShg,CemntBd,CmentBd,,Ex,Gd,Stone,TA,Gd,No,GLQ,Unf,GasA,Ex,SBrkr,Gd,Typ,Gd,Attchd,RFn,TA,TA,Y,,GdPrv,Shed,WD,Normal
1459,20,RL,Reg,Lvl,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,Hip,CompShg,MetalSd,MetalSd,,TA,TA,CBlock,TA,TA,Mn,GLQ,Rec,GasA,Gd,FuseA,Gd,Typ,,Attchd,Unf,TA,TA,Y,,,,WD,Normal


In [33]:
X_train['GarageFinish'].value_counts()

GarageFinish
Unf    605
RFn    422
Fin    352
NA      81
Name: count, dtype: int64

In [34]:
ordered_classes = ['Fence', 'PoolQC', 'GarageCond', 'GarageQual', 'GarageFinish', 'FireplaceQu', 'Functional',
                  'KitchenQual', 'HeatingQC', 'BsmtCond', 'BsmtQual', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',
                  'ExterCond', 'ExterQual', 'LotShape', 'LandSlope', 'PavedDrive']

unordered_classes = list(set(uniquegr2).difference(ordered_classes))

Now, we can do the transformations:

In [35]:
from sklearn.preprocessing import OneHotEncoder

In [36]:
onehot_encoder = OneHotEncoder(handle_unknown = 'infrequent_if_exist', min_frequency = 10)
onehot_encoder.fit(X_train[unordered_classes])

train_encoded = onehot_encoder.transform(X_train[unordered_classes]).toarray()
train_encoded = pd.DataFrame(train_encoded, columns = onehot_encoder.get_feature_names_out(), index = X_train.index)

test_encoded = onehot_encoder.transform(X_test[unordered_classes]).toarray()
test_encoded = pd.DataFrame(test_encoded, columns = onehot_encoder.get_feature_names_out(), index = X_test.index)

X_train = pd.concat([X_train, train_encoded], axis = 1)
X_test = pd.concat([X_test, test_encoded], axis = 1)

X_train = X_train.drop(columns = unordered_classes)
X_test = X_test.drop(columns = unordered_classes)

- **unique > 2 and ordered classes**

In [37]:
categories = {'Fence':['GdPrv', 'MnPrv', 'GdWo', 'MnWw', 'NA'],
              'PoolQC':['Ex', 'Gd', 'TA', 'Fa', 'NA'],
              'GarageCond':['Ex', 'Gd', 'TA', 'Fa', 'Po', 'NA'],
              'GarageQual':['Ex', 'Gd', 'TA', 'Fa', 'Po', 'NA'],
              'GarageFinish':['Fin', 'RFn', 'Unf', 'NA'],
              'FireplaceQu':['Ex', 'Gd', 'TA', 'Fa', 'Po', 'NA'],
              'Functional':['Typ', 'Min1', 'Min2', 'Mod', 'Maj1', 'Maj2', 'Sev', 'Sal'],
              'KitchenQual':['Ex', 'Gd', 'TA', 'Fa', 'Po'],
              'HeatingQC':['Ex', 'Gd', 'TA', 'Fa', 'Po'],
              'BsmtCond':['Ex', 'Gd', 'TA', 'Fa', 'Po', 'NA'],
              'BsmtQual':['Ex', 'Gd', 'TA', 'Fa', 'Po', 'NA'],
              'BsmtExposure':['Gd','Av','Mn','No','NA'],
              'BsmtFinType1':['GLQ','ALQ','BLQ','Rec','LwQ','Unf','NA'],
              'BsmtFinType2':['GLQ','ALQ','BLQ','Rec','LwQ','Unf','NA'],
              'ExterCond':['Ex', 'Gd', 'TA', 'Fa', 'Po'],
              'ExterQual':['Ex', 'Gd', 'TA', 'Fa', 'Po'],
              'LotShape':['Reg', 'IR1', 'IR2', 'IR3'],
              'LandSlope':['Gtl', 'Mod', 'Sev'],
              'PavedDrive':['Y','P','N']
              }


In [38]:
for key in categories.keys():
    ordinal_encoder = OrdinalEncoder(categories = [categories[key]], handle_unknown = 'use_encoded_value', unknown_value = -1)
    X_train[key] = ordinal_encoder.fit_transform(X_train[key].values.reshape(-1, 1))
    X_test[key] = ordinal_encoder.transform(X_test[key].values.reshape(-1, 1))

Finally, the scaling:

In [39]:
X_train

Unnamed: 0_level_0,LotFrontage,LotArea,Alley,LotShape,LandSlope,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,ExterQual,ExterCond,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,HeatingQC,CentralAir,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscVal,MoSold,YrSold,Neighborhood_Blmngtn,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Neighborhood_infrequent_sklearn,Heating_GasA,Heating_GasW,Heating_infrequent_sklearn,MSZoning_C (all),MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,SaleCondition_Abnorml,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,SaleCondition_infrequent_sklearn,MSSubClass_120,MSSubClass_160,MSSubClass_180,MSSubClass_190,MSSubClass_20,MSSubClass_30,MSSubClass_45,MSSubClass_50,MSSubClass_60,MSSubClass_70,MSSubClass_75,MSSubClass_80,MSSubClass_85,MSSubClass_90,MSSubClass_infrequent_sklearn,LandContour_Bnk,LandContour_HLS,LandContour_Low,LandContour_Lvl,HouseStyle_1.5Fin,HouseStyle_1.5Unf,HouseStyle_1Story,HouseStyle_2.5Unf,HouseStyle_2Story,HouseStyle_SFoyer,HouseStyle_SLvl,HouseStyle_infrequent_sklearn,MiscFeature_NA,MiscFeature_Shed,MiscFeature_infrequent_sklearn,LotConfig_Corner,LotConfig_CulDSac,LotConfig_FR2,LotConfig_Inside,LotConfig_infrequent_sklearn,RoofStyle_Flat,RoofStyle_Gable,RoofStyle_Gambrel,RoofStyle_Hip,RoofStyle_infrequent_sklearn,BldgType_1Fam,BldgType_2fmCon,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,Exterior2nd_AsbShng,Exterior2nd_BrkFace,Exterior2nd_CmentBd,Exterior2nd_HdBoard,Exterior2nd_ImStucc,Exterior2nd_MetalSd,Exterior2nd_Plywood,Exterior2nd_Stucco,Exterior2nd_VinylSd,Exterior2nd_Wd Sdng,Exterior2nd_Wd Shng,Exterior2nd_infrequent_sklearn,SaleType_COD,SaleType_New,SaleType_WD,SaleType_infrequent_sklearn,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_Detchd,GarageType_NA,GarageType_infrequent_sklearn,Condition2_Norm,Condition2_infrequent_sklearn,MasVnrType_BrkCmn,MasVnrType_BrkFace,MasVnrType_NA,MasVnrType_Stone,RoofMatl_CompShg,RoofMatl_Tar&Grv,RoofMatl_infrequent_sklearn,Condition1_Artery,Condition1_Feedr,Condition1_Norm,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition1_infrequent_sklearn,Electrical_FuseA,Electrical_FuseF,Electrical_SBrkr,Electrical_infrequent_sklearn,Exterior1st_AsbShng,Exterior1st_BrkFace,Exterior1st_CemntBd,Exterior1st_HdBoard,Exterior1st_MetalSd,Exterior1st_Plywood,Exterior1st_Stucco,Exterior1st_VinylSd,Exterior1st_Wd Sdng,Exterior1st_WdShing,Exterior1st_infrequent_sklearn,Foundation_BrkTil,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_infrequent_sklearn
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1
1,-0.208034,-0.207142,1.0,0.0,0.0,0.651479,-0.517200,1.050994,0.878668,0.510015,1.0,2.0,1.0,2.0,3.0,0.0,0.575425,5.0,-0.288653,-0.944591,-0.459303,0.0,1.0,-0.793434,1.161852,-0.120242,0.370333,1.107810,-0.241061,0.789741,1.227585,0.163779,-0.211454,1.0,0.912210,0.0,-0.951226,5.0,0.992426,1.0,0.311725,0.351000,2.0,2.0,0.0,-0.752176,0.216503,-0.359325,-0.116339,-0.270208,-0.068692,4.0,4.0,-0.087688,-1.599111,0.138777,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,0.409895,-0.091886,1.0,0.0,0.0,-0.071836,2.179628,0.156734,-0.429577,-0.572835,2.0,2.0,1.0,2.0,0.0,1.0,1.171992,5.0,-0.288653,-0.641228,0.466465,0.0,1.0,0.257140,-0.795163,-0.120242,-0.482512,-0.819964,3.948809,0.789741,-0.761621,0.163779,-0.211454,2.0,-0.318683,0.0,0.600495,2.0,-0.101543,1.0,0.311725,-0.060731,2.0,2.0,0.0,1.626195,-0.704483,-0.359325,-0.116339,-0.270208,-0.068692,4.0,4.0,-0.087688,-0.489110,-0.614439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,-0.084449,0.073480,1.0,1.0,0.0,0.651479,-0.517200,0.984752,0.830215,0.322174,1.0,2.0,1.0,2.0,2.0,0.0,0.092907,5.0,-0.288653,-0.301643,-0.313369,0.0,1.0,-0.627826,1.189351,-0.120242,0.515013,1.107810,-0.241061,0.789741,1.227585,0.163779,-0.211454,1.0,-0.318683,0.0,0.600495,2.0,0.911391,1.0,0.311725,0.631726,2.0,2.0,0.0,-0.752176,-0.070361,-0.359325,-0.116339,-0.270208,-0.068692,4.0,4.0,-0.087688,0.990891,0.138777,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,-0.414011,-0.096897,1.0,1.0,0.0,0.651479,-0.517200,-1.863632,-0.720298,-0.572835,2.0,2.0,2.0,1.0,3.0,1.0,-0.499274,5.0,-0.288653,-0.061670,-0.687324,1.0,1.0,-0.521734,0.937276,-0.120242,0.383659,1.107810,-0.241061,-1.026041,-0.761621,0.163779,-0.211454,1.0,0.296763,0.0,0.600495,1.0,0.789839,2.0,1.650307,0.790804,2.0,2.0,0.0,-0.752176,-0.176048,4.092524,-0.116339,-0.270208,-0.068692,4.0,4.0,-0.087688,-1.599111,-1.367655,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
5,0.574676,0.375148,1.0,1.0,0.0,1.374795,-0.517200,0.951632,0.733308,1.360826,1.0,2.0,1.0,2.0,1.0,0.0,0.463568,5.0,-0.288653,-0.174865,0.199680,0.0,1.0,-0.045611,1.617877,-0.120242,1.299326,1.107810,-0.241061,0.789741,1.227585,1.390023,-0.211454,1.0,1.527656,0.0,0.600495,2.0,0.870874,1.0,1.650307,1.698485,2.0,2.0,0.0,0.780197,0.563760,-0.359325,-0.116339,-0.270208,-0.068692,4.0,4.0,-0.087688,2.100892,0.138777,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1456,-0.331620,-0.260560,1.0,0.0,0.0,-0.071836,-0.517200,0.918511,0.733308,-0.572835,2.0,2.0,1.0,2.0,3.0,5.0,-0.973018,5.0,-0.288653,0.873321,-0.238122,0.0,1.0,-0.542435,0.795198,-0.120242,0.250402,-0.819964,-0.241061,0.789741,1.227585,0.163779,-0.211454,2.0,0.296763,0.0,0.600495,2.0,0.830356,1.0,0.311725,-0.060731,2.0,2.0,0.0,-0.752176,-0.100558,-0.359325,-0.116339,-0.270208,-0.068692,4.0,4.0,-0.087688,0.620891,-0.614439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1457,0.615871,0.266407,1.0,0.0,0.0,-0.071836,0.381743,0.222975,0.151865,0.084610,2.0,2.0,1.0,2.0,3.0,1.0,0.759659,3.0,0.722112,0.049262,1.104925,2.0,1.0,2.355701,-0.795163,-0.120242,1.061367,1.107810,-0.241061,0.789741,-0.761621,0.163779,-0.211454,2.0,0.296763,1.0,2.152216,2.0,-0.020508,2.0,0.311725,0.126420,2.0,2.0,0.0,2.033231,-0.704483,-0.359325,-0.116339,-0.270208,-0.068692,4.0,1.0,-0.087688,-1.599111,1.645210,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1458,-0.166839,-0.147810,1.0,0.0,0.0,0.651479,3.078570,-1.002492,1.024029,-0.572835,0.0,1.0,2.0,1.0,3.0,0.0,-0.369871,5.0,-0.288653,0.701265,0.215641,0.0,1.0,0.065656,1.844744,-0.120242,1.569647,-0.819964,-0.241061,0.789741,-0.761621,1.390023,-0.211454,1.0,1.527656,0.0,2.152216,1.0,-1.519651,1.0,-1.026858,-1.033914,2.0,2.0,0.0,-0.752176,0.201405,-0.359325,-0.116339,-0.270208,-0.068692,4.0,0.0,4.953112,-0.489110,1.645210,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1459,-0.084449,-0.080160,1.0,0.0,0.0,-0.795151,0.381743,-0.704406,0.539493,-0.572835,2.0,2.0,2.0,2.0,2.0,0.0,-0.865548,3.0,6.092188,-1.284176,0.046905,1.0,1.0,-0.218982,-0.795163,-0.120242,-0.832788,1.107810,-0.241061,-1.026041,-0.761621,-1.062465,-0.211454,1.0,-0.934130,0.0,-0.951226,5.0,-1.154995,2.0,-1.026858,-1.090059,2.0,2.0,0.0,2.168910,-0.704483,1.473789,-0.116339,-0.270208,-0.068692,4.0,4.0,-0.087688,-0.859110,1.645210,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [40]:
from sklearn.preprocessing import MinMaxScaler

In [41]:
to_scale = set(X_train.columns).difference(numerical)

In [42]:
len(X_train.columns)

197

In [43]:
scaler = MinMaxScaler()
to_scale = list(set(X_train.columns).difference(numerical))

X_train[to_scale] = scaler.fit_transform(X_train[to_scale])
X_test[to_scale] = scaler.transform(X_test[to_scale])

In [44]:
X_train.head()

Unnamed: 0_level_0,LotFrontage,LotArea,Alley,LotShape,LandSlope,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,ExterQual,ExterCond,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,HeatingQC,CentralAir,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscVal,MoSold,YrSold,Neighborhood_Blmngtn,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Neighborhood_infrequent_sklearn,Heating_GasA,Heating_GasW,Heating_infrequent_sklearn,MSZoning_C (all),MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,SaleCondition_Abnorml,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,SaleCondition_infrequent_sklearn,MSSubClass_120,MSSubClass_160,MSSubClass_180,MSSubClass_190,MSSubClass_20,MSSubClass_30,MSSubClass_45,MSSubClass_50,MSSubClass_60,MSSubClass_70,MSSubClass_75,MSSubClass_80,MSSubClass_85,MSSubClass_90,MSSubClass_infrequent_sklearn,LandContour_Bnk,LandContour_HLS,LandContour_Low,LandContour_Lvl,HouseStyle_1.5Fin,HouseStyle_1.5Unf,HouseStyle_1Story,HouseStyle_2.5Unf,HouseStyle_2Story,HouseStyle_SFoyer,HouseStyle_SLvl,HouseStyle_infrequent_sklearn,MiscFeature_NA,MiscFeature_Shed,MiscFeature_infrequent_sklearn,LotConfig_Corner,LotConfig_CulDSac,LotConfig_FR2,LotConfig_Inside,LotConfig_infrequent_sklearn,RoofStyle_Flat,RoofStyle_Gable,RoofStyle_Gambrel,RoofStyle_Hip,RoofStyle_infrequent_sklearn,BldgType_1Fam,BldgType_2fmCon,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,Exterior2nd_AsbShng,Exterior2nd_BrkFace,Exterior2nd_CmentBd,Exterior2nd_HdBoard,Exterior2nd_ImStucc,Exterior2nd_MetalSd,Exterior2nd_Plywood,Exterior2nd_Stucco,Exterior2nd_VinylSd,Exterior2nd_Wd Sdng,Exterior2nd_Wd Shng,Exterior2nd_infrequent_sklearn,SaleType_COD,SaleType_New,SaleType_WD,SaleType_infrequent_sklearn,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_Detchd,GarageType_NA,GarageType_infrequent_sklearn,Condition2_Norm,Condition2_infrequent_sklearn,MasVnrType_BrkCmn,MasVnrType_BrkFace,MasVnrType_NA,MasVnrType_Stone,RoofMatl_CompShg,RoofMatl_Tar&Grv,RoofMatl_infrequent_sklearn,Condition1_Artery,Condition1_Feedr,Condition1_Norm,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition1_infrequent_sklearn,Electrical_FuseA,Electrical_FuseF,Electrical_SBrkr,Electrical_infrequent_sklearn,Exterior1st_AsbShng,Exterior1st_BrkFace,Exterior1st_CemntBd,Exterior1st_HdBoard,Exterior1st_MetalSd,Exterior1st_Plywood,Exterior1st_Stucco,Exterior1st_VinylSd,Exterior1st_Wd Sdng,Exterior1st_WdShing,Exterior1st_infrequent_sklearn,Foundation_BrkTil,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_infrequent_sklearn
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1
1,-0.208034,-0.207142,0.5,0.0,0.0,0.651479,-0.5172,1.050994,0.878668,0.510015,0.333333,0.5,0.2,0.25,0.75,0.0,0.575425,0.833333,-0.288653,-0.944591,-0.459303,0.0,1.0,-0.793434,1.161852,-0.120242,0.370333,1.10781,-0.241061,0.789741,1.227585,0.163779,-0.211454,0.333333,0.91221,0.0,-0.951226,1.0,0.992426,0.333333,0.311725,0.351,0.4,0.4,0.0,-0.752176,0.216503,-0.359325,-0.116339,-0.270208,-0.068692,1.0,1.0,-0.087688,-1.599111,0.138777,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,0.409895,-0.091886,0.5,0.0,0.0,-0.071836,2.179628,0.156734,-0.429577,-0.572835,0.666667,0.5,0.2,0.25,0.0,0.166667,1.171992,0.833333,-0.288653,-0.641228,0.466465,0.0,1.0,0.25714,-0.795163,-0.120242,-0.482512,-0.819964,3.948809,0.789741,-0.761621,0.163779,-0.211454,0.666667,-0.318683,0.0,0.600495,0.4,-0.101543,0.333333,0.311725,-0.060731,0.4,0.4,0.0,1.626195,-0.704483,-0.359325,-0.116339,-0.270208,-0.068692,1.0,1.0,-0.087688,-0.48911,-0.614439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,-0.084449,0.07348,0.5,0.333333,0.0,0.651479,-0.5172,0.984752,0.830215,0.322174,0.333333,0.5,0.2,0.25,0.5,0.0,0.092907,0.833333,-0.288653,-0.301643,-0.313369,0.0,1.0,-0.627826,1.189351,-0.120242,0.515013,1.10781,-0.241061,0.789741,1.227585,0.163779,-0.211454,0.333333,-0.318683,0.0,0.600495,0.4,0.911391,0.333333,0.311725,0.631726,0.4,0.4,0.0,-0.752176,-0.070361,-0.359325,-0.116339,-0.270208,-0.068692,1.0,1.0,-0.087688,0.990891,0.138777,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,-0.414011,-0.096897,0.5,0.333333,0.0,0.651479,-0.5172,-1.863632,-0.720298,-0.572835,0.666667,0.5,0.4,0.0,0.75,0.166667,-0.499274,0.833333,-0.288653,-0.06167,-0.687324,0.25,1.0,-0.521734,0.937276,-0.120242,0.383659,1.10781,-0.241061,-1.026041,-0.761621,0.163779,-0.211454,0.333333,0.296763,0.0,0.600495,0.2,0.789839,0.666667,1.650307,0.790804,0.4,0.4,0.0,-0.752176,-0.176048,4.092524,-0.116339,-0.270208,-0.068692,1.0,1.0,-0.087688,-1.599111,-1.367655,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
5,0.574676,0.375148,0.5,0.333333,0.0,1.374795,-0.5172,0.951632,0.733308,1.360826,0.333333,0.5,0.2,0.25,0.25,0.0,0.463568,0.833333,-0.288653,-0.174865,0.19968,0.0,1.0,-0.045611,1.617877,-0.120242,1.299326,1.10781,-0.241061,0.789741,1.227585,1.390023,-0.211454,0.333333,1.527656,0.0,0.600495,0.4,0.870874,0.333333,1.650307,1.698485,0.4,0.4,0.0,0.780197,0.56376,-0.359325,-0.116339,-0.270208,-0.068692,1.0,1.0,-0.087688,2.100892,0.138777,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


### To make this easy, we will save the preprocessed data in .csv files.

In [45]:
y_train_path = os.path.join('data', 'preprocessed_y_train.csv')
X_train_path = os.path.join('data', 'preprocessed_X_train.csv')
X_test_path = os.path.join('data', 'preprocessed_X_test.csv')

y_train.to_csv(y_train_path)
X_train.to_csv(X_train_path)
X_test.to_csv(X_test_path)

---

### Final correlations

In [46]:
train_data = pd.concat([X_train, y_train], axis = 1)

In [47]:
train_data.corr()

Unnamed: 0,LotFrontage,LotArea,Alley,LotShape,LandSlope,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,ExterQual,ExterCond,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,HeatingQC,CentralAir,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscVal,MoSold,YrSold,Neighborhood_Blmngtn,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Neighborhood_infrequent_sklearn,Heating_GasA,Heating_GasW,Heating_infrequent_sklearn,MSZoning_C (all),MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,SaleCondition_Abnorml,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,SaleCondition_infrequent_sklearn,MSSubClass_120,MSSubClass_160,MSSubClass_180,MSSubClass_190,MSSubClass_20,MSSubClass_30,MSSubClass_45,MSSubClass_50,MSSubClass_60,MSSubClass_70,MSSubClass_75,MSSubClass_80,MSSubClass_85,MSSubClass_90,MSSubClass_infrequent_sklearn,LandContour_Bnk,LandContour_HLS,LandContour_Low,LandContour_Lvl,HouseStyle_1.5Fin,HouseStyle_1.5Unf,HouseStyle_1Story,HouseStyle_2.5Unf,HouseStyle_2Story,HouseStyle_SFoyer,HouseStyle_SLvl,HouseStyle_infrequent_sklearn,MiscFeature_NA,MiscFeature_Shed,MiscFeature_infrequent_sklearn,LotConfig_Corner,LotConfig_CulDSac,LotConfig_FR2,LotConfig_Inside,LotConfig_infrequent_sklearn,RoofStyle_Flat,RoofStyle_Gable,RoofStyle_Gambrel,RoofStyle_Hip,RoofStyle_infrequent_sklearn,BldgType_1Fam,BldgType_2fmCon,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,Exterior2nd_AsbShng,Exterior2nd_BrkFace,Exterior2nd_CmentBd,Exterior2nd_HdBoard,Exterior2nd_ImStucc,Exterior2nd_MetalSd,Exterior2nd_Plywood,Exterior2nd_Stucco,Exterior2nd_VinylSd,Exterior2nd_Wd Sdng,Exterior2nd_Wd Shng,Exterior2nd_infrequent_sklearn,SaleType_COD,SaleType_New,SaleType_WD,SaleType_infrequent_sklearn,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_Detchd,GarageType_NA,GarageType_infrequent_sklearn,Condition2_Norm,Condition2_infrequent_sklearn,MasVnrType_BrkCmn,MasVnrType_BrkFace,MasVnrType_NA,MasVnrType_Stone,RoofMatl_CompShg,RoofMatl_Tar&Grv,RoofMatl_infrequent_sklearn,Condition1_Artery,Condition1_Feedr,Condition1_Norm,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition1_infrequent_sklearn,Electrical_FuseA,Electrical_FuseF,Electrical_SBrkr,Electrical_infrequent_sklearn,Exterior1st_AsbShng,Exterior1st_BrkFace,Exterior1st_CemntBd,Exterior1st_HdBoard,Exterior1st_MetalSd,Exterior1st_Plywood,Exterior1st_Stucco,Exterior1st_VinylSd,Exterior1st_Wd Sdng,Exterior1st_WdShing,Exterior1st_infrequent_sklearn,Foundation_BrkTil,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_infrequent_sklearn,SalePrice
LotFrontage,1.000000,0.306795,-0.065017,0.197078,0.067608,0.234196,-0.052820,0.117598,0.082746,0.179283,-0.165052,0.015562,-0.141798,-0.047242,-0.162264,-0.074168,0.215828,-0.018695,0.043340,0.122156,0.363358,-0.088956,0.068666,0.414266,0.072483,0.036849,0.368392,0.091481,-0.006419,0.180424,0.048258,0.237023,-0.005805,-0.169389,0.320146,-0.023432,0.235755,-0.225856,0.064324,-0.217695,0.269729,0.323663,-0.108565,-0.103817,-0.089073,0.077106,0.137454,0.009790,0.062335,0.037684,0.180868,-0.234893,-0.030812,0.001168,0.010158,0.006768,-0.093002,-0.231824,-0.101868,0.039548,0.021077,0.012191,-0.020765,0.066219,-0.050813,-0.183786,0.000208,0.102769,0.072192,0.135648,0.125157,-0.092140,-0.061426,0.029869,0.011454,-0.055482,-0.035251,0.059107,-0.026029,-0.128239,0.005407,0.010178,-0.019648,-0.001320,-0.090781,-0.043224,0.325129,-0.307167,-0.021846,-0.018090,0.015818,-0.072074,0.124842,-0.037020,-0.214652,-0.354548,-0.169173,-0.020032,0.209123,-0.089600,-0.062229,-0.080799,0.195155,-0.037886,0.033543,0.060384,0.005701,0.012195,-0.035799,0.038613,0.063071,0.045367,-0.087050,-0.085661,-0.064053,0.065962,-0.029681,0.007361,-0.076110,0.041450,0.029332,-0.007426,0.007808,-0.000080,0.227633,-0.057831,-0.038008,-0.148562,0.001667,0.082676,-0.175395,-0.043370,0.165704,0.034440,0.371545,-0.021882,0.012195,-0.333971,-0.299456,-0.042274,-0.000503,-0.070788,0.002734,0.043585,-0.121578,0.068381,0.039966,0.083830,0.025756,-0.066365,-0.057361,-0.014095,0.126580,-0.089928,-0.015946,0.214428,0.041244,0.076699,-0.243313,-0.096993,0.014300,-0.028183,0.028183,0.021149,0.070696,-0.120436,0.086247,-0.155745,0.099889,0.118608,0.015476,0.055129,-0.077071,0.027601,-0.028583,-0.021217,0.131596,-0.049307,-0.028922,0.062871,-0.028357,-0.037992,0.021888,-0.076761,0.015076,-0.119003,0.010989,0.048668,0.074592,0.029545,-0.031814,0.029589,-0.130029,0.023369,0.056171,-0.022497,0.030446,0.334901
LotArea,0.306795,1.000000,-0.029676,0.315484,0.436868,0.105806,-0.005636,0.014228,0.013788,0.103960,-0.055570,-0.014732,-0.072336,-0.031349,-0.228830,-0.058995,0.214103,-0.086629,0.111170,-0.002618,0.260833,-0.003581,0.049755,0.299475,0.050986,0.004779,0.263116,0.158155,0.048046,0.126031,0.014259,0.119690,-0.017784,-0.067864,0.190015,0.025128,0.271364,-0.185332,-0.024812,-0.124756,0.154871,0.180403,-0.079313,-0.076097,-0.015134,0.171698,0.084774,-0.018340,0.020423,0.043160,0.077672,-0.091381,0.041423,0.038068,0.001205,-0.014261,-0.077438,-0.091949,-0.064342,0.285316,-0.030444,0.024652,-0.008103,0.020670,-0.038910,-0.089123,0.020684,-0.016124,0.030277,0.063068,0.008776,-0.067048,-0.031606,-0.001333,-0.012197,-0.067096,0.002052,0.215400,0.044440,-0.065911,-0.079682,0.127918,-0.025129,-0.008452,-0.083895,-0.033092,0.204065,-0.173709,-0.029126,0.008966,-0.010781,0.005711,0.022635,-0.013208,-0.129494,-0.166381,-0.068826,0.081210,0.094552,-0.058668,-0.032552,-0.001249,0.078389,-0.008230,0.011762,0.007719,-0.014166,-0.015346,0.010732,0.028095,0.089605,0.359475,-0.256843,0.032068,-0.033511,0.012717,-0.011907,-0.012719,-0.045961,0.000452,0.014516,-0.108671,0.108728,0.015879,0.042722,0.179440,-0.002167,-0.132773,-0.009511,0.114175,-0.111873,-0.009456,0.087112,0.022323,0.163554,0.077767,-0.015346,-0.141863,-0.167717,-0.025338,0.104502,-0.043005,0.020272,-0.007979,-0.080889,0.126782,0.016712,-0.040478,0.008122,-0.007627,-0.018385,-0.016019,0.020039,-0.002292,-0.015026,0.122839,-0.000956,0.037552,-0.126094,-0.074015,0.034599,-0.018044,0.018044,0.115884,-0.016388,-0.030547,0.038328,-0.183500,0.089207,0.164170,-0.009526,0.005168,-0.024955,0.025502,0.010452,0.019779,0.026634,-0.033353,-0.028974,0.046345,-0.015825,-0.032576,0.083839,-0.027589,-0.010126,-0.080428,0.111205,0.014606,-0.045292,0.048945,-0.001264,0.003437,-0.045356,0.066340,-0.033727,-0.018129,-0.002758,0.263843
Alley,-0.065017,-0.029676,1.000000,0.036179,0.015513,0.116905,-0.098240,0.223182,0.130684,0.083449,-0.103079,0.009759,-0.101486,-0.031892,-0.019730,-0.062404,0.038292,0.011613,-0.015963,0.022355,0.056456,-0.092434,0.104749,0.023936,0.004484,0.036846,0.024743,0.025543,-0.028541,0.095105,0.073401,-0.057889,-0.032155,-0.101764,-0.052932,-0.018815,0.002202,-0.008229,0.171163,-0.105901,0.065771,0.056943,-0.088455,-0.084988,-0.097880,-0.015284,0.037247,-0.065148,0.002873,0.003621,0.001697,-0.001576,0.052213,-0.013881,-0.011274,0.015176,0.002681,0.002600,0.075277,0.003454,0.008358,-0.010248,-0.058491,0.005907,-0.048402,0.002681,0.004603,0.002941,0.005666,0.004198,0.005828,-0.290673,0.045568,0.005707,0.005069,0.285921,0.003260,0.004038,0.002152,0.002152,0.071276,-0.071849,-0.025730,0.068599,0.324673,0.055320,-0.006079,-0.212957,0.028363,0.002248,-0.067919,-0.054461,0.076216,0.001295,0.052589,0.207832,0.002051,-0.035112,0.047278,-0.123829,-0.028147,-0.102278,0.012534,-0.077834,-0.023760,0.005024,0.002911,-0.024869,0.053797,-0.021767,-0.010439,0.003927,0.018870,-0.089795,-0.025730,0.030053,-0.029584,0.034239,-0.013479,0.005331,0.039009,0.053324,-0.056349,0.001448,0.004436,0.006479,0.004505,-0.009266,0.001295,0.002341,-0.019701,0.065625,0.005276,0.001945,-0.129429,-0.015399,-0.024869,0.085462,0.150387,-0.091528,0.024414,0.005113,0.002172,0.002051,0.072310,0.008107,-0.058926,0.052567,-0.094669,-0.047672,0.002681,-0.011929,0.076877,-0.042050,-0.036565,0.057955,0.002836,0.006255,-0.034909,-0.053956,-0.024699,-0.002516,0.002516,0.002516,0.040201,-0.059451,0.036767,-0.003326,0.002152,0.002516,-0.164741,0.017974,0.061994,0.027051,0.002152,0.003326,0.002516,-0.038247,-0.078088,0.090139,-0.092503,-0.115138,0.019741,0.005157,0.002816,0.071773,0.006981,-0.081356,0.064180,-0.108238,-0.017425,0.001714,-0.147279,-0.050339,0.132523,0.003193,0.037007,0.083121
LotShape,0.197078,0.315484,0.036179,1.000000,0.144248,0.198994,-0.033747,0.229365,0.175488,0.089025,-0.188668,-0.014073,-0.211557,-0.108252,-0.183540,-0.111492,0.157718,-0.061513,0.060419,0.014179,0.200469,-0.119556,0.099138,0.189251,0.089380,-0.007973,0.212741,0.064638,0.057729,0.184213,0.116576,0.060028,-0.094870,-0.157933,0.137148,-0.023444,0.202019,-0.193112,0.196807,-0.236527,0.194984,0.173472,-0.109218,-0.102864,-0.103643,0.161717,0.093135,-0.095092,0.033801,0.065182,0.047100,-0.053321,0.061751,0.028638,0.026617,-0.037391,-0.076118,-0.073820,-0.052272,0.099262,0.057229,0.026793,-0.096997,0.243155,-0.083135,-0.076118,-0.000018,-0.067947,0.017276,0.108708,0.018779,-0.132667,-0.065354,0.014976,-0.006483,-0.045512,0.097907,0.099678,0.074971,-0.033888,0.048725,-0.025037,-0.044856,-0.043972,-0.060099,-0.028611,0.236614,-0.217874,-0.028889,-0.037777,-0.011787,-0.000143,0.046144,-0.014247,-0.007531,-0.114181,-0.058239,-0.076693,0.054179,-0.078559,-0.050809,-0.101751,0.215599,-0.068125,-0.028611,0.032070,0.018582,-0.058584,-0.036758,0.030588,0.146153,0.138882,-0.179482,-0.106744,-0.056930,-0.000864,-0.020281,0.087786,-0.023240,0.019772,-0.004236,-0.012196,0.006516,0.019314,0.066253,0.348190,0.038756,-0.260769,-0.014247,0.083836,-0.024996,-0.047495,0.021492,-0.025165,0.142780,-0.078804,-0.058584,-0.108246,-0.046211,-0.072525,0.025346,-0.044415,0.035414,-0.001173,-0.091020,0.087493,0.012334,0.094684,-0.077213,-0.048128,-0.043213,-0.024730,0.047611,-0.021587,-0.012270,0.143509,-0.018233,0.133870,-0.178567,-0.082581,-0.024776,-0.056905,0.056905,0.033568,0.005988,-0.047904,0.061359,-0.110201,0.088578,0.068574,-0.076510,-0.010618,-0.025174,0.064826,-0.020281,0.074613,0.091912,-0.083288,-0.070061,0.102392,0.019314,-0.062402,0.010281,-0.040585,0.043830,-0.098043,0.058051,-0.010934,0.100391,-0.064536,-0.041048,-0.014607,-0.112175,-0.087387,0.163160,-0.044393,0.019934,0.267759
LandSlope,0.067608,0.436868,0.015513,0.144248,1.000000,-0.066450,0.010355,-0.073639,-0.059105,-0.021812,0.086525,0.039610,0.007085,-0.026608,-0.261883,-0.051000,0.113834,-0.076465,0.085518,-0.116456,0.032498,0.057444,-0.010849,0.068323,-0.009255,0.013888,0.043860,0.116165,0.070299,-0.046936,0.000781,-0.048196,-0.036467,0.039474,-0.033768,0.095140,0.123995,-0.051922,-0.072285,0.025383,-0.002683,0.004826,0.005955,0.015895,0.024538,0.095156,-0.032622,-0.008843,0.008694,0.052976,-0.015505,0.014400,0.042196,-0.003518,0.007072,-0.002305,-0.024499,-0.023760,-0.020511,0.348227,-0.035532,0.119169,0.007533,-0.043022,0.042515,-0.024499,0.068117,-0.041384,-0.051783,-0.023354,-0.053259,-0.018965,-0.010673,-0.040847,-0.008532,-0.056470,0.027566,0.087732,-0.019666,-0.019666,-0.017034,0.019737,0.003243,0.071477,-0.048723,0.000065,0.050155,-0.045848,0.026439,0.034397,0.037417,-0.004330,-0.042473,-0.011831,-0.004428,-0.047933,-0.018745,0.037244,0.008191,0.031553,0.006925,0.000205,-0.028497,0.040737,0.000065,0.017588,-0.026601,-0.029994,0.083075,0.159584,0.230273,0.539909,-0.522045,0.011316,0.003243,0.023561,-0.019666,-0.030910,-0.036397,0.011408,0.050447,-0.047764,0.054345,-0.013232,-0.021896,0.082275,-0.041166,-0.008678,-0.011831,0.269049,-0.054726,0.009018,-0.017662,0.045603,0.039880,0.052784,-0.029994,-0.039320,-0.037954,-0.026601,0.065805,-0.021738,0.007807,0.011329,0.025679,0.084959,0.025871,-0.132586,0.070577,-0.005741,-0.024499,-0.024649,-0.059194,0.073449,-0.031562,-0.021361,0.061625,-0.025898,-0.006301,0.010309,0.099991,-0.001601,0.001601,0.050796,-0.068628,0.053858,0.000192,-0.232174,0.181123,0.149187,0.000114,-0.011362,0.025002,-0.025918,-0.019666,-0.030393,0.026198,0.021638,-0.012572,-0.010128,-0.013232,-0.026601,0.052968,0.002454,-0.005782,0.022795,0.116269,0.027566,-0.135483,0.065268,-0.030393,-0.015667,-0.009095,0.067475,-0.066543,-0.009673,0.045603,0.051152
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Foundation_CBlock,0.023369,0.066340,-0.050339,-0.087387,0.067475,-0.429761,0.200286,-0.290574,-0.409630,-0.068558,0.469367,-0.074902,0.313935,-0.028173,0.058934,-0.024204,0.064342,-0.204852,0.186578,-0.245484,-0.111752,0.427970,0.040638,-0.093321,-0.237462,-0.009365,-0.266787,0.040846,0.118826,-0.382029,-0.092755,0.002844,0.040599,0.400653,-0.206910,0.059395,-0.037848,0.149821,-0.396280,0.281455,-0.318814,-0.254663,0.041521,0.043880,-0.050640,-0.082385,-0.170280,-0.009090,-0.002012,0.061245,0.025180,0.001698,-0.211528,0.048542,-0.021012,0.033112,-0.095092,0.120149,-0.072069,0.068928,-0.200881,-0.008629,0.041443,-0.197326,0.025787,0.072365,0.043903,0.402995,0.204796,-0.140556,-0.200541,-0.041731,-0.083682,0.200745,-0.046459,-0.219185,-0.083682,-0.021710,0.019549,0.099451,0.055645,-0.022747,-0.057846,0.044526,-0.189114,-0.012582,0.135936,-0.052986,0.077007,-0.003229,0.027522,0.128822,-0.253266,-0.019483,-0.167999,0.024771,0.044526,0.019215,0.212843,-0.051857,-0.018534,0.002171,-0.235725,-0.076954,-0.052402,0.118958,0.075076,0.122424,-0.019483,0.004369,-0.066200,0.074557,-0.001357,0.014063,-0.029486,0.123643,-0.028393,-0.222865,0.113713,0.125786,-0.027592,-0.084574,0.074595,0.043257,0.010044,0.029169,-0.050180,0.000532,-0.045920,0.064059,-0.081848,-0.028393,0.068958,0.036930,0.008274,0.024332,0.122424,0.035369,-0.131357,0.003746,0.044139,-0.035187,0.242090,-0.005738,0.101862,0.258067,-0.076174,-0.429760,0.054415,-0.021710,0.033721,0.149796,-0.259556,0.142020,-0.011677,-0.027581,0.033522,-0.146398,0.049926,0.065353,0.075183,0.020743,-0.020743,0.088886,0.056318,0.029125,-0.173864,-0.070106,0.051509,0.047775,0.055458,0.107609,-0.105100,0.045715,0.019549,-0.055277,0.020368,0.096733,0.002824,-0.075217,-0.051358,-0.020031,0.055375,-0.031003,0.271687,0.106093,0.217007,-0.062379,-0.429859,0.041859,0.038760,-0.000795,-0.292034,1.000000,-0.781558,-0.113262,-0.068999,-0.343263
Foundation_PConc,0.056171,-0.033727,0.132523,0.163160,-0.066543,0.568211,-0.299017,0.651199,0.569728,0.192934,-0.604901,0.082761,-0.561151,-0.175106,-0.204556,-0.180483,0.094711,0.122075,-0.139556,0.258687,0.307701,-0.529073,0.190622,0.197188,0.183758,-0.053568,0.292764,0.081876,-0.087953,0.461667,0.187270,-0.024661,-0.138556,-0.538126,0.192609,-0.130900,0.103510,-0.212506,0.645394,-0.477211,0.453421,0.388871,-0.155983,-0.150712,-0.164043,0.172161,0.228321,-0.170975,0.008892,-0.058786,-0.008171,-0.017440,0.233653,-0.061322,0.017712,-0.033018,0.121670,-0.093904,-0.117911,-0.044317,0.283952,-0.072097,-0.099976,0.243730,-0.091210,-0.045416,-0.013125,-0.331127,-0.147721,0.182197,0.258332,-0.180985,-0.022094,-0.162130,0.061995,0.280446,0.116073,0.053347,0.001999,-0.077726,0.133542,-0.099669,-0.087778,-0.074084,0.241971,-0.040929,0.043657,-0.160965,-0.091057,-0.035396,-0.033961,-0.157720,0.323299,0.005998,0.229754,0.027695,-0.023931,-0.090335,-0.053001,-0.146704,-0.050668,-0.184111,0.353619,-0.094385,-0.040929,-0.068495,-0.057685,-0.149119,-0.020380,-0.094445,0.082203,-0.052932,0.041127,-0.203094,-0.059481,-0.037855,-0.061781,0.265978,-0.082437,-0.072232,-0.028861,0.116382,-0.104995,-0.052295,-0.045022,0.024403,0.071647,-0.006743,0.032377,-0.055201,0.017902,-0.029891,0.011322,-0.052642,-0.039598,-0.093135,-0.149119,0.007703,0.202893,-0.069547,-0.043350,0.072311,-0.161002,0.026221,-0.155291,-0.209057,-0.057568,0.561608,-0.227214,-0.041912,-0.045416,-0.139091,0.328524,-0.193253,-0.014157,0.259788,-0.041606,0.191200,-0.292085,-0.131880,-0.090891,0.049870,-0.049870,-0.077217,0.089254,-0.212050,0.249988,0.057568,-0.045836,-0.036197,-0.118084,-0.119834,0.146891,-0.029440,-0.013946,0.025835,-0.008850,-0.194691,-0.091750,0.215245,-0.005092,-0.057685,-0.092174,0.068688,-0.185772,-0.163777,-0.167844,-0.053978,0.562049,-0.207102,-0.078419,-0.002037,-0.297362,-0.781558,1.000000,-0.115328,-0.070258,0.497734
Foundation_Slab,-0.022497,-0.018129,0.003193,-0.044393,-0.009673,-0.169032,-0.095917,-0.050031,-0.123396,-0.037405,0.098534,0.076831,0.514804,0.687403,0.197505,0.217560,-0.125791,0.180762,-0.037317,-0.166017,-0.311714,0.126273,-0.162450,-0.014789,-0.037968,-0.009671,-0.043316,-0.106004,-0.031164,0.023849,-0.098462,0.007963,0.241674,0.115922,-0.001417,0.090409,-0.056105,0.061917,-0.050781,0.115745,-0.046226,-0.059240,0.055507,0.062996,0.103604,-0.073509,-0.074077,0.002213,-0.015040,-0.034932,-0.008880,0.008248,0.029428,0.045142,-0.023371,0.021998,-0.014032,-0.013608,0.028864,-0.018077,-0.043746,-0.024596,0.092898,-0.007110,0.013428,-0.014032,0.005818,0.049254,-0.029659,-0.021975,-0.030504,-0.037444,-0.017064,0.019242,0.082887,-0.032343,-0.017064,-0.021133,-0.011264,-0.011264,-0.164598,0.034371,0.208376,-0.010736,-0.027906,0.038132,0.014235,-0.008820,-0.035244,0.167216,-0.015236,0.018345,-0.039559,-0.006776,-0.032543,-0.027454,-0.010736,0.019245,0.002113,-0.028793,-0.011769,-0.006632,-0.052259,-0.026763,-0.013608,-0.026295,0.031106,0.265803,-0.006776,0.025565,-0.024345,0.014179,-0.009799,0.008216,-0.012721,0.054575,-0.011264,-0.050495,0.013428,-0.027906,-0.009596,-0.060290,0.035727,0.084626,0.009485,0.009982,0.006940,-0.015523,-0.006776,0.045087,0.016214,-0.011264,-0.023091,-0.010182,-0.102534,0.018325,0.265803,-0.022520,-0.037623,0.031106,0.024458,-0.026763,0.009223,-0.010736,-0.023116,0.030282,0.023322,-0.059878,0.012008,0.046531,0.086393,-0.022520,-0.039037,0.034553,0.021198,-0.080147,-0.014845,-0.010108,0.044404,0.062793,0.093665,0.013172,-0.013172,-0.013172,-0.038793,0.062227,-0.040076,-0.023322,0.051029,-0.013172,-0.023836,0.039262,-0.011160,-0.014845,0.051029,-0.017408,-0.013172,0.053876,0.142183,-0.113729,-0.007578,0.031106,0.094133,-0.026995,0.005261,-0.024339,0.045786,0.024458,-0.050344,-0.021451,0.023322,0.069008,-0.043093,-0.113262,-0.115328,1.000000,-0.010182,-0.119740
Foundation_infrequent_sklearn,0.030446,-0.002758,0.037007,0.019934,0.045603,-0.005658,0.069397,-0.085329,-0.001184,-0.045239,-0.021904,0.018753,0.023983,-0.009284,0.005505,0.028710,-0.025771,0.021831,-0.022733,0.003682,-0.031444,0.002799,-0.085660,-0.016454,0.082941,-0.009470,0.055920,-0.047708,-0.018985,-0.001360,-0.007762,0.045090,0.062804,0.034352,0.061071,0.007552,-0.034179,0.040741,-0.048359,0.004331,0.001123,0.007950,0.003542,0.003407,0.047691,-0.002668,0.003839,0.078546,0.086385,-0.021281,-0.005410,0.005024,-0.057474,0.049556,0.000333,0.043886,-0.008548,-0.008290,-0.016019,-0.011013,-0.026650,0.032677,-0.021356,0.019843,0.042980,-0.008548,0.033912,-0.009379,-0.018068,-0.013387,-0.018583,0.075431,-0.010395,-0.018198,-0.016162,-0.019704,-0.010395,0.042087,-0.006862,-0.006862,-0.047977,0.070502,-0.007749,0.099559,-0.017000,-0.008290,-0.044886,0.040664,0.013014,-0.007170,-0.009282,0.014026,-0.024099,-0.004128,-0.019825,-0.016725,-0.006540,0.050276,-0.041829,-0.017541,-0.007170,0.061994,-0.018283,0.115941,-0.008290,-0.016019,-0.009282,-0.015135,-0.004128,0.026341,0.033287,-0.012522,-0.031263,0.058420,-0.007749,-0.060825,-0.006862,0.042903,-0.012699,-0.017000,-0.005846,-0.077299,0.082501,-0.004617,0.054166,-0.020660,-0.014364,-0.028958,-0.004128,-0.007465,0.020466,-0.006862,-0.038872,0.105598,0.011320,0.049102,-0.015135,-0.013719,-0.022920,0.066002,-0.010395,0.027778,-0.032011,-0.006540,0.016845,-0.025851,0.055562,-0.038778,0.020122,0.042087,-0.008548,-0.013719,-0.023782,0.030738,-0.011013,-0.006473,0.068169,-0.019946,0.012181,-0.019087,-0.008024,0.008024,-0.008024,-0.008024,-0.052148,0.064672,-0.024414,0.010605,-0.006862,-0.008024,-0.014521,-0.019087,0.031377,-0.009043,-0.006862,-0.010605,-0.008024,-0.020660,-0.010811,-0.006958,0.145173,0.066002,-0.014831,0.027289,-0.033351,0.015749,-0.022259,0.057058,-0.039826,0.018354,0.055562,-0.005466,-0.026252,-0.068999,-0.070258,-0.010182,1.000000,-0.008323
