# House Prices

## Import relevant packages

In [154]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import os
import warnings
warnings.filterwarnings('ignore')

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import RobustScaler, MinMaxScaler, OrdinalEncoder

## Loading the data

In [111]:
# The CSV files are read from a `data` folder located one level above the
# current working directory.
parent_dir = os.path.dirname(os.getcwd())
data_path = os.path.join(parent_dir, 'data')

train_csv, test_csv = (os.path.join(data_path, file_name) for file_name in ('train.csv', 'test.csv'))
train_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)

# Preview the first rows of the training set.
train_df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,196.0,Gd,TA,PConc,Gd,TA,No,GLQ,706,Unf,0,150,856,GasA,Ex,Y,SBrkr,856,854,0,1710,1,0,2,1,3,1,Gd,8,Typ,0,,Attchd,2003.0,RFn,2,548,TA,TA,Y,0,61,0,0,0,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,Norm,1Fam,1Story,6,8,1976,1976,Gable,CompShg,MetalSd,MetalSd,,0.0,TA,TA,CBlock,Gd,TA,Gd,ALQ,978,Unf,0,284,1262,GasA,Ex,Y,SBrkr,1262,0,0,1262,0,1,2,0,3,1,TA,6,Typ,1,TA,Attchd,1976.0,RFn,2,460,TA,TA,Y,298,0,0,0,0,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2001,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,162.0,Gd,TA,PConc,Gd,TA,Mn,GLQ,486,Unf,0,434,920,GasA,Ex,Y,SBrkr,920,866,0,1786,1,0,2,1,3,1,Gd,6,Typ,1,TA,Attchd,2001.0,RFn,2,608,TA,TA,Y,0,42,0,0,0,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,2Story,7,5,1915,1970,Gable,CompShg,Wd Sdng,Wd Shng,,0.0,TA,TA,BrkTil,TA,Gd,No,ALQ,216,Unf,0,540,756,GasA,Gd,Y,SBrkr,961,756,0,1717,1,0,1,0,3,1,Gd,7,Typ,1,Gd,Detchd,1998.0,Unf,3,642,TA,TA,Y,0,35,272,0,0,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,BrkFace,350.0,Gd,TA,PConc,Gd,TA,Av,GLQ,655,Unf,0,490,1145,GasA,Ex,Y,SBrkr,1145,1053,0,2198,1,0,2,1,4,1,Gd,9,Typ,1,TA,Attchd,2000.0,RFn,3,836,TA,TA,Y,192,84,0,0,0,0,,,,0,12,2008,WD,Normal,250000


In [112]:
# Lift pandas' display limits so that every column and every row is rendered.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

In [113]:
# Per-column overview of the raw training frame: non-null count and dtype.
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   int64  
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1201 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          91 non-null     object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   int64  
 18  OverallC

## Preprocessing

### Checking for duplicates

In [114]:
# Number of rows that are exact duplicates of an earlier row.
# This cell only counts them; nothing is removed here.
train_df.duplicated().sum()

0

### Removing columns with more than 50% missing values

In [115]:
# Count the missing values per column once, then show only the columns in
# which more than half of the rows are missing (largest count first).
null_counts = train_df.isnull().sum()
half_of_rows = 0.5 * len(train_df)
null_counts[null_counts > half_of_rows].sort_values(ascending=False)

PoolQC         1453
MiscFeature    1406
Alley          1369
Fence          1179
MasVnrType      872
dtype: int64

In [116]:
# Names of the columns in which more than 50% of the rows are missing.
missing_per_column = train_df.isnull().sum()
mostly_missing = missing_per_column > 0.5 * len(train_df)
columns_to_drop = missing_per_column[mostly_missing].index.tolist()
columns_to_drop

['Alley', 'MasVnrType', 'PoolQC', 'Fence', 'MiscFeature']

In [117]:
# Drop the mostly-empty columns.
# The frame is rebound instead of mutated with `inplace=True`, and
# `errors='ignore'` is passed, so re-running this cell after the columns are
# already gone is a no-op rather than a KeyError.
train_df = train_df.drop(columns=columns_to_drop, errors='ignore')

# Columns that still contain missing values, largest count first.
remaining_nulls = train_df.isnull().sum()
remaining_nulls[remaining_nulls > 0].sort_values(ascending=False)

FireplaceQu     690
LotFrontage     259
GarageType       81
GarageYrBlt      81
GarageFinish     81
GarageQual       81
GarageCond       81
BsmtExposure     38
BsmtFinType2     38
BsmtQual         37
BsmtCond         37
BsmtFinType1     37
MasVnrArea        8
Electrical        1
dtype: int64

### Filling in missing values

In [118]:
# Encode a missing FireplaceQu as its own explicit 'No' level.
# Assign the result back instead of calling `fillna(..., inplace=True)` on the
# selected column: that form is chained assignment, which acts on a temporary
# (and leaves `train_df` unchanged) when pandas Copy-on-Write is active, and
# emits a FutureWarning on pandas 2.x that this notebook's warning filter hides.
train_df['FireplaceQu'] = train_df['FireplaceQu'].fillna('No')
train_df['FireplaceQu'].value_counts()

FireplaceQu
No    690
Gd    380
TA    313
Fa     33
Ex     24
Po     20
Name: count, dtype: int64

In [119]:
# Replace missing LotFrontage values with the column median.
median_imputer = SimpleImputer(strategy='median')
lot_frontage = train_df[['LotFrontage']]
train_df[['LotFrontage']] = median_imputer.fit_transform(lot_frontage)

# Sanity check: number of missing values left in the column.
train_df['LotFrontage'].isna().sum()

0

In [126]:
# Exploratory check that GarageType is stored with the object dtype; the same
# comparison is used by the garage-column filter in the next cell.
train_df['GarageType'].dtype == 'O'

True

In [129]:
# Object-typed (categorical) garage columns; numeric garage columns such as
# GarageYrBlt are handled separately.
garage_cols = [col for col in train_df.columns if ('Garage' in col) and train_df[col].dtype == 'O']

# Fill all of them in one vectorised assignment. The previous per-column
# `train_df[col].fillna('No', inplace=True)` is chained assignment, which does
# not update `train_df` when pandas Copy-on-Write is active.
train_df[garage_cols] = train_df[garage_cols].fillna('No')

In [131]:
# Fill missing GarageYrBlt with 0, assigning back rather than using the
# chained `fillna(..., inplace=True)` form (a no-op under pandas Copy-on-Write).
# NOTE(review): the 0 placeholder becomes the column minimum, so the min-max
# scaling below squeezes every real year into roughly 0.97-1.0 (see the
# describe() output of the scaled frame). Consider a different placeholder or
# a separate "has garage" indicator.
train_df['GarageYrBlt'] = train_df['GarageYrBlt'].fillna(0)

# Columns that still contain missing values, largest count first.
still_missing = train_df.isnull().sum()
still_missing[still_missing > 0].sort_values(ascending=False)

BsmtExposure    38
BsmtFinType2    38
BsmtQual        37
BsmtCond        37
BsmtFinType1    37
MasVnrArea       8
Electrical       1
dtype: int64

In [132]:
# Every column whose name mentions the basement (categorical AND numeric).
bsmt_cols = [col for col in train_df.columns if 'Bsmt' in col]

# Only the object-typed basement columns may receive the 'No Bsmt' label.
# The name filter above also matches numeric columns (BsmtFinSF1, TotalBsmtSF,
# BsmtFullBath, ...); filling those with a string would corrupt their dtype as
# soon as one of them contains a missing value. This mirrors the dtype filter
# used for the garage columns.
bsmt_categorical_cols = [col for col in bsmt_cols if train_df[col].dtype == 'O']

# One vectorised assignment instead of chained `fillna(..., inplace=True)`,
# which does not update `train_df` when pandas Copy-on-Write is active.
train_df[bsmt_categorical_cols] = train_df[bsmt_categorical_cols].fillna('No Bsmt')

In [133]:
# Columns that still contain missing values, largest count first.
nulls_left = train_df.isnull().sum()
nulls_left[nulls_left > 0].sort_values(ascending=False)

MasVnrArea    8
Electrical    1
dtype: int64

In [134]:
# Replace missing MasVnrArea values with the column median.
# Note: this refits the shared `median_imputer`, so the statistics it learned
# from LotFrontage are overwritten by this call.
mas_vnr_area = train_df[['MasVnrArea']]
train_df[['MasVnrArea']] = median_imputer.fit_transform(mas_vnr_area)

# Sanity check: number of missing values left in the column.
train_df['MasVnrArea'].isna().sum()

0

In [135]:
# Frequency of each Electrical category (missing values are not counted),
# inspected before imputing with the most frequent one.
train_df['Electrical'].value_counts()

Electrical
SBrkr    1334
FuseA      94
FuseF      27
FuseP       3
Mix         1
Name: count, dtype: int64

In [136]:
# Replace the missing Electrical value with the most frequent category.
most_common_imputer = SimpleImputer(strategy='most_frequent')
electrical = train_df[['Electrical']]
train_df[['Electrical']] = most_common_imputer.fit_transform(electrical)

# Sanity check: number of missing values left in the column.
train_df['Electrical'].isna().sum()

0

In [137]:
# Use the house Id as the row index.
# Guarded and rebound (instead of `inplace=True`) so that re-running the cell
# after 'Id' has already been moved into the index does not raise a KeyError.
if 'Id' in train_df.columns:
    train_df = train_df.set_index('Id')

# Overview of the cleaned frame: every column should now be fully populated.
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1460 entries, 1 to 1460
Data columns (total 75 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   MSSubClass     1460 non-null   int64  
 1   MSZoning       1460 non-null   object 
 2   LotFrontage    1460 non-null   float64
 3   LotArea        1460 non-null   int64  
 4   Street         1460 non-null   object 
 5   LotShape       1460 non-null   object 
 6   LandContour    1460 non-null   object 
 7   Utilities      1460 non-null   object 
 8   LotConfig      1460 non-null   object 
 9   LandSlope      1460 non-null   object 
 10  Neighborhood   1460 non-null   object 
 11  Condition1     1460 non-null   object 
 12  Condition2     1460 non-null   object 
 13  BldgType       1460 non-null   object 
 14  HouseStyle     1460 non-null   object 
 15  OverallQual    1460 non-null   int64  
 16  OverallCond    1460 non-null   int64  
 17  YearBuilt      1460 non-null   int64  
 18  YearRemodAdd 

### Separating the data into features and target

In [138]:
# SalePrice is the prediction target; every other column is a feature.
target_column = 'SalePrice'
y_train = train_df[target_column]
X_train = train_df.drop(columns=[target_column])

print(f"{X_train.shape} {y_train.shape}")

(1460, 74) (1460,)


### Scaling numeric features

In [139]:
# Names of all numeric feature columns, followed by their summary statistics.
numeric_columns = X_train.select_dtypes(include='number').columns
numeric_features = list(numeric_columns)
X_train[numeric_features].describe()

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold
count,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0
mean,56.89726,69.863699,10516.828082,6.099315,5.575342,1971.267808,1984.865753,103.117123,443.639726,46.549315,567.240411,1057.429452,1162.626712,346.992466,5.844521,1515.463699,0.425342,0.057534,1.565068,0.382877,2.866438,1.046575,6.517808,0.613014,1868.739726,1.767123,472.980137,94.244521,46.660274,21.95411,3.409589,15.060959,2.758904,43.489041,6.321918,2007.815753
std,42.300571,22.027677,9981.264932,1.382997,1.112799,30.202904,20.645407,180.731373,456.098091,161.319273,441.866955,438.705324,386.587738,436.528436,48.623081,525.480383,0.518911,0.238753,0.550916,0.502885,0.815778,0.220338,1.625393,0.644666,453.697295,0.747315,213.804841,125.338794,66.256028,61.119149,29.317331,55.757415,40.177307,496.123024,2.703626,1.328095
min,20.0,21.0,1300.0,1.0,1.0,1872.0,1950.0,0.0,0.0,0.0,0.0,0.0,334.0,0.0,0.0,334.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2006.0
25%,20.0,60.0,7553.5,5.0,5.0,1954.0,1967.0,0.0,0.0,0.0,223.0,795.75,882.0,0.0,0.0,1129.5,0.0,0.0,1.0,0.0,2.0,1.0,5.0,0.0,1958.0,1.0,334.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,2007.0
50%,50.0,69.0,9478.5,6.0,5.0,1973.0,1994.0,0.0,383.5,0.0,477.5,991.5,1087.0,0.0,0.0,1464.0,0.0,0.0,2.0,0.0,3.0,1.0,6.0,1.0,1977.0,2.0,480.0,0.0,25.0,0.0,0.0,0.0,0.0,0.0,6.0,2008.0
75%,70.0,79.0,11601.5,7.0,6.0,2000.0,2004.0,164.25,712.25,0.0,808.0,1298.25,1391.25,728.0,0.0,1776.75,1.0,0.0,2.0,1.0,3.0,1.0,7.0,1.0,2001.0,2.0,576.0,168.0,68.0,0.0,0.0,0.0,0.0,0.0,8.0,2009.0
max,190.0,313.0,215245.0,10.0,9.0,2010.0,2010.0,1600.0,5644.0,1474.0,2336.0,6110.0,4692.0,2065.0,572.0,5642.0,3.0,2.0,3.0,2.0,8.0,3.0,14.0,3.0,2010.0,4.0,1418.0,857.0,547.0,552.0,508.0,480.0,738.0,15500.0,12.0,2010.0


In [159]:
# Learn the per-column min/max on the numeric features (fit returns the scaler itself).
scaler = MinMaxScaler().fit(X_train[numeric_features])

# Work on a copy so that X_train keeps the unscaled values.
X_train_scaled = X_train.copy()
scaled_numeric = scaler.transform(X_train[numeric_features])
X_train_scaled[numeric_features] = scaled_numeric

# Every numeric column should now span exactly [0, 1].
X_train_scaled[numeric_features].describe()

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold
count,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0
mean,0.217043,0.167341,0.04308,0.566591,0.571918,0.719332,0.581096,0.064448,0.078604,0.03158,0.242826,0.173065,0.190139,0.168035,0.010218,0.222582,0.141781,0.028767,0.521689,0.191438,0.358305,0.348858,0.376484,0.204338,0.929721,0.441781,0.333554,0.10997,0.085302,0.039772,0.006712,0.031377,0.003738,0.002806,0.483811,0.453938
std,0.248827,0.075437,0.046653,0.153666,0.1391,0.218862,0.34409,0.112957,0.080811,0.109443,0.189155,0.071801,0.088708,0.211394,0.085005,0.098998,0.17297,0.119376,0.183639,0.251443,0.101972,0.073446,0.135449,0.214889,0.22572,0.186829,0.150779,0.146253,0.121126,0.110723,0.057711,0.116161,0.054441,0.032008,0.245784,0.332024
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.133562,0.029229,0.444444,0.5,0.594203,0.283333,0.0,0.0,0.0,0.095462,0.130237,0.125746,0.0,0.0,0.149868,0.0,0.0,0.333333,0.0,0.25,0.333333,0.25,0.0,0.974129,0.25,0.235896,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.363636,0.25
50%,0.176471,0.164384,0.038227,0.555556,0.5,0.731884,0.733333,0.0,0.067948,0.0,0.204409,0.162275,0.172786,0.0,0.0,0.212886,0.0,0.0,0.666667,0.0,0.375,0.333333,0.333333,0.333333,0.983582,0.5,0.338505,0.0,0.045704,0.0,0.0,0.0,0.0,0.0,0.454545,0.5
75%,0.294118,0.19863,0.04815,0.666667,0.625,0.927536,0.9,0.102656,0.126196,0.0,0.34589,0.21248,0.2426,0.352542,0.0,0.271807,0.333333,0.0,0.666667,0.5,0.375,0.333333,0.416667,0.333333,0.995522,0.5,0.406206,0.196033,0.124314,0.0,0.0,0.0,0.0,0.0,0.636364,0.75
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


### Encoding categorical features

In [141]:
# Names of all object-typed (categorical) feature columns.
object_columns = X_train.select_dtypes(include='object').columns
categorical_features = list(object_columns)
categorical_features

['MSZoning',
 'Street',
 'LotShape',
 'LandContour',
 'Utilities',
 'LotConfig',
 'LandSlope',
 'Neighborhood',
 'Condition1',
 'Condition2',
 'BldgType',
 'HouseStyle',
 'RoofStyle',
 'RoofMatl',
 'Exterior1st',
 'Exterior2nd',
 'ExterQual',
 'ExterCond',
 'Foundation',
 'BsmtQual',
 'BsmtCond',
 'BsmtExposure',
 'BsmtFinType1',
 'BsmtFinType2',
 'Heating',
 'HeatingQC',
 'CentralAir',
 'Electrical',
 'KitchenQual',
 'Functional',
 'FireplaceQu',
 'GarageType',
 'GarageFinish',
 'GarageQual',
 'GarageCond',
 'PavedDrive',
 'SaleType',
 'SaleCondition']

In [182]:
# Categorical features whose levels are given an explicit order and are
# encoded with OrdinalEncoder further down.
ordinal_features = [
    'Utilities', 'LandSlope', 'ExterQual', 'ExterCond', 'BsmtQual',
    'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',
    'HeatingQC', 'CentralAir', 'KitchenQual', 'Functional', 'FireplaceQu',
    'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive',
]

# The remaining categorical features, set aside for one-hot encoding.
ohe_features = [
    'MSZoning', 'Street', 'LotShape', 'LandContour', 'LotConfig',
    'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle',
    'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'Foundation',
    'Heating', 'Electrical', 'GarageType', 'SaleType', 'SaleCondition',
]

#### Encoding ordinal features

In [183]:
# Display the list of features that will be ordinal-encoded below.
ordinal_features

['Utilities',
 'LandSlope',
 'ExterQual',
 'ExterCond',
 'BsmtQual',
 'BsmtCond',
 'BsmtExposure',
 'BsmtFinType1',
 'BsmtFinType2',
 'HeatingQC',
 'CentralAir',
 'KitchenQual',
 'Functional',
 'FireplaceQu',
 'GarageFinish',
 'GarageQual',
 'GarageCond',
 'PavedDrive']

In [220]:
# Inspect which PavedDrive levels occur, to decide their order for the ordinal encoder.
X_train_scaled['PavedDrive'].value_counts()

PavedDrive
Y    1340
N      90
P      30
Name: count, dtype: int64

In [223]:
# Explicit level order for every ordinal feature. Within each list the first
# level is encoded as 0, the next as 1, and so on.
quality_levels = ['Po', 'Fa', 'TA', 'Gd', 'Ex']
bsmt_finish_levels = ['No Bsmt', 'Unf', 'LwQ', 'Rec', 'BLQ', 'ALQ', 'GLQ']

ordinal_categories = {
    'Utilities': ['ELO', 'NoSeWa', 'NoSewr', 'AllPub'],
    'LandSlope': ['Sev', 'Mod', 'Gtl'],
    'ExterQual': quality_levels,
    'ExterCond': quality_levels,
    'BsmtQual': ['No Bsmt'] + quality_levels,
    'BsmtCond': ['No Bsmt'] + quality_levels,
    'BsmtExposure': ['No Bsmt', 'No', 'Mn', 'Av', 'Gd'],
    'BsmtFinType1': bsmt_finish_levels,
    'BsmtFinType2': bsmt_finish_levels,
    'HeatingQC': quality_levels,
    'CentralAir': ['N', 'Y'],
    'KitchenQual': quality_levels,
    'Functional': ['Sal', 'Sev', 'Maj2', 'Maj1', 'Mod', 'Min2', 'Min1', 'Typ'],
    'FireplaceQu': ['No'] + quality_levels,
    'GarageFinish': ['No', 'Unf', 'RFn', 'Fin'],
    'GarageQual': ['No'] + quality_levels,
    'GarageCond': ['No'] + quality_levels,
    'PavedDrive': ['N', 'P', 'Y'],
}

# A single encoder fitted on all ordinal columns replaces the eighteen
# copy-pasted cells that each overwrote `ord_encoder`. The fitted encoder is
# therefore kept and can later be applied to other data with `.transform`.
ordinal_cols = list(ordinal_categories)
ord_encoder = OrdinalEncoder(categories=[ordinal_categories[col] for col in ordinal_cols])

# Fit on the untouched string columns of X_train (X_train_scaled started as a
# copy of it and only its numeric columns were rescaled), so that re-running
# this cell does not fail on already-encoded values.
X_train_scaled[ordinal_cols] = ord_encoder.fit_transform(X_train[ordinal_cols])

#### Min-max scaling the ordinal features

In [224]:
# Rescale the ordinal codes to [0, 1] so they share the range of the
# min-max-scaled numeric features.
scaler2 = MinMaxScaler()
encoded_ordinals = X_train_scaled[ordinal_features]
X_train_scaled[ordinal_features] = scaler2.fit_transform(encoded_ordinals)

# Every ordinal column should now span exactly [0, 1].
X_train_scaled[ordinal_features].describe()

Unnamed: 0,Utilities,LandSlope,ExterQual,ExterCond,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,HeatingQC,CentralAir,KitchenQual,Functional,FireplaceQu,GarageFinish,GarageQual,GarageCond,PavedDrive
count,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0
mean,0.999315,0.968836,0.465297,0.52089,0.697808,0.733733,0.407534,0.590982,0.207877,0.786301,0.934932,0.503881,0.97363,0.365068,0.571918,0.562055,0.561781,0.928082
std,0.026171,0.138116,0.191427,0.087763,0.175296,0.13804,0.266848,0.351296,0.148722,0.239875,0.246731,0.221253,0.111283,0.362175,0.29761,0.14458,0.143937,0.248296
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1.0,1.0,0.333333,0.5,0.6,0.75,0.25,0.166667,0.166667,0.5,1.0,0.333333,1.0,0.0,0.333333,0.6,0.6,1.0
50%,1.0,1.0,0.333333,0.5,0.8,0.75,0.25,0.666667,0.166667,1.0,1.0,0.333333,1.0,0.4,0.666667,0.6,0.6,1.0
75%,1.0,1.0,0.666667,0.5,0.8,0.75,0.5,1.0,0.166667,1.0,1.0,0.666667,1.0,0.8,0.666667,0.6,0.6,1.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
