# Packages

In [1]:
import numpy as np
import pandas as pd

from matplotlib import pyplot as plt
from plotly import express as px, graph_objects as go

from sklearn.metrics import mean_squared_log_error
from sklearn.preprocessing import StandardScaler

%matplotlib inline

# Reading Data

In [2]:
print('Reading data...')

df = pd.read_csv('../input/house-prices-advanced-regression-techniques/train.csv', index_col='Id')

df.info()
df.head()

Reading data...
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1460 entries, 1 to 1460
Data columns (total 80 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   MSSubClass     1460 non-null   int64  
 1   MSZoning       1460 non-null   object 
 2   LotFrontage    1201 non-null   float64
 3   LotArea        1460 non-null   int64  
 4   Street         1460 non-null   object 
 5   Alley          91 non-null     object 
 6   LotShape       1460 non-null   object 
 7   LandContour    1460 non-null   object 
 8   Utilities      1460 non-null   object 
 9   LotConfig      1460 non-null   object 
 10  LandSlope      1460 non-null   object 
 11  Neighborhood   1460 non-null   object 
 12  Condition1     1460 non-null   object 
 13  Condition2     1460 non-null   object 
 14  BldgType       1460 non-null   object 
 15  HouseStyle     1460 non-null   object 
 16  OverallQual    1460 non-null   int64  
 17  OverallCond    1460 non-null   int64

Unnamed: 0_level_0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,...,0,,,,0,2,2008,WD,Normal,208500
2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,...,0,,,,0,5,2007,WD,Normal,181500
3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,Inside,...,0,,,,0,9,2008,WD,Normal,223500
4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,Corner,...,0,,,,0,2,2006,WD,Abnorml,140000
5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,FR2,...,0,,,,0,12,2008,WD,Normal,250000


# Exploratory Data Analysis

### Drop columns with many missing values

In [3]:
df.loc[:, df.isna().mean() > 0.4].isna().mean()

Alley          0.937671
FireplaceQu    0.472603
PoolQC         0.995205
Fence          0.807534
MiscFeature    0.963014
dtype: float64

In [4]:
df = df.drop(['Alley', 'FireplaceQu', 'PoolQC', 'Fence', 'MiscFeature'], axis=1)

### Splitting features and target

In [5]:
X = df.drop('SalePrice', axis=1)
y = np.log(df['SalePrice'])

### Defining numerical and categorical features

In [6]:
nominal_features = [
    'MSSubClass', 'MSZoning', 'Street', 'LandContour', 'LotConfig', 'Neighborhood', 
    'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 
    'Exterior1st', 'Exterior2nd', 'MasVnrType', 'Foundation', 'Heating', 'SaleType', 
    'SaleCondition','GarageType'
]

ordinal_features = [
    'LotShape', 'Utilities', 'LandSlope', 'OverallQual', 'OverallCond', 'ExterQual', 
    'ExterCond', 'HeatingQC', 'CentralAir', 'KitchenQual', 'Functional', 'PavedDrive', 
    'Electrical', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 
    'GarageFinish', 'GarageQual', 'GarageCond'
]

continuous_features = [
    'LotFrontage', 'LotArea', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 
    'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'GarageArea', 
    'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 
    'MiscVal'
]

discrete_features = [
    'YearBuilt', 'YearRemodAdd', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 
    'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 
    'MoSold', 'YrSold'
]

### Filling missing values

#### Filling missing categorical values with None

In [7]:
for col in (nominal_features + ordinal_features):
    X[col] = X[col].fillna('None')

#### Filling missing numerical values with 0

In [8]:
for col in (continuous_features + discrete_features):
    X[col] = X[col].fillna(0)

### Encoding Categorical Features

#### One-hot encoding

In [9]:
print('\nOne-hot encoding...\n')

dummies = pd.get_dummies(X[nominal_features]).sort_index()

X = pd.concat([X, dummies], axis=1)
X = X.drop(nominal_features, axis=1)

X.info()
X.head()


One-hot encoding...

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1460 entries, 1 to 1460
Columns: 208 entries, LotFrontage to GarageType_None
dtypes: float64(3), int64(32), object(19), uint8(154)
memory usage: 846.9+ KB


Unnamed: 0_level_0,LotFrontage,LotArea,LotShape,Utilities,LandSlope,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,...,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,GarageType_2Types,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_CarPort,GarageType_Detchd,GarageType_None
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,65.0,8450,Reg,AllPub,Gtl,7,5,2003,2003,196.0,...,0,1,0,0,1,0,0,0,0,0
2,80.0,9600,Reg,AllPub,Gtl,6,8,1976,1976,0.0,...,0,1,0,0,1,0,0,0,0,0
3,68.0,11250,IR1,AllPub,Gtl,7,5,2001,2002,162.0,...,0,1,0,0,1,0,0,0,0,0
4,60.0,9550,IR1,AllPub,Gtl,7,5,1915,1970,0.0,...,0,0,0,0,0,0,0,0,1,0
5,84.0,14260,IR1,AllPub,Gtl,8,5,2000,2000,350.0,...,0,1,0,0,1,0,0,0,0,0


#### Ordinal encoding

In [10]:
rating = {'None': 0, 'Po': 1, 'Fa': 2, 'TA': 3, 'Gd': 4, 'Ex': 5}

ordinal_encoding = {
    'LotShape': {'None': 0, 'Reg': 1, 'IR1': 2, 'IR2': 3, 'IR3': 4}, 
    'Utilities': {'None': 0, 'ElO': 1, 'NoSeWa': 2, 'NoSeWr': 3, 'AllPub': 4}, 
    'LandSlope': {'None': 0, 'Gtl': 1, 'Mod': 2, 'Sev': 3}, 
    'ExterQual': rating, 
    'ExterCond': rating, 
    'BsmtQual': rating, 
    'BsmtCond': rating, 
    'BsmtExposure': {'None': 0, 'No': 1, 'Mn': 2, 'Av': 3, 'Gd': 4}, 
    'BsmtFinType1': {'None': 0, 'Unf': 1, 'LwQ': 2, 'Rec': 3, 'BLQ': 4, 'ALQ': 5, 'GLQ': 6}, 
    'BsmtFinType2': {'None': 0, 'Unf': 1, 'LwQ': 2, 'Rec': 3, 'BLQ': 4, 'ALQ': 5, 'GLQ': 6}, 
    'HeatingQC': rating, 
    'CentralAir': {'None': 0, 'N': 1, 'Y': 2}, 
    'Electrical': {'None': 0, 'Mix': 1, 'FuseP': 2, 'FuseF': 3, 'FuseA': 4, 'SBrkr': 5}, 
    'KitchenQual': rating, 
    'Functional': {'None': 0, 'Sal': 1, 'Sev': 2, 'Maj2': 3, 'Maj1': 4, 'Mod': 5, 'Min2': 6, 'Min1': 7, 'Typ': 8}, 
    'GarageFinish': {'None': 0, 'Unf': 1, 'RFn': 2, 'Fin': 3}, 
    'GarageQual': rating, 
    'GarageCond': rating, 
    'PavedDrive': {'None': 0, 'N': 1, 'P': 2, 'Y': 3}
}

In [11]:
print('\nOrdinal encoding...\n')

X = X.replace(ordinal_encoding)

X.info()
X.head()


Ordinal encoding...

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1460 entries, 1 to 1460
Columns: 208 entries, LotFrontage to GarageType_None
dtypes: float64(3), int64(51), uint8(154)
memory usage: 846.9 KB


Unnamed: 0_level_0,LotFrontage,LotArea,LotShape,Utilities,LandSlope,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,...,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,GarageType_2Types,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_CarPort,GarageType_Detchd,GarageType_None
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,65.0,8450,1,4,1,7,5,2003,2003,196.0,...,0,1,0,0,1,0,0,0,0,0
2,80.0,9600,1,4,1,6,8,1976,1976,0.0,...,0,1,0,0,1,0,0,0,0,0
3,68.0,11250,2,4,1,7,5,2001,2002,162.0,...,0,1,0,0,1,0,0,0,0,0
4,60.0,9550,2,4,1,7,5,1915,1970,0.0,...,0,0,0,0,0,0,0,0,1,0
5,84.0,14260,2,4,1,8,5,2000,2000,350.0,...,0,1,0,0,1,0,0,0,0,0


### Scaling features

In [12]:
print('\nScaling features...\n')

scaler = StandardScaler().fit(X)

X[:] = scaler.transform(X)

X.info()
X.head()


Scaling features...

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1460 entries, 1 to 1460
Columns: 208 entries, LotFrontage to GarageType_None
dtypes: float64(208)
memory usage: 2.3 MB


Unnamed: 0_level_0,LotFrontage,LotArea,LotShape,Utilities,LandSlope,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,...,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,GarageType_2Types,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_CarPort,GarageType_Detchd,GarageType_None
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.212877,-0.207142,-0.701291,0.02618,-0.225716,0.651479,-0.5172,1.050994,0.878668,0.514104,...,-0.117851,0.467651,-0.305995,-0.064238,0.823505,-0.114827,-0.253259,-0.078757,-0.600559,-0.24236
2,0.645747,-0.091886,-0.701291,0.02618,-0.225716,-0.071836,2.179628,0.156734,-0.429577,-0.57075,...,-0.117851,0.467651,-0.305995,-0.064238,0.823505,-0.114827,-0.253259,-0.078757,-0.600559,-0.24236
3,0.299451,0.07348,1.016637,0.02618,-0.225716,0.651479,-0.5172,0.984752,0.830215,0.325915,...,-0.117851,0.467651,-0.305995,-0.064238,0.823505,-0.114827,-0.253259,-0.078757,-0.600559,-0.24236
4,0.068587,-0.096897,1.016637,0.02618,-0.225716,0.651479,-0.5172,-1.863632,-0.720298,-0.57075,...,-0.117851,-2.138345,-0.305995,-0.064238,-1.214321,-0.114827,-0.253259,-0.078757,1.665116,-0.24236
5,0.761179,0.375148,1.016637,0.02618,-0.225716,1.374795,-0.5172,0.951632,0.733308,1.366489,...,-0.117851,0.467651,-0.305995,-0.064238,0.823505,-0.114827,-0.253259,-0.078757,-0.600559,-0.24236


In [13]:
pd.concat([X, y], axis=1).to_csv('HousePrices.csv', index=False)

# Data Preparation Pipeline

### Prepare features function (Pipeline for all data preparation above)

In [14]:
def prepare_features(df: pd.DataFrame, scaler, feature_names: list = None):
    """Preparing features for training"""
    
    # Creating DF
    
    if feature_names is not None:
        X = pd.concat([pd.DataFrame(columns=feature_names), df])
    else:
        X = df
    
    # Defining numerical and categorical features
    
    nominal_features = [
        'MSSubClass', 'MSZoning', 'Street', 'LandContour', 'LotConfig', 'Neighborhood', 
        'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 
        'Exterior1st', 'Exterior2nd', 'MasVnrType', 'Foundation', 'Heating', 'SaleType', 
        'SaleCondition','GarageType'
    ]

    ordinal_features = [
        'LotShape', 'Utilities', 'LandSlope', 'OverallQual', 'OverallCond', 'ExterQual', 
        'ExterCond', 'HeatingQC', 'CentralAir', 'KitchenQual', 'Functional', 'PavedDrive', 
        'Electrical', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 
        'GarageFinish', 'GarageQual', 'GarageCond'
    ]

    continuous_features = [
        'LotFrontage', 'LotArea', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 
        'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'GarageArea', 
        'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 
        'MiscVal'
    ]

    discrete_features = [
        'YearBuilt', 'YearRemodAdd', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 
        'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 
        'MoSold', 'YrSold'
    ]
    
    # Filling missing categorical values with None

    for col in (nominal_features + ordinal_features):
        X[col] = X[col].fillna('None')

    # Filling numerical missing values with 0

    for col in (continuous_features + discrete_features):
        X[col] = X[col].fillna(0)
    
    # One Hot Encoding

    dummies = pd.get_dummies(X[nominal_features]).sort_index()
    dummies_cols = list(set(dummies.columns) & set(X.columns))
    X[dummies_cols] = dummies[dummies_cols]
    X = X.drop(nominal_features, axis=1)
    
    # Ordinal Encoding

    rating = {'None': 0, 'Po': 1, 'Fa': 2, 'TA': 3, 'Gd': 4, 'Ex': 5}
    
    ordinal_encoding = {
        'LotShape': {'None': 0, 'Reg': 1, 'IR1': 2, 'IR2': 3, 'IR3': 4}, 
        'Utilities': {'None': 0, 'ElO': 1, 'NoSeWa': 2, 'NoSeWr': 3, 'AllPub': 4}, 
        'LandSlope': {'None': 0, 'Gtl': 1, 'Mod': 2, 'Sev': 3}, 
        'ExterQual': rating, 
        'ExterCond': rating, 
        'BsmtQual': rating, 
        'BsmtCond': rating, 
        'BsmtExposure': {'None': 0, 'No': 1, 'Mn': 2, 'Av': 3, 'Gd': 4}, 
        'BsmtFinType1': {'None': 0, 'Unf': 1, 'LwQ': 2, 'Rec': 3, 'BLQ': 4, 'ALQ': 5, 'GLQ': 6}, 
        'BsmtFinType2': {'None': 0, 'Unf': 1, 'LwQ': 2, 'Rec': 3, 'BLQ': 4, 'ALQ': 5, 'GLQ': 6}, 
        'HeatingQC': rating, 
        'CentralAir': {'None': 0, 'N': 1, 'Y': 2}, 
        'Electrical': {'None': 0, 'Mix': 1, 'FuseP': 2, 'FuseF': 3, 'FuseA': 4, 'SBrkr': 5}, 
        'KitchenQual': rating, 
        'Functional': {'None': 0, 'Sal': 1, 'Sev': 2, 'Maj2': 3, 'Maj1': 4, 'Mod': 5, 'Min2': 6, 'Min1': 7, 'Typ': 8}, 
        'GarageFinish': {'None': 0, 'Unf': 1, 'RFn': 2, 'Fin': 3}, 
        'GarageQual': rating, 
        'GarageCond': rating, 
        'PavedDrive': {'None': 0, 'N': 1, 'P': 2, 'Y': 3}
    }

    X = X.replace(ordinal_encoding)
    
    # Feature selection
    
    if feature_names is not None:
        X = X[feature_names]
    
    # Filling NAs
    
    X = X.fillna(0)
    
    # Scaling features
    
    X[:] = scaler.transform(X)
    
    return X


# Plot Function

In [15]:
def plot_actual_vs_pred(model, X, y):
    """Plotting actual vs predicted label"""
    
    y_pred = np.exp(model.predict(X.values))
    
    plot_data = pd.concat([np.exp(y), pd.Series(y_pred, name='PredictedPrice', index=y.index)], axis=1)
    plot_data = plot_data.sort_values('SalePrice')
    plot_data.index = y.index
    plot_data = plot_data.reset_index()
    
    fig = px.scatter(plot_data, x='Id', y='SalePrice')
    fig.add_trace(go.Scatter(x=plot_data['Id'], y=plot_data['PredictedPrice'], name='Prediction'))
    fig.show()

# Model

### sklearn packages

In [16]:
from sklearn.ensemble import (
    BaggingRegressor,
    ExtraTreesRegressor,
    HistGradientBoostingRegressor,
    RandomForestRegressor,
    StackingRegressor
)
from xgboost import XGBRegressor

### Build model

In [17]:
bagging = BaggingRegressor(n_jobs=-1)
extraTrees = ExtraTreesRegressor(max_depth=10, n_jobs=-1)
randomForest = RandomForestRegressor(n_jobs=-1)
histGradientBoosting = HistGradientBoostingRegressor()
XGB = XGBRegressor(n_jobs=-1)

model = StackingRegressor([
    ('bagging', bagging),
    ('extraTress', extraTrees),
    ('randomforest', randomForest),
    ('histGradientBoosting', histGradientBoosting),
    ('XGB', XGB)
], n_jobs=-1)

### Fit model

In [18]:
model = model.fit(X.values, y)
model.score(X.values, y)

0.9934101884161262

### RMSE

In [19]:
print('\nRMSE: ', np.sqrt(mean_squared_log_error(y, model.predict(X.values))))


RMSE:  0.002505550846953378


### Plot actual label vs model prediction

In [20]:
plot_actual_vs_pred(model, X, y)

# Submission

### Reading test data

In [21]:
print('\nLoading test data...\n')

test_df = pd.read_csv('../input/house-prices-advanced-regression-techniques/test.csv', index_col="Id")

test_df.head()


Loading test data...



Unnamed: 0_level_0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1461,20,RH,80.0,11622,Pave,,Reg,Lvl,AllPub,Inside,...,120,0,,MnPrv,,0,6,2010,WD,Normal
1462,20,RL,81.0,14267,Pave,,IR1,Lvl,AllPub,Corner,...,0,0,,,Gar2,12500,6,2010,WD,Normal
1463,60,RL,74.0,13830,Pave,,IR1,Lvl,AllPub,Inside,...,0,0,,MnPrv,,0,3,2010,WD,Normal
1464,60,RL,78.0,9978,Pave,,IR1,Lvl,AllPub,Inside,...,0,0,,,,0,6,2010,WD,Normal
1465,120,RL,43.0,5005,Pave,,IR1,HLS,AllPub,Inside,...,144,0,,,,0,1,2010,WD,Normal


### Preparing test data for model

In [22]:
print('\nPreparing data...\n')

X_test = prepare_features(test_df, scaler, X.columns)

X_test.head()


Preparing data...



Unnamed: 0,LotFrontage,LotArea,LotShape,Utilities,LandSlope,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,...,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,GarageType_2Types,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_CarPort,GarageType_Detchd,GarageType_None
1461,0.645747,0.110763,-0.701291,0.02618,-0.225716,-0.795151,0.381743,-0.340077,-1.15638,-0.57075,...,-0.117851,0.467651,-0.305995,-0.064238,0.823505,-0.114827,-0.253259,-0.078757,-0.600559,-0.24236
1462,0.674605,0.37585,1.016637,0.02618,-0.225716,-0.071836,0.381743,-0.43944,-1.30174,0.027027,...,-0.117851,0.467651,-0.305995,-0.064238,0.823505,-0.114827,-0.253259,-0.078757,-0.600559,-0.24236
1463,0.472599,0.332053,1.016637,0.02618,-0.225716,-0.795151,-0.5172,0.852269,0.6364,-0.57075,...,-0.117851,0.467651,-0.305995,-0.064238,0.823505,-0.114827,-0.253259,-0.078757,-0.600559,-0.24236
1464,0.588031,-0.054002,1.016637,0.02618,-0.225716,-0.071836,0.381743,0.88539,0.6364,-0.460051,...,-0.117851,0.467651,-0.305995,-0.064238,0.823505,-0.114827,-0.253259,-0.078757,-0.600559,-0.24236
1465,-0.421999,-0.552407,1.016637,0.02618,-0.225716,1.374795,-0.5172,0.686666,0.345679,-0.57075,...,-0.117851,0.467651,-0.305995,-0.064238,0.823505,-0.114827,-0.253259,-0.078757,-0.600559,-0.24236


### Making predictions

In [23]:
print('\nPredicting target...\n')

test_preds = pd.DataFrame.from_dict({'Id': test_df.index,'SalePrice': np.exp(model.predict(X_test.values))})

test_preds.head()


Predicting target...



Unnamed: 0,Id,SalePrice
0,1461,115548.400109
1,1462,157114.57607
2,1463,187070.016766
3,1464,190686.943943
4,1465,195115.826327


### Saving submission file

In [24]:
print('\nSaving output...')

test_preds.to_csv('submission.csv', index=False)

print('\nSaved successfully!\n')


Saving output...

Saved successfully!

