# Kaggle Submission

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
import pickle
# Seed NumPy's legacy global RNG for reproducibility. None of the visible
# cells draw random numbers, so this is a safeguard rather than a requirement.
np.random.seed(42)

## Load the trained model, scaler, and column lists

In [2]:
def _unpickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only load artifacts that come from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Artifacts read from ../assets. Presumably written by the training notebook
# (fitted scaler, fitted lasso model, and two column lists) -- TODO confirm.
ss = _unpickle('../assets/scaler.pkl')
lasso = _unpickle('../assets/lasso.pkl')
columns = _unpickle('../assets/columns.pkl')
kaggle_columns = _unpickle('../assets/kaggle_columns.pkl')

## Load Kaggle Data

In [3]:
# Read the Kaggle test set, using its 'Id' column as the row index.
test_csv = '../data/test.csv'
kaggle = pd.read_csv(test_csv, index_col='Id')

## Clean Data exactly as Training Data

In [4]:
# Normalise column names: spaces and slashes become underscores, then the
# whole name is lower-cased.
kaggle.columns = [
    name.replace(' ', '_').replace('/', '_').lower()
    for name in kaggle.columns
]

In [5]:
# Sanity check: (rows, columns) of the frame after the column rename.
kaggle.shape

(879, 79)

In [6]:
# List every column that contains at least one missing value; these are the
# columns that need a fill value.
kaggle.columns[kaggle.isnull().any()]

Index(['lot_frontage', 'alley', 'mas_vnr_type', 'mas_vnr_area', 'bsmt_qual',
       'bsmt_cond', 'bsmt_exposure', 'bsmtfin_type_1', 'bsmtfin_type_2',
       'electrical', 'fireplace_qu', 'garage_type', 'garage_yr_blt',
       'garage_finish', 'garage_qual', 'garage_cond', 'pool_qc', 'fence',
       'misc_feature'],
      dtype='object')

In [7]:
# Per-column fill values for missing data.
#
# The original cell called `kaggle.<col>.fillna(..., inplace=True)` once per
# column. That is a chained in-place update on a temporary Series: it emits a
# FutureWarning on pandas 2.x and does not modify `kaggle` at all once
# Copy-on-Write is enabled. A single DataFrame.fillna with a dict applies the
# same values and returns a new frame.
#
# NOTE(review): 'lot_frontage' is filled with the string 'None', which turns
# the column into object dtype. The value is kept as-is because this notebook
# is meant to mirror the training-time cleaning -- confirm against the
# training notebook before changing it (same for the 'None' vs 'Na' split).
fill_values = {
    'lot_frontage': 'None',
    'alley': 'None',
    'mas_vnr_type': 'None',
    'mas_vnr_area': 0,
    'bsmt_qual': 'Na',
    'bsmt_cond': 'Na',
    'bsmt_exposure': 'Na',
    'bsmtfin_type_1': 'Na',
    'bsmtfin_type_2': 'Na',
    'electrical': 'Na',
    'fireplace_qu': 'Na',
    'garage_type': 'Na',
    'garage_yr_blt': 0,
    'garage_finish': 'Na',
    'garage_qual': 'Na',
    'garage_cond': 'Na',
    'pool_qc': 'Na',
    'fence': 'Na',
    'misc_feature': 'Na',
}
kaggle = kaggle.fillna(value=fill_values)

In [8]:
# Explicit dtype casts, expressed as one mapping and applied in a single
# astype call instead of one assignment per column.
float_cols = [
    '1st_flr_sf', '2nd_flr_sf', '3ssn_porch', 'bedroom_abvgr',
    'bsmt_full_bath', 'bsmt_half_bath', 'full_bath', 'half_bath',
    'enclosed_porch', 'fireplaces', 'garage_cars', 'garage_yr_blt',
    'gr_liv_area', 'kitchen_abvgr', 'lot_area', 'low_qual_fin_sf',
    'mo_sold', 'open_porch_sf', 'overall_qual', 'overall_cond',
    'screen_porch', 'totrms_abvgrd', 'wood_deck_sf',
]
int_cols = ['yr_sold', 'year_built', 'year_remod_add']

dtype_map = {name: float for name in float_cols}
dtype_map['ms_subclass'] = str  # cast the subclass code to a string label
dtype_map.update({name: int for name in int_cols})

kaggle = kaggle.astype(dtype_map)

## Change Datatypes accordingly

In [9]:
# Best-effort conversion: cast every column to float where possible and leave
# the remaining columns untouched.
#
# Only the conversion failures are caught (ValueError / TypeError). The
# original bare `except:` also swallowed KeyboardInterrupt and genuine bugs.
#
# NOTE(review): this loop also converts columns that were deliberately cast to
# int or str in the previous cell (e.g. 'ms_subclass', presumably holding
# numeric-looking strings) back to float. Behaviour is kept unchanged to match
# the training pipeline -- confirm that is intended.
for col in kaggle.columns:
    try:
        kaggle[col] = kaggle[col].astype(float)
    except (ValueError, TypeError):
        # Column holds values that cannot be parsed as float; keep it as-is.
        continue

## Perform Feature Engineering exactly as Training Data

In [10]:
# Keep only the columns listed in `kaggle_columns`. The explicit .copy() makes
# the result an independent frame, so adding columns to it later does not
# trigger SettingWithCopyWarning.
kaggle = kaggle[kaggle_columns].copy()

In [11]:
# Columns that are added to `kaggle` as all-zero columns in the next cell.
# Presumably dummy columns for categories seen in training but absent from
# the Kaggle data -- TODO confirm against the training notebook.
missing_col = ['heating_qc_Po', 'garage_qual_Ex']

In [12]:
# Add every name in `missing_col` as an all-zero column. assign() returns a
# new frame, so this never writes into a slice of the original data (the
# per-column item assignment raised SettingWithCopyWarning) and the cell
# yields the same result when re-run.
kaggle = kaggle.assign(**{col: 0 for col in missing_col})

In [13]:
# Sanity check: (rows, columns) after column selection and the added zero columns.
kaggle.shape

(879, 17)

## Create Dummy Columns for Categoricals

In [14]:
# One-hot encode every object-dtype (categorical) column.
#
# The builtin `object` is used for the dtype comparison: the `np.object`
# alias was deprecated in NumPy 1.20 and removed in 1.24, where it raises
# AttributeError.
cat_mask = (kaggle.dtypes == object)
cat_columns = kaggle.columns[cat_mask]
kaggle_dummies = pd.get_dummies(kaggle, columns=cat_columns)

In [15]:
# Select the columns named in `columns`, in that order; raises KeyError if
# any of them is missing from the dummy-encoded frame.
kaggle_dummies = kaggle_dummies.loc[:, columns]

In [16]:
# Sanity check: (rows, columns) of the final feature matrix.
kaggle_dummies.shape

(879, 35)

## Scale Kaggle Data

In [17]:
# Apply the unpickled scaler's transform to the feature frame; no refit here.
# Presumably `ss` is a fitted sklearn StandardScaler -- verify against the
# training notebook.
kaggle_dummies_sc = ss.transform(kaggle_dummies)

## Make Predictions with Model

In [18]:
# Predict from the scaled features with the unpickled model.
preds = lasso.predict(kaggle_dummies_sc)

## Align Predictions with Index and Setup Header

In [19]:
# Wrap the predictions in a one-column frame keyed by the Kaggle row index.
header = ['SalePrice']
submission = pd.DataFrame(data=preds, columns=header, index=kaggle.index)

## Sort the Index

In [20]:
# Order rows by ascending index.
submission = submission.sort_index()

## Save Submission to csv

In [21]:
# Write the submission (index plus SalePrice column) to disk.
submission_csv = '../data/lasso.csv'
submission.to_csv(submission_csv)

## Check data with `head`

In [22]:
# Show the first lines of the written file using the shell `head` utility.
!head ../data/lasso.csv

Id,SalePrice
2,132225.1635549931
4,238074.02513347083
6,181375.0648146856
7,238262.69165614218
17,235808.1457413977
18,317745.12107583194
22,193768.94697877867
27,104859.8290106025
31,119599.304712419
