In [1]:
import h2o
from h2o.estimators.glrm import H2OGeneralizedLowRankEstimator
from h2o.estimators.gbm import H2OGradientBoostingEstimator 
from h2o.estimators.random_forest import H2ORandomForestEstimator
from h2o.grid.grid_search import H2OGridSearch 
from h2o.estimators.xgboost import H2OXGBoostEstimator
from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator
#import xgboost as xgb
h2o.init() # give h2o as much memory as possible
h2o.no_progress() # turn off h2o progress bars

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats.mstats import winsorize
import matplotlib.pyplot as plt
pd.options.display.mpl_style = 'default'
from pandas.tools.plotting import scatter_matrix
import seaborn as sns
sns.set()

Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
; Java HotSpot(TM) 64-Bit Server VM (build 25.131-b11, mixed mode)
  Starting server from C:\Users\Soomin\Anaconda3\lib\site-packages\h2o\backend\bin\h2o.jar
  Ice root: C:\Users\Soomin\AppData\Local\Temp\tmpxbhryiuw
  JVM stdout: C:\Users\Soomin\AppData\Local\Temp\tmpxbhryiuw\h2o_Soomin_started_from_python.out
  JVM stderr: C:\Users\Soomin\AppData\Local\Temp\tmpxbhryiuw\h2o_Soomin_started_from_python.err
  Server is running at http://127.0.0.1:54327
Connecting to H2O server at http://127.0.0.1:54327... successful.


0,1
H2O cluster uptime:,08 secs
H2O cluster version:,3.10.5.2
H2O cluster version age:,8 days
H2O cluster name:,H2O_from_python_Soomin_wvgw3j
H2O cluster total nodes:,1
H2O cluster free memory:,3.539 Gb
H2O cluster total cores:,8
H2O cluster allowed cores:,8
H2O cluster status:,"accepting new members, healthy"
H2O connection url:,http://127.0.0.1:54327


mpl_style had been deprecated and will be removed in a future version.
Use `matplotlib.pyplot.style.use` instead.

  exec(code_obj, self.user_global_ns, self.user_ns)


### Import Data & Create Sales Price Dummy Column in Test Dataset

In [2]:
trainori = h2o.import_file('train.csv')
testori = h2o.import_file('test.csv')
dummy_col = np.random.rand(testori.shape[0])
testori = testori.cbind(h2o.H2OFrame(dummy_col))
cols = testori.columns
cols[-1] = 'SalePrice'
testori.columns = cols
print(trainori.shape)
print(testori.shape)

(1460, 81)
(1459, 81)


In [3]:
excludevars = ['ID','SalePrice']
def get_type_lists(frame, rejects=excludevars):

    """Creates lists of numeric and categorical variables.
    :param frame: The frame from which to determine types.
    :param rejects: Variable names not to be included in returned lists.
    :return: Tuple of lists for numeric and categorical variables in the frame.
    """
    
    nums, cats = [], []
    for key, val in frame.types.items():
        if key not in rejects:
            if val == 'enum':
                cats.append(key)
            else: 
                nums.append(key)
                
    print('Numeric =', nums)                
    print()
    print('Categorical =', cats)
    
    return nums, cats

In [4]:
original_nums, cats = get_type_lists(trainori)

Numeric = ['OverallQual', 'TotRmsAbvGrd', 'GrLivArea', 'PoolArea', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'LowQualFinSF', 'TotalBsmtSF', 'Fireplaces', 'YearRemodAdd', 'BedroomAbvGr', 'Id', 'OverallCond', 'MoSold', 'YearBuilt', 'BsmtFinSF1', 'GarageArea', 'FullBath', 'WoodDeckSF', 'GarageCars', 'BsmtFinSF2', 'MasVnrArea', 'BsmtFullBath', 'KitchenAbvGr', 'MiscVal', 'GarageYrBlt', 'LotArea', 'ScreenPorch', 'BsmtUnfSF', 'BsmtHalfBath', '2ndFlrSF', '1stFlrSF', 'LotFrontage', 'MSSubClass', 'HalfBath', 'YrSold']

Categorical = ['Neighborhood', 'Alley', 'Exterior2nd', 'BsmtFinType2', 'GarageCond', 'SaleCondition', 'Heating', 'GarageQual', 'HeatingQC', 'HouseStyle', 'FireplaceQu', 'Fence', 'LotShape', 'BldgType', 'CentralAir', 'RoofMatl', 'MSZoning', 'PoolQC', 'BsmtExposure', 'Electrical', 'ExterCond', 'Utilities', 'BsmtFinType1', 'LandSlope', 'Functional', 'Exterior1st', 'BsmtCond', 'LotConfig', 'ExterQual', 'GarageFinish', 'LandContour', 'Condition2', 'Condition1', 'BsmtQual', 'SaleType

## Split into train and validation (before doing prep)

In [10]:
train, valid = trainori.split_frame([0.7], seed=12345)
print(train.shape)
print(valid.shape)

(1001, 81)
(459, 81)


In [5]:
# do this first
train = trainori
test = testori
print(train.shape)

(1460, 81)


## Preprocessing

In [6]:
pandas_train = train.as_data_frame(use_pandas=True) #Convert to Pandas frame
pandas_test = test.as_data_frame(use_pandas=True) 


In [7]:
# Lotfrontage
temp = pandas_train.groupby('Neighborhood', as_index=False)['LotFrontage'].median()
temp = temp.rename(columns={"LotFrontage":"LotFrontage2"})
pandas_train = pd.merge(pandas_train, temp, how='left', on='Neighborhood')
pandas_train['LotFrontage'][pandas_train['LotFrontage'].isnull()] = pandas_train['LotFrontage2'][pandas_train['LotFrontage'].isnull()]
pandas_train = pandas_train.drop('LotFrontage2', axis=1)


temp = pandas_test.groupby('Neighborhood', as_index=False)['LotFrontage'].median()
temp = temp.rename(columns={"LotFrontage":"LotFrontage2"})
pandas_test = pd.merge(pandas_test, temp, how='left', on='Neighborhood')
pandas_test['LotFrontage'][pandas_test['LotFrontage'].isnull()] = pandas_test['LotFrontage2'][pandas_test['LotFrontage'].isnull()]
pandas_test = pandas_test.drop('LotFrontage2', axis=1)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [8]:
# Alley
pandas_train["Alley"].fillna("None", inplace=True)

pandas_test["Alley"].fillna("None", inplace=True)


In [9]:
# MasVnrType, MasVnrArea
pandas_train['MasVnrType'].fillna(pandas_train['MasVnrType'].value_counts().index[0],inplace=True)
pandas_train['MasVnrArea'].fillna(pandas_train['MasVnrArea'].mode()[0],inplace=True)


pandas_test['MasVnrType'].fillna(pandas_train['MasVnrType'].value_counts().index[0],inplace=True)
pandas_test['MasVnrArea'].fillna(pandas_train['MasVnrArea'].mode()[0],inplace=True)

In [10]:
# Basement related

basement_cols=['BsmtQual','BsmtCond','BsmtExposure','BsmtFinType1','BsmtFinType2','BsmtFinSF1','BsmtFinSF2']

pandas_train["BsmtQual"].fillna("None", inplace=True)
pandas_train["BsmtCond"].fillna("None", inplace=True)
pandas_train["BsmtExposure"].fillna("None", inplace=True)
pandas_train["BsmtFinType1"].fillna("None", inplace=True)
pandas_train["BsmtFinSF1"].fillna(0, inplace=True)
pandas_train["BsmtFinType2"].fillna("None", inplace=True)
pandas_train["BsmtFinSF2"].fillna(0, inplace=True)
pandas_train["BsmtUnfSF"].fillna(0, inplace=True)

pandas_test["BsmtQual"].fillna("None", inplace=True)
pandas_test["BsmtCond"].fillna("None", inplace=True)
pandas_test["BsmtExposure"].fillna("None", inplace=True)
pandas_test["BsmtFinType1"].fillna("None", inplace=True)
pandas_test["BsmtFinSF1"].fillna(0, inplace=True)
pandas_test["BsmtFinType2"].fillna("None", inplace=True)
pandas_test["BsmtFinSF2"].fillna(0, inplace=True)
pandas_test["BsmtUnfSF"].fillna(0, inplace=True)

In [11]:
pandas_test[basement_cols][pandas_train['BsmtQual'].isnull()==True]

  if __name__ == '__main__':


Unnamed: 0,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,BsmtFinSF1,BsmtFinSF2


In [12]:
# Electrical
pandas_train["Electrical"].fillna("SBrkr", inplace=True)

pandas_test["Electrical"].fillna("SBrkr", inplace=True)

In [13]:
# FireplaceQu
pandas_train["FireplaceQu"].fillna("None", inplace=True)

pandas_test["FireplaceQu"].fillna("None", inplace=True)

In [14]:
# Garage related
garage_cols=['GarageType','GarageQual','GarageCond','GarageYrBlt','GarageFinish','GarageCars','GarageArea']

pandas_train["GarageType"].fillna("None", inplace=True)
pandas_train["GarageQual"].fillna("None", inplace=True)
pandas_train["GarageCond"].fillna("None", inplace=True)
pandas_train["GarageFinish"].fillna("None", inplace=True)
pandas_train["GarageCars"].fillna(0, inplace=True)
pandas_train["GarageArea"].fillna(0, inplace=True)

pandas_test["GarageType"].fillna("None", inplace=True)
pandas_test["GarageQual"].fillna("None", inplace=True)
pandas_test["GarageCond"].fillna("None", inplace=True)
pandas_test["GarageFinish"].fillna("None", inplace=True)
pandas_test["GarageCars"].fillna(0, inplace=True)
pandas_test["GarageArea"].fillna(0, inplace=True)

In [15]:
# GarageYrBlt Binning

minval = pandas_train['GarageYrBlt'].min()
maxval = pandas_train['GarageYrBlt'].max()+1
binlist=[0,minval,1920,1940,1960,1980,2000,maxval]
pandas_train['GarageYrBlt'].fillna(0,inplace=True)
pandas_train['GarageYrBltBins'] = pd.cut(pandas_train['GarageYrBlt'],binlist,include_lowest=True,right=False)

minval = pandas_test['GarageYrBlt'].min()
maxval = pandas_test['GarageYrBlt'].max()+1
binlist=[0,minval,1920,1940,1960,1980,2000,maxval]
pandas_test['GarageYrBlt'].fillna(0,inplace=True)
pandas_test['GarageYrBltBins'] = pd.cut(pandas_test['GarageYrBlt'],binlist,include_lowest=True,right=False)

In [16]:
# PoolQC
pandas_train["PoolQC"].fillna("None", inplace=True)

pandas_test["PoolQC"].fillna("None", inplace=True)

In [17]:
# Fence, MiscFeature

pandas_train["Fence"].fillna("None", inplace=True)
pandas_train["MiscFeature"].fillna("None", inplace=True)

pandas_test["Fence"].fillna("None", inplace=True)
pandas_test["MiscFeature"].fillna("None", inplace=True)

In [18]:
def show_missing(pandas_frame):
    missing = pandas_frame.columns[pandas_frame.isnull().any()].tolist()
    return missing

In [19]:
show_missing(pandas_train)

[]

In [20]:
show_missing(pandas_test) # 나중에 처리해야..

['MSZoning',
 'Utilities',
 'Exterior1st',
 'Exterior2nd',
 'TotalBsmtSF',
 'BsmtFullBath',
 'BsmtHalfBath',
 'KitchenQual',
 'Functional',
 'SaleType']

In [21]:
train_h2o = h2o.H2OFrame(pandas_train) #Convert back to H2O frame 
test_h2o = h2o.H2OFrame(pandas_test)

### Set categorical vars

In [22]:
train_h2o['MSSubClass'] = train_h2o['MSSubClass'].asfactor()
train_h2o['OverallQual'] = train_h2o['OverallQual'].asfactor()
train_h2o['OverallCond'] = train_h2o['OverallCond'].asfactor()
train_h2o['YearBuilt'] = train_h2o['YearBuilt'].asfactor()
train_h2o['YearRemodAdd'] = train_h2o['YearRemodAdd'].asfactor()
#train_h2o['GarageYrBlt'] = train_h2o['GarageYrBlt'].asfactor()
train_h2o['MoSold'] = train_h2o['MoSold'].asfactor()
train_h2o['YrSold'] = train_h2o['YrSold'].asfactor()

test_h2o['MSSubClass'] = test_h2o['MSSubClass'].asfactor()
test_h2o['OverallQual'] = test_h2o['OverallQual'].asfactor()
test_h2o['OverallCond'] = test_h2o['OverallCond'].asfactor()
test_h2o['YearBuilt'] = test_h2o['YearBuilt'].asfactor()
test_h2o['YearRemodAdd'] = test_h2o['YearRemodAdd'].asfactor()
#test_h2o['GarageYrBlt'] = test_h2o['GarageYrBlt'].asfactor()
test_h2o['MoSold'] = test_h2o['MoSold'].asfactor()
test_h2o['YrSold'] = test_h2o['YrSold'].asfactor()

In [23]:
excludevars = ['Id','SalePrice', 'GarageYrBltBins2','GarageYrBlt']
nums_afterpp, cats_afterpp = get_type_lists(train_h2o, excludevars)

Numeric = ['TotRmsAbvGrd', 'GrLivArea', 'PoolArea', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'LowQualFinSF', 'TotalBsmtSF', 'Fireplaces', 'BedroomAbvGr', 'BsmtFinSF1', 'GarageArea', 'FullBath', 'WoodDeckSF', 'GarageCars', 'BsmtFinSF2', 'MasVnrArea', 'BsmtFullBath', 'KitchenAbvGr', 'MiscVal', 'LotArea', 'ScreenPorch', 'BsmtUnfSF', 'BsmtHalfBath', '2ndFlrSF', '1stFlrSF', 'LotFrontage', 'HalfBath']

Categorical = ['OverallQual', 'Neighborhood', 'Alley', 'Exterior2nd', 'BsmtFinType2', 'GarageCond', 'SaleCondition', 'Heating', 'GarageQual', 'HeatingQC', 'YearRemodAdd', 'HouseStyle', 'FireplaceQu', 'Fence', 'LotShape', 'GarageYrBltBins', 'BldgType', 'OverallCond', 'MoSold', 'YearBuilt', 'CentralAir', 'RoofMatl', 'MSZoning', 'PoolQC', 'BsmtExposure', 'Electrical', 'ExterCond', 'Utilities', 'BsmtFinType1', 'LandSlope', 'Functional', 'Exterior1st', 'BsmtCond', 'LotConfig', 'ExterQual', 'GarageFinish', 'LandContour', 'Condition2', 'Condition1', 'BsmtQual', 'SaleType', 'RoofStyle', 'Foundatio

In [40]:
train, valid = train_h2o.split_frame([0.7], seed=12345)
test = test_h2o
print(train.shape)
print(valid.shape)

(1001, 82)
(459, 82)


In [28]:
def target_encoder(training_frame, test_frame, x, y, lambda_=0.15, threshold=150, test=False):

    """ Applies simple target encoding to categorical variables.

    :param training_frame: Training frame which to create target means and to be encoded.
    :param test_frame: Test frame to be encoded using information from training frame.
    :param x: Name of input variable to be encoded.
    :param y: Name of target variable to use for encoding.
    :param lambda_: Balance between level mean and overall mean for small groups.
    :param threshold: Number below which a level is considered small enough to be shrunken.
    :param test: Whether or not to print the row_val_dict for testing purposes.
    :return: Tuple of encoded variable from train and test set as H2OFrames.

    """

    # convert to pandas
    trdf = training_frame.as_data_frame().loc[:, [x,y]] # df
    tss = test_frame.as_data_frame().loc[:, x]          # series


    # create dictionary of level:encode val

    encode_name = x + '_Tencode'
    overall_mean = trdf[y].mean()
    row_val_dict = {}

    for level in trdf[x].unique():
        level_df = trdf[trdf[x] == level][y]
        level_n = level_df.shape[0]
        level_mean = level_df.mean()
        if level_n >= threshold:
            row_val_dict[level] = level_mean
        else:
            row_val_dict[level] = ((1 - lambda_) * level_mean) +\
                                  (lambda_ * overall_mean)

    row_val_dict[np.nan] = overall_mean # handle missing values

    if test:
        print(row_val_dict)

    # apply the transform to training data
    trdf[encode_name] = trdf[x].apply(lambda i: row_val_dict[i])

    # apply the transform to test data
    tsdf = pd.DataFrame(columns=[x, encode_name])
    tsdf[x] = tss
    tsdf.loc[:, encode_name] = overall_mean # handle previously unseen values
    # handle values that are seen in tsdf but not row_val_dict
    for i, col_i in enumerate(tsdf[x]):
        try:
            row_val_dict[col_i]
        except:
            # a value that appeared in tsdf isn't in the row_val_dict so just
            # make it the overall_mean
            row_val_dict[col_i] = overall_mean
    tsdf[encode_name] = tsdf[x].apply(lambda i: row_val_dict[i])


    # convert back to H2O

    trdf = h2o.H2OFrame(trdf[encode_name].as_matrix())
    trdf.columns = [encode_name]

    tsdf = h2o.H2OFrame(tsdf[encode_name].as_matrix())
    tsdf.columns = [encode_name]

    return (trdf, tsdf)


In [41]:
total = len(cats_afterpp)
for i, var in enumerate(cats_afterpp):
    
    tr_enc, _ = target_encoder(train, test, var, 'SalePrice')
    v_enc, ts_enc = target_encoder(valid, test, var, 'SalePrice')
    
    print('Encoding: ' + var + ' (' + str(i+1) + '/' + str(total) + ') ...')

    train = train.cbind(tr_enc)
    valid = valid.cbind(v_enc)
    test = test.cbind(ts_enc)    
    
print('Done.')

Encoding: OverallQual (1/51) ...
Encoding: BsmtFinType2 (2/51) ...
Encoding: LotConfig (3/51) ...
Encoding: HeatingQC (4/51) ...
Encoding: YearRemodAdd (5/51) ...
Encoding: OverallCond (6/51) ...
Encoding: FireplaceQu (7/51) ...
Encoding: LotShape (8/51) ...
Encoding: Fence (9/51) ...
Encoding: BldgType (10/51) ...
Encoding: MoSold (11/51) ...
Encoding: YearBuilt (12/51) ...
Encoding: RoofMatl (13/51) ...
Encoding: MSZoning (14/51) ...
Encoding: LandContour (15/51) ...
Encoding: SaleCondition (16/51) ...
Encoding: BsmtFinType1 (17/51) ...
Encoding: LandSlope (18/51) ...
Encoding: Exterior1st (19/51) ...
Encoding: GarageFinish (20/51) ...
Encoding: Condition1 (21/51) ...
Encoding: BsmtQual (22/51) ...
Encoding: RoofStyle (23/51) ...
Encoding: Foundation (24/51) ...
Encoding: Street (25/51) ...
Encoding: GarageType (26/51) ...
Encoding: YrSold (27/51) ...
Encoding: Neighborhood (28/51) ...
Encoding: Alley (29/51) ...
Encoding: GarageCond (30/51) ...
Encoding: Heating (31/51) ...
Encoding

In [42]:
encoded_nums, cats = get_type_lists(frame=train)

Numeric = ['MasVnrType_Tencode', 'Id', 'TotRmsAbvGrd', 'BsmtCond_Tencode', 'PoolArea', 'TotalBsmtSF', 'KitchenQual_Tencode', 'ExterQual_Tencode', 'Alley_Tencode', 'BsmtExposure_Tencode', 'Functional_Tencode', 'Condition2_Tencode', 'SaleCondition_Tencode', 'YearBuilt_Tencode', 'LandContour_Tencode', 'HeatingQC_Tencode', 'BsmtQual_Tencode', 'Neighborhood_Tencode', 'LotShape_Tencode', 'Fence_Tencode', 'BsmtFullBath', 'ExterCond_Tencode', 'GarageYrBlt', 'LowQualFinSF', 'Heating_Tencode', 'MoSold_Tencode', 'ScreenPorch', 'BsmtHalfBath', '1stFlrSF', 'OverallQual_Tencode', 'LandSlope_Tencode', 'Foundation_Tencode', 'BsmtFinType1_Tencode', 'GarageYrBltBins_Tencode', 'PoolQC_Tencode', 'GrLivArea', 'OpenPorchSF', 'SaleType_Tencode', 'GarageFinish_Tencode', 'Exterior1st_Tencode', 'YearRemodAdd_Tencode', 'Fireplaces', 'Exterior2nd_Tencode', 'MiscFeature_Tencode', 'BedroomAbvGr', 'HouseStyle_Tencode', 'Utilities_Tencode', 'LotConfig_Tencode', 'Street_Tencode', 'MSZoning_Tencode', 'BsmtFinSF1', 'Bld

In [44]:
len(encoded_nums)

81

In [45]:
print(train.shape)

(1001, 133)


In [46]:
print('Imputed and encoded numeric training data:')
train[encoded_nums].describe() #79 numeric columns w/ no missing
print('--------------------------------------------------------------------------------')
print('Imputed and encoded numeric validation data:')
valid[encoded_nums].describe() #79 numeric columns w/ no missing
print('--------------------------------------------------------------------------------')
print('Imputed and encoded numeric test data:')
test[encoded_nums].describe() #79 numeric columns w/ no missing

Imputed and encoded numeric training data:
Rows:1001
Cols:81




Unnamed: 0,MasVnrType_Tencode,Id,TotRmsAbvGrd,BsmtCond_Tencode,PoolArea,TotalBsmtSF,KitchenQual_Tencode,ExterQual_Tencode,Alley_Tencode,BsmtExposure_Tencode,Functional_Tencode,Condition2_Tencode,SaleCondition_Tencode,YearBuilt_Tencode,LandContour_Tencode,HeatingQC_Tencode,BsmtQual_Tencode,Neighborhood_Tencode,LotShape_Tencode,Fence_Tencode,BsmtFullBath,ExterCond_Tencode,GarageYrBlt,LowQualFinSF,Heating_Tencode,MoSold_Tencode,ScreenPorch,BsmtHalfBath,1stFlrSF,OverallQual_Tencode,LandSlope_Tencode,Foundation_Tencode,BsmtFinType1_Tencode,GarageYrBltBins_Tencode,PoolQC_Tencode,GrLivArea,OpenPorchSF,SaleType_Tencode,GarageFinish_Tencode,Exterior1st_Tencode,YearRemodAdd_Tencode,Fireplaces,Exterior2nd_Tencode,MiscFeature_Tencode,BedroomAbvGr,HouseStyle_Tencode,Utilities_Tencode,LotConfig_Tencode,Street_Tencode,MSZoning_Tencode,BsmtFinSF1,BldgType_Tencode,RoofMatl_Tencode,2ndFlrSF,PavedDrive_Tencode,FullBath,GarageCond_Tencode,WoodDeckSF,GarageCars,GarageQual_Tencode,LotArea,Condition1_Tencode,MasVnrArea,MSSubClass_Tencode,KitchenAbvGr,MiscVal,FireplaceQu_Tencode,CentralAir_Tencode,3SsnPorch,RoofStyle_Tencode,BsmtFinType2_Tencode,Electrical_Tencode,EnclosedPorch,GarageArea,BsmtUnfSF,BsmtFinSF2,LotFrontage,GarageType_Tencode,HalfBath,OverallCond_Tencode,YrSold_Tencode
type,real,int,int,real,int,int,real,real,real,real,real,real,real,real,real,real,real,real,real,real,int,real,int,int,real,real,int,int,int,real,real,real,real,real,real,int,int,real,real,real,real,int,real,real,int,real,real,real,real,real,int,real,real,int,real,int,real,int,int,real,int,real,int,real,int,int,real,real,int,real,real,real,int,int,int,int,real,real,int,real,real
mins,156555.86183074262,1.0,3.0,84275.79385614385,0.0,0.0,124936.94820396995,116078.96885614384,128075.200999001,123956.21308691308,110059.12718947718,108075.79385614385,115832.04385614385,103613.29385614385,145140.16285614387,135668.69902855766,121216.12093947716,109690.79385614385,163944.59385614382,141306.73814185814,0.0,122047.14941169939,0.0,0.0,93469.96052281052,171267.6210300569,0.0,0.0,480.0,57340.143856143855,181486.5182747485,121726.89385614387,121276.42985614385,119405.5855228105,181425.2874371859,480.0,0.0,125968.29385614385,119405.5855228105,78325.79385614385,104035.31885614386,0.0,129978.23968947721,140375.79385614386,0.0,118907.22242757239,144200.79385614386,176473.2899159664,146350.10385614386,78614.79385614385,0.0,138273.46052281052,143775.79385614386,0.0,129253.89464979463,0.0,112070.79385614385,0.0,0.0,96813.29385614385,1300.0,134468.29385614386,0.0,110931.40933233431,0.0,0.0,141462.34885614386,119608.23368665231,0.0,161577.2224275724,126947.55924075923,84275.79385614385,0.0,0.0,0.0,0.0,21.0,119405.5855228105,0.0,116283.85154845155,179239.41089108912
mean,180953.6910454183,724.8291708291705,6.512487512487514,182522.25164540764,3.376623376623376,1063.2387612387604,180810.9702399497,181095.696027499,182525.0014360763,181332.52799323562,182576.2453368812,182224.44204037718,181541.3249445857,182171.95904095916,182186.7329973223,182409.70856326495,180724.5454251543,182171.95904095893,181993.92786324595,183005.0923464647,0.4265734265734266,182637.23505924654,1883.850149850151,5.9250749250749255,182323.57985955084,182085.76055692547,14.763236763236765,0.05994005994005996,1172.088911088912,180290.10698776753,182145.0556687068,183119.90242260238,184124.927790092,184028.82781249733,182060.62963350347,1519.8011988011988,44.92407592407593,181180.3311701287,182703.09600908577,183565.71379414786,182171.95904095907,0.6203796203796214,183994.5260926888,182343.8002408678,2.8711288711288696,183188.2563525883,182178.6531407155,181772.16118142588,182203.5349842965,183387.92640181998,445.5994005994004,182849.79797599994,182033.94005549914,341.78721278721247,182908.1805595005,1.5744255744255726,182992.97056335275,91.07392607392609,1.7932067932067925,183008.55356277092,10628.262737262736,182645.11776425384,106.16383616383624,184318.41699728824,1.0469530469530457,39.684315684315685,181849.37466200133,182822.70708043195,3.4565434565434567,182159.00869385348,182622.35788112954,182937.98398065448,21.257742257742255,477.4685314685315,575.9900099900102,41.64935064935064,70.74475524475535,182245.84650479382,0.381618381618382,183250.586628257,182185.62702342597
maxs,256064.37787676242,1460.0,14.0,212500.14385614384,738.0,6110.0,305958.08769176033,348876.58744588745,184672.98832271763,243510.2073978105,185063.63879957132,269469.54385614384,260380.61674771016,362592.99385614385,215016.8662245649,216426.0,312352.66653056245,329868.3581418581,220137.0973044197,189018.974137931,3.0,185708.33940774488,2010.0,528.0,185483.95814185814,198254.40597735601,440.0,2.0,4692.0,411753.33231768233,200379.84385614382,226465.8274336283,236446.98648648648,239676.72027972032,443825.79385614384,5642.0,523.0,263880.6876061438,243708.832,250025.79385614384,313744.7688561438,3.0,298475.79385614384,239825.79385614384,8.0,210965.119205298,182216.631,215932.78135614385,182383.5220883534,204495.20385614387,5644.0,187595.7541966427,669075.7938561438,2065.0,187523.8671023965,3.0,188146.7502726281,736.0,4.0,222252.04385614384,164660.0,219155.3393106893,1378.0,243184.96534653462,3.0,15500.0,306119.08332982805,186782.0,508.0,248325.79385614384,218122.46052281052,187716.3622559653,552.0,1418.0,2153.0,1127.0,313.0,243875.54558028182,2.0,202661.43181818182,186896.74166666667
sigma,32513.444047378878,421.4954943866087,1.587480364002624,14302.859526666482,43.98791894358055,450.4683539784607,49556.81599252627,52277.89924581866,9766.58876414746,26179.291204996774,9673.47275081667,6389.719881512636,24904.82535576665,46504.44964927137,11064.930655592716,35398.54044306139,49983.0128988164,51529.9324408036,21926.258171174657,13504.518364088008,0.5204355357324734,9945.861000354027,423.68187301615546,49.51948485817861,8588.039549532357,8240.20571837234,54.16981527671003,0.2457714312193271,392.4739572085174,58402.9609943276,1835.4154172265153,39735.240377141236,35223.67981963558,41226.63262266888,11901.388296801693,520.2774292995629,63.01874506660877,25459.930478733866,43904.519304777365,29589.150408848287,38617.88648752911,0.6416730131026673,30224.673393447127,6090.240696354947,0.8052177496650357,21247.331540214996,1201.5656926005156,10669.040225587902,2541.57596669905,22616.419950331485,467.17326588712297,13615.68412825508,15980.206375764057,432.9085950594231,15415.629575665147,0.5538098001166959,17804.37971784183,120.05044160464608,0.7349787794173421,18601.506242713353,9442.37310711137,13080.75973520577,179.66239765423683,36476.82137140024,0.22537348289718281,516.2493547150991,40541.64241671104,15828.294334486298,29.83605804459362,20695.377655471344,11550.862509420635,16464.287213227504,61.04442237008644,209.92257920181268,444.45750741786287,148.91838678308972,23.15278676455676,36865.89240600122,0.5002217290580031,23878.60607396363,2758.862801413868
zeros,0,0,0,0,995,25,0,0,0,0,0,0,0,0,0,0,0,0,0,0,586,0,48,984,0,0,921,943,0,0,0,0,0,0,0,0,460,0,0,0,0,466,0,0,4,0,0,0,0,0,326,0,0,572,0,6,0,523,48,0,0,0,577,0,1,969,0,0,985,0,0,0,867,48,78,894,0,0,626,0,0
missing,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,203515.30891719743,1.0,8.0,184737.95170142702,0.0,856.0,212221.98514851482,229333.4454277286,184672.98832271763,167645.4123076923,185063.63879957132,182525.73333333334,177020.8800489596,208416.20154845156,182282.48766816143,216426.0,200924.0538641686,197940.3021894772,165131.57073954982,189018.974137931,1.0,185708.33940774488,2003.0,0.0,183201.27365208542,175405.8820140386,0.0,0.0,856.0,207242.98260869566,181982.57594936708,226465.8274336283,236446.98648648648,239676.72027972032,181425.2874371859,1710.0,61.0,174574.41216991964,200494.42807017543,216859.7290502793,184833.50913392162,0.0,216626.9093484419,183357.84694932782,3.0,210965.119205298,182216.631,176473.2899159664,182383.5220883534,192391.8022670025,706.0,187595.7541966427,181235.93597560975,854.0,187523.8671023965,2.0,188146.7502726281,0.0,2.0,188334.07174392935,8450.0,185776.46347031964,196.0,243184.96534653462,1.0,0.0,142610.06223175966,186782.0,0.0,171522.79741935484,185630.71806674334,187716.3622559653,0.0,548.0,150.0,0.0,65.0,203664.60358890702,1.0,202661.43181818182,181231.93779904302
1,156555.86183074262,2.0,6.0,184737.95170142702,0.0,1262.0,139811.59481037923,144619.2755267423,184672.98832271763,243510.2073978105,185063.63879957132,182525.73333333334,177020.8800489596,168284.7849275724,182282.48766816143,216426.0,200924.0538641686,218575.79385614384,165131.57073954982,189018.974137931,0.0,185708.33940774488,1976.0,0.0,183201.27365208542,175083.33112582783,0.0,1.0,1262.0,162035.816091954,181982.57594936708,150397.2807424594,161782.77448994666,152368.91505791506,181425.2874371859,1262.0,0.0,174574.41216991964,200494.42807017543,155330.24861804862,163150.6584394772,1.0,155721.17213200592,183357.84694932782,3.0,178321.97183098592,182216.631,178818.25814185815,182383.5220883534,192391.8022670025,978.0,187595.7541966427,181235.93597560975,0.0,187523.8671023965,2.0,188146.7502726281,298.0,2.0,188334.07174392935,9600.0,147024.1605228105,0.0,187575.74864864862,1.0,0.0,204076.0357142857,186782.0,0.0,171522.79741935484,185630.71806674334,187716.3622559653,0.0,460.0,284.0,0.0,80.0,203664.60358890702,0.0,157086.21052281052,186896.74166666667
2,203515.30891719743,3.0,6.0,184737.95170142702,0.0,920.0,212221.98514851482,229333.4454277286,184672.98832271763,187129.2701381951,185063.63879957132,182525.73333333334,177020.8800489596,241163.1271894772,182282.48766816143,216426.0,200924.0538641686,197940.3021894772,209450.39420289855,189018.974137931,1.0,185708.33940774488,2001.0,0.0,183201.27365208542,195815.36317432567,0.0,0.0,920.0,207242.98260869566,181982.57594936708,226465.8274336283,236446.98648648648,239676.72027972032,181425.2874371859,1786.0,42.0,174574.41216991964,200494.42807017543,216859.7290502793,212217.14000999,1.0,216626.9093484419,183357.84694932782,3.0,210965.119205298,182216.631,176473.2899159664,182383.5220883534,192391.8022670025,486.0,187595.7541966427,181235.93597560975,866.0,187523.8671023965,2.0,188146.7502726281,0.0,2.0,188334.07174392935,11250.0,185776.46347031964,162.0,243184.96534653462,1.0,0.0,204076.0357142857,186782.0,0.0,171522.79741935484,185630.71806674334,187716.3622559653,0.0,608.0,434.0,0.0,68.0,203664.60358890702,1.0,202661.43181818182,181231.93779904302


--------------------------------------------------------------------------------
Imputed and encoded numeric validation data:
Rows:459
Cols:81




Unnamed: 0,MasVnrType_Tencode,Id,TotRmsAbvGrd,BsmtCond_Tencode,PoolArea,TotalBsmtSF,KitchenQual_Tencode,ExterQual_Tencode,Alley_Tencode,BsmtExposure_Tencode,Functional_Tencode,Condition2_Tencode,SaleCondition_Tencode,YearBuilt_Tencode,LandContour_Tencode,HeatingQC_Tencode,BsmtQual_Tencode,Neighborhood_Tencode,LotShape_Tencode,Fence_Tencode,BsmtFullBath,ExterCond_Tencode,GarageYrBlt,LowQualFinSF,Heating_Tencode,MoSold_Tencode,ScreenPorch,BsmtHalfBath,1stFlrSF,OverallQual_Tencode,LandSlope_Tencode,Foundation_Tencode,BsmtFinType1_Tencode,GarageYrBltBins_Tencode,PoolQC_Tencode,GrLivArea,OpenPorchSF,SaleType_Tencode,GarageFinish_Tencode,Exterior1st_Tencode,YearRemodAdd_Tencode,Fireplaces,Exterior2nd_Tencode,MiscFeature_Tencode,BedroomAbvGr,HouseStyle_Tencode,Utilities_Tencode,LotConfig_Tencode,Street_Tencode,MSZoning_Tencode,BsmtFinSF1,BldgType_Tencode,RoofMatl_Tencode,2ndFlrSF,PavedDrive_Tencode,FullBath,GarageCond_Tencode,WoodDeckSF,GarageCars,GarageQual_Tencode,LotArea,Condition1_Tencode,MasVnrArea,MSSubClass_Tencode,KitchenAbvGr,MiscVal,FireplaceQu_Tencode,CentralAir_Tencode,3SsnPorch,RoofStyle_Tencode,BsmtFinType2_Tencode,Electrical_Tencode,EnclosedPorch,GarageArea,BsmtUnfSF,BsmtFinSF2,LotFrontage,GarageType_Tencode,HalfBath,OverallCond_Tencode,YrSold_Tencode
type,real,int,int,real,int,int,real,real,real,real,real,real,real,real,real,real,real,real,real,real,int,real,int,int,real,real,int,int,int,real,real,real,real,real,real,int,int,real,real,real,real,int,real,real,int,real,real,real,real,real,int,real,real,int,real,int,real,int,int,real,int,real,int,real,int,int,real,real,int,real,real,real,int,int,int,int,real,real,int,real,real
mins,131576.52450980392,4.0,2.0,78579.02450980392,0.0,0.0,105130.89950980392,91042.14950980392,134607.54723707662,107897.15367647057,84954.02450980392,84954.02450980392,143816.52450980392,79514.02450980392,163075.56297134238,100679.02450980392,107897.15367647057,112866.7168174962,163981.4191419142,133419.96200980392,0.0,91754.02450980392,0.0,0.0,77729.02450980392,162484.5985098039,0.0,0.0,334.0,69356.52450980392,175531.83179723503,110363.31736694675,107897.15367647057,108352.20632798571,178187.36899563318,334.0,0.0,107734.02450980392,108352.20632798571,96429.02450980392,120621.97532947603,0.0,106204.02450980392,73479.02450980392,0.0,122402.59593837534,178193.4967320261,165751.57330498463,95579.02450980392,118217.35784313723,0.0,128987.56617647059,175904.02450980392,0.0,114840.96895424835,0.0,108352.20632798571,0.0,0.0,108352.20632798571,1491.0,150223.0311764706,0.0,95012.35784313723,1.0,0.0,133191.52450980392,111601.52450980392,0.0,139864.02450980392,107897.15367647057,100143.52450980392,0.0,0.0,0.0,0.0,21.0,108352.20632798571,0.0,78579.02450980392,169627.25564187934
mean,176236.3233521294,742.8671023965148,6.52941176470589,178575.42264940837,1.411764705882353,1044.760348583878,177298.28443120178,174940.29949164853,178551.46728608664,176528.2975458157,178425.7326455637,178197.18104147993,177335.48292323473,178193.49673202625,177854.22806185653,180663.66151693798,177231.78653936516,178193.49673202608,176786.22238241715,178943.90578410032,0.42265795206971674,178484.55631381104,1835.7864923747268,5.668845315904139,178371.04867785898,178193.49673202605,15.710239651416122,0.05228758169934641,1141.9912854030508,178193.49673202616,177815.9926203596,179041.90369302416,178193.49673202584,178193.49673202634,178192.57957409543,1506.0043572984757,50.44662309368192,177239.63702422142,176100.76506899047,179651.10523303007,178193.49673202634,0.5969498910675383,179728.6088213935,178331.82024007873,2.856209150326796,177648.76674783212,178193.4967320261,178163.2663740442,178225.25931265744,179328.32668845326,439.36601307189557,178649.14910504507,177969.71943483278,358.34422657952064,179002.96058140023,1.544662309368193,179410.743410654,101.15904139433547,1.7102396514161213,179173.93458285267,10273.808278867102,178617.62590029478,96.47276688453148,178290.67043872018,1.0457516339869268,51.78649237472768,175300.38912811334,179115.18492887347,3.30718954248366,177381.14581571185,178749.88685121093,179083.62772865142,23.4727668845316,463.19172113289744,548.159041394335,57.235294117647065,69.0108932461874,180113.26154897668,0.3856209150326796,180314.7703639625,178193.496732026
maxs,243111.468058191,1459.0,12.0,204324.73418722322,648.0,3200.0,307728.63562091504,311103.9129713424,180974.4445098039,252970.21398348815,181312.14950980392,302979.0245098039,255001.6042717087,430479.0245098039,252389.9995098039,211491.75330396474,287410.74022408965,288359.0245098039,257372.27450980392,184450.31335149863,2.0,197862.35784313726,2009.0,572.0,179414.41573033712,200082.05082559335,480.0,1.0,3228.0,369297.8945098039,219485.84269162212,222366.88205128204,224699.7863950498,228607.89347532115,180579.02450980392,3608.0,547.0,255001.6042717087,223156.8495098039,230256.21200980392,273645.3828431372,3.0,237481.8022875817,188229.02450980392,6.0,221634.02450980392,178193.4967320261,221371.6687405731,178405.7096069869,219311.6970098039,1880.0,183943.1995098039,296434.0245098039,1611.0,184063.06872037915,3.0,187300.52567237162,857.0,4.0,282791.5245098039,215245.0,311479.0245098039,1600.0,224956.37657165958,2.0,8300.0,345181.5245098039,184861.0283687943,407.0,205162.1274859944,194068.52450980392,184788.65617433414,301.0,1248.0,2336.0,1474.0,182.0,243247.17284313726,2.0,236726.2467320261,186970.48567051822
sigma,27300.308004292383,422.05491502326714,1.7068785455857547,17674.722900752706,30.246049815005012,412.0811851328445,48579.93227209306,46704.676972640176,9870.638176773426,29831.690627197433,8139.874204543354,7675.716724689661,25636.5564484805,48282.59548084681,12704.546441754976,31183.876623648637,46388.72372416759,47446.35307377432,21170.422649021333,12656.577864073239,0.5161250426046549,12461.390436783036,512.057765479955,46.66085163497713,7482.9638209952,8376.309432540083,59.12791370210353,0.2228492452574714,373.01668824001615,55383.22892736045,9612.485004032442,37636.63649998418,30235.126490265342,38096.993571949526,111.63291947830336,537.1049436609105,72.74780631554769,25705.648619049694,37887.20742632184,27697.042657464837,38509.35013979468,0.6515613257699318,26639.054892573462,5792.9151130758355,0.8391483048651234,21371.85806025564,0.0,11574.4525615196,3866.0185855881646,22954.507567875306,431.42414339733244,11313.686291071555,12445.872412899758,444.5856999141145,17370.731330504437,0.5445931590280559,22986.979310392446,136.0428056028114,0.7712943151662268,22413.52398643429,11072.988785723228,12058.531723299979,183.06222490418148,34491.169018196844,0.20917396118996429,449.54109321577573,41407.53486285178,19717.249202290554,28.18457901138756,13516.296484902437,13507.295148882748,17731.970597652493,61.32101817353125,221.95498544637042,436.0358278966984,185.24001065203282,20.747859653582008,36364.50360655458,0.5091844194507408,30660.56351655315,6756.846117385383
zeros,0,0,0,0,458,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,270,0,33,450,0,0,423,435,0,0,0,0,0,0,0,0,196,0,0,0,0,224,0,0,2,0,0,0,0,0,141,0,0,257,0,3,0,238,33,0,0,0,292,0,0,439,0,0,451,0,0,0,385,33,40,399,0,0,287,0,0
missing,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,157753.39249146762,4.0,7.0,204324.73418722322,0.0,756.0,211880.81318681315,143747.87889273354,180758.81264637003,161376.63366336632,179857.76346604215,178218.27472527474,147087.25059676042,134248.71200980392,175715.7828162291,158961.70308123247,138313.57142857142,216008.84593837534,194850.58566088302,184450.31335149863,1.0,180398.05693069307,1998.0,0.0,179414.41573033712,186396.24236694674,0.0,0.0,961.0,204327.95765587135,175531.83179723503,144486.10784313726,169309.44117647054,199424.48842732972,178187.36899563318,1717.0,35.0,170822.7626262626,143948.79679144386,154595.81798806478,155036.52450980392,1.0,180267.35784313726,179157.6651480638,3.0,203633.455628685,178193.4967320261,165751.57330498463,178405.7096069869,187920.61064425771,216.0,181805.66321243523,176671.8111111111,756.0,184063.06872037915,1.0,187300.52567237162,0.0,3.0,185601.24938271602,9550.0,181573.27604166663,0.0,182715.71200980392,1.0,0.0,219907.18575980392,184861.0283687943,0.0,171401.71038251367,182592.86046511628,184788.65617433414,272.0,642.0,540.0,0.0,60.0,142532.16792065662,0.0,204262.16064257032,186970.48567051822
1,243111.468058191,8.0,7.0,181115.23,0.0,1107.0,140285.62820512822,143747.87889273354,180758.81264637003,199413.65506535943,179857.76346604215,178218.27472527474,171302.3622047244,161156.52450980392,175715.7828162291,211491.75330396474,206633.03141361254,184610.14950980392,194850.58566088302,184450.31335149863,1.0,180398.05693069307,1973.0,0.0,179414.41573033712,200082.05082559335,0.0,0.0,1107.0,204327.95765587135,175531.83179723503,148549.72906403942,169309.44117647054,158794.07093837534,178187.36899563318,2090.0,204.0,170822.7626262626,201271.56721053383,167128.41844919784,161352.02450980392,2.0,169568.25527903467,163465.69117647054,3.0,203633.455628685,178193.4967320261,165751.57330498463,178405.7096069869,187920.61064425771,859.0,181805.66321243523,176671.8111111111,983.0,184063.06872037915,2.0,187300.52567237162,235.0,2.0,185601.24938271602,10382.0,197366.52450980392,240.0,224956.37657165958,1.0,350.0,205118.42900418595,184861.0283687943,0.0,171401.71038251367,156634.52450980392,184788.65617433414,228.0,484.0,216.0,32.0,80.0,201051.3968871595,1.0,154426.8646332607,176381.1217825312
2,157753.39249146762,10.0,5.0,181115.23,0.0,991.0,140285.62820512822,143747.87889273354,180758.81264637003,161376.63366336632,179857.76346604215,127029.02450980392,171302.3622047244,139354.02450980392,175715.7828162291,211491.75330396474,138313.57142857142,129965.77450980392,163981.4191419142,184450.31335149863,1.0,180398.05693069307,1939.0,0.0,179414.41573033712,162484.5985098039,0.0,0.0,1077.0,141364.58360071303,175531.83179723503,144486.10784313726,224699.7863950498,156634.52450980392,178187.36899563318,1077.0,4.0,170822.7626262626,201271.56721053383,151733.02861939298,120621.97532947603,2.0,151828.55059676044,179157.6651480638,2.0,122402.59593837534,178193.4967320261,165751.57330498463,178405.7096069869,187920.61064425771,851.0,128987.56617647059,176671.8111111111,0.0,184063.06872037915,1.0,187300.52567237162,0.0,1.0,194951.10784313726,7420.0,152316.52450980392,0.0,128987.56617647059,2.0,0.0,205118.42900418595,184861.0283687943,0.0,171401.71038251367,182592.86046511628,184788.65617433414,0.0,205.0,140.0,0.0,50.0,201051.3968871595,0.0,154426.8646332607,170246.78240454075


--------------------------------------------------------------------------------
Imputed and encoded numeric test data:
Rows:1459
Cols:81




Unnamed: 0,MasVnrType_Tencode,Id,TotRmsAbvGrd,BsmtCond_Tencode,PoolArea,TotalBsmtSF,KitchenQual_Tencode,ExterQual_Tencode,Alley_Tencode,BsmtExposure_Tencode,Functional_Tencode,Condition2_Tencode,SaleCondition_Tencode,YearBuilt_Tencode,LandContour_Tencode,HeatingQC_Tencode,BsmtQual_Tencode,Neighborhood_Tencode,LotShape_Tencode,Fence_Tencode,BsmtFullBath,ExterCond_Tencode,GarageYrBlt,LowQualFinSF,Heating_Tencode,MoSold_Tencode,ScreenPorch,BsmtHalfBath,1stFlrSF,OverallQual_Tencode,LandSlope_Tencode,Foundation_Tencode,BsmtFinType1_Tencode,GarageYrBltBins_Tencode,PoolQC_Tencode,GrLivArea,OpenPorchSF,SaleType_Tencode,GarageFinish_Tencode,Exterior1st_Tencode,YearRemodAdd_Tencode,Fireplaces,Exterior2nd_Tencode,MiscFeature_Tencode,BedroomAbvGr,HouseStyle_Tencode,Utilities_Tencode,LotConfig_Tencode,Street_Tencode,MSZoning_Tencode,BsmtFinSF1,BldgType_Tencode,RoofMatl_Tencode,2ndFlrSF,PavedDrive_Tencode,FullBath,GarageCond_Tencode,WoodDeckSF,GarageCars,GarageQual_Tencode,LotArea,Condition1_Tencode,MasVnrArea,MSSubClass_Tencode,KitchenAbvGr,MiscVal,FireplaceQu_Tencode,CentralAir_Tencode,3SsnPorch,RoofStyle_Tencode,BsmtFinType2_Tencode,Electrical_Tencode,EnclosedPorch,GarageArea,BsmtUnfSF,BsmtFinSF2,LotFrontage,GarageType_Tencode,HalfBath,OverallCond_Tencode,YrSold_Tencode
type,real,int,int,real,int,int,real,real,real,real,real,real,real,real,real,real,real,real,real,real,int,real,int,int,real,real,int,int,int,real,real,real,real,real,real,int,int,real,real,real,real,int,real,real,int,real,real,real,real,real,int,real,real,int,real,int,real,int,int,real,int,real,int,real,int,int,real,real,int,real,real,real,int,int,int,int,real,real,int,real,real
mins,131576.52450980392,1461.0,3.0,78579.02450980392,0.0,0.0,105130.89950980392,91042.14950980392,134607.54723707662,107897.15367647057,84954.02450980392,127029.02450980392,143816.52450980392,79514.02450980392,163075.56297134238,100679.02450980392,107897.15367647057,112866.7168174962,163981.4191419142,133419.96200980392,0.0,91754.02450980392,0.0,0.0,77729.02450980392,162484.5985098039,0.0,0.0,407.0,69356.52450980392,175531.83179723503,110363.31736694675,107897.15367647057,139136.05102495544,178187.36899563318,407.0,0.0,107734.02450980392,108352.20632798571,96429.02450980392,120621.97532947603,0.0,106204.02450980392,73479.02450980392,0.0,122402.59593837534,178193.4967320261,165751.57330498463,95579.02450980392,118217.35784313723,0.0,128987.56617647059,175904.02450980392,0.0,114840.96895424835,0.0,108352.20632798571,0.0,0.0,108352.20632798571,1470.0,150223.0311764706,0.0,95012.35784313723,0.0,0.0,133191.52450980392,111601.52450980392,0.0,139864.02450980392,107897.15367647057,100143.52450980392,0.0,0.0,0.0,0.0,21.0,108352.20632798571,0.0,78579.02450980392,169627.25564187934
mean,178146.24432411807,2190.0,6.3851953392734755,177115.4641081019,1.7443454420836189,1046.1179698216752,179340.39624644505,177673.68837649445,178550.03252320114,177234.88905514343,178383.03720943822,178369.39980873468,176492.09637795304,178184.3114298014,179174.2263375262,182227.81273899996,178942.1349335839,179660.9318538107,176844.08363718318,179260.15082329372,0.4344543582704184,178138.292477274,1871.9897189856056,3.543522960932145,179140.34833949347,178570.99134392996,17.064427690198773,0.0652024708304736,1156.5346127484572,181143.66379652923,177395.21920765375,181006.29018575946,180068.38187120648,170002.81029859162,178187.38159550258,1486.0459218642889,48.31391363947919,175933.1071534588,177253.37457383898,179810.686881209,176730.68322763144,0.5812200137080195,180995.2353182519,178536.70969509304,2.8540095956134333,176296.0327526845,178193.4967320261,178278.00897433673,178065.0926703297,179232.0904301915,438.9026730637427,178370.3696551594,176751.7532061534,325.9677861549004,177090.1544055946,1.570938999314599,180887.13312920654,93.17477724468819,1.7649074708704602,178833.85891178728,9819.161069225516,179939.77372747503,99.67374914324867,177704.83404990312,1.0424948594928027,58.16792323509254,174678.31798555847,179789.60281035848,1.794379712131597,177370.70371664243,178665.96855634326,180126.11931528134,24.243317340644285,472.4448252227548,553.9150102810141,52.583276216586704,68.9551062371488,182701.64309087777,0.3776559287183003,181638.3138716565,178138.41592993547
maxs,243111.468058191,2919.0,15.0,204324.73418722322,800.0,5095.0,307728.63562091504,311103.9129713424,180974.4445098039,252970.21398348815,181312.14950980392,302979.0245098039,255001.6042717087,331964.0245098039,252389.9995098039,211491.75330396474,287410.74022408965,288359.0245098039,257372.27450980392,184450.31335149863,3.0,197862.35784313726,2207.0,1064.0,179414.41573033712,200082.05082559335,576.0,2.0,5095.0,369297.8945098039,219485.84269162212,222366.88205128204,224699.7863950498,199424.48842732972,178193.4967320261,5095.0,742.0,255001.6042717087,223156.8495098039,230256.21200980392,273645.3828431372,4.0,237481.8022875817,188229.02450980392,6.0,203633.455628685,178193.4967320261,221371.6687405731,178405.7096069869,219311.6970098039,4010.0,183943.1995098039,296434.0245098039,1862.0,184063.06872037915,4.0,187300.52567237162,1424.0,5.0,194951.10784313726,56600.0,311479.0245098039,1290.0,224956.37657165958,2.0,17000.0,345181.5245098039,184861.0283687943,360.0,205162.1274859944,194068.52450980392,184788.65617433414,1012.0,1488.0,2140.0,1526.0,200.0,243247.17284313726,2.0,236726.2467320261,186970.48567051822
sigma,28384.824139871405,421.32133421732476,1.5088945751925396,18416.204155714877,30.491646305342066,442.89862416784206,50064.36965926008,48363.33170083524,9868.109461036458,31365.106075644642,7764.753398575101,6115.230699170744,24614.448808109875,46552.401284272695,16731.149524355016,30689.890057024273,48552.82922823181,48658.8290868911,19890.637189969897,11743.043122261108,0.5306475357080623,13372.261749177289,445.78611753041713,44.04325086437556,4120.712522628474,8523.644517243943,56.609762906910575,0.2524682621647407,398.16581959237874,57004.8706645638,8806.02800442614,38032.67490944731,31074.332692622134,19244.744163326824,0.2776738495325187,485.5660986532532,68.88336411315396,24826.258639866202,37599.99853102868,27215.585184717413,38062.468024576185,0.6474204530720105,25951.286853243673,4782.455327633695,0.8297883627354514,20533.377591439952,0.0,11456.730191238732,5302.397777524543,23489.406573197444,455.25711858126164,11421.92229037309,3137.235960973218,420.61022646910345,20263.25653257761,0.5551898880356613,20818.988716246073,127.744881519076,0.7770557771054145,20266.41566502196,4955.517326926451,15269.946772589565,177.00179159577831,32301.2058169479,0.2084716721132495,630.806977589708,41931.78957638678,18602.363953739627,20.207841751496495,13409.55977987675,14021.1874493847,15963.223969023644,67.22776541956965,217.32690210578195,437.3513241096486,176.6986705850261,20.99909053910434,34696.99414034791,0.5030166769415861,29339.674195109288,6672.392547342386
zeros,0,0,0,0,1453,41,0,0,0,0,0,0,0,0,0,0,0,0,0,0,849,0,78,1445,0,0,1319,1364,0,0,0,0,0,0,0,0,642,0,0,0,0,730,0,0,2,0,0,0,0,0,463,0,0,839,0,3,0,762,77,0,0,0,892,0,2,1408,0,0,1446,0,0,0,1208,77,124,1279,0,0,921,0,0
missing,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,157753.39249146762,1461.0,5.0,181115.23,0.0,882.0,140285.62820512822,143747.87889273354,180758.81264637003,161376.63366336632,179857.76346604215,178218.27472527474,171302.3622047244,136209.02450980392,175715.7828162291,149413.2797336845,138313.57142857142,152080.32258672698,163981.4191419142,151206.9049445865,0.0,180398.05693069307,1961.0,0.0,179414.41573033712,180372.69763480392,120.0,0.0,896.0,141364.58360071303,175531.83179723503,148549.72906403942,156012.9060887513,158794.07093837534,178187.36899563318,896.0,0.0,170822.7626262626,143948.79679144386,206602.9363057325,135104.02450980392,0.0,209302.39072847684,179157.6651480638,2.0,170914.57205240175,178193.4967320261,177919.81360946747,178405.7096069869,134962.35784313726,468.0,181805.66321243523,176671.8111111111,0.0,184063.06872037915,1.0,187300.52567237162,140.0,1.0,185601.24938271602,11622.0,150223.0311764706,0.0,179984.77108433735,1.0,0.0,138671.58035714287,184861.0283687943,0.0,171401.71038251367,168862.10143288082,184788.65617433414,0.0,730.0,270.0,144.0,80.0,201051.3968871595,0.0,154426.8646332607,169627.25564187934
1,203114.2504640024,1462.0,6.0,181115.23,0.0,1329.0,211880.81318681315,143747.87889273354,180758.81264637003,161376.63366336632,179857.76346604215,178218.27472527474,171302.3622047244,140204.02450980392,175715.7828162291,149413.2797336845,138313.57142857142,152080.32258672698,194850.58566088302,184450.31335149863,0.0,180398.05693069307,1958.0,0.0,179414.41573033712,180372.69763480392,0.0,0.0,1329.0,163241.9355717508,175531.83179723503,148549.72906403942,169309.44117647054,139136.05102495544,178187.36899563318,1329.0,36.0,170822.7626262626,143948.79679144386,154595.81798806478,127029.02450980392,0.0,163767.73719637108,188229.02450980392,3.0,170914.57205240175,178193.4967320261,165751.57330498463,178405.7096069869,187920.61064425771,923.0,181805.66321243523,176671.8111111111,0.0,184063.06872037915,1.0,187300.52567237162,393.0,1.0,185601.24938271602,14267.0,181573.27604166663,108.0,179984.77108433735,1.0,12500.0,138671.58035714287,184861.0283687943,0.0,205162.1274859944,182592.86046511628,184788.65617433414,0.0,312.0,406.0,0.0,81.0,201051.3968871595,1.0,154426.8646332607,169627.25564187934
2,157753.39249146762,1463.0,6.0,181115.23,0.0,928.0,140285.62820512822,143747.87889273354,180758.81264637003,161376.63366336632,179857.76346604215,178218.27472527474,171302.3622047244,229709.02450980392,175715.7828162291,158961.70308123247,206633.03141361254,191808.05969498912,194850.58566088302,151206.9049445865,0.0,180398.05693069307,1997.0,0.0,179414.41573033712,176369.09256535946,0.0,0.0,928.0,141364.58360071303,175531.83179723503,222366.88205128204,224699.7863950498,199424.48842732972,178187.36899563318,1629.0,34.0,170822.7626262626,223156.8495098039,206602.9363057325,214984.4091251885,1.0,209302.39072847684,179157.6651480638,3.0,203633.455628685,178193.4967320261,177919.81360946747,178405.7096069869,187920.61064425771,791.0,181805.66321243523,176671.8111111111,701.0,184063.06872037915,2.0,187300.52567237162,212.0,2.0,185601.24938271602,13830.0,181573.27604166663,0.0,224956.37657165958,1.0,0.0,205118.42900418595,184861.0283687943,0.0,171401.71038251367,182592.86046511628,184788.65617433414,0.0,482.0,137.0,0.0,74.0,201051.3968871595,1.0,204262.16064257032,169627.25564187934


In [47]:
# Check Neighborhood_Tencode

print(test[0:5, ['Neighborhood', 'Neighborhood_Tencode']])
_, _ = target_encoder(valid, test, 'Neighborhood', 'SalePrice', test=True)
del _

# NAmes   152080
# NAmes   152080
# Gilbert 191808
# Gilbert 191808
# StoneBr 288359

Neighborhood,Neighborhood_Tencode
NAmes,152080
NAmes,152080
Gilbert,191808
Gilbert,191808
StoneBr,288359



{nan: 178193.49673202613, 'NridgHt': 284073.1545098039, 'StoneBr': 288359.0245098039, 'OldTown': 139863.03613771088, 'NPkVill': 147641.52450980392, 'BrDale': 116064.02450980392, 'ClearCr': 207949.02450980392, 'NWAmes': 184610.14950980392, 'Blmngtn': 210845.6545098039, 'Mitchel': 169316.52450980392, 'Veenker': 243734.02450980392, 'Gilbert': 191808.0596949891, 'CollgCr': 190019.77971813726, 'SWISU': 156269.02450980392, 'NAmes': 152080.32258672698, 'Edwards': 126241.13989441929, 'NoRidge': 273948.2552790347, 'Crawfor': 216008.84593837534, 'Sawyer': 143162.40700980392, 'SawyerW': 189097.7776348039, 'IDOTRR': 112866.71681749621, 'Somerst': 227656.9671023965, 'MeadowV': 113131.52450980392, 'BrkSide': 129965.77450980392, 'Timber': 260109.74673202613}


In [48]:
def feature_combiner(training_frame, test_frame, nums):
    
    """ Combines numeric features using simple arithmatic operations.
    
    :param training_frame: Training frame from which to generate features and onto which generated 
                           feeatures will be cbound.
    :param test_frame: Test frame from which to generate features and onto which generated 
                       feeatures will be cbound.
    :param nums: List of original numeric features from which to generate combined features.
    
    """

    total = len(nums)
    
    # convert to pandas
    train_df = training_frame.as_data_frame()
    test_df = test_frame.as_data_frame()
    
    for i, col_i in enumerate(nums):
        
        print('Combining: ' + col_i + ' (' + str(i+1) + '/' + str(total) + ') ...')        
        
        for j, col_j in enumerate(nums):
            
            # don't repeat (i*j = j*i)
            if i < j:
                
                # convert to pandas
                col_i_train_df = train_df[col_i]
                col_j_train_df = train_df[col_j]
                col_i_test_df = test_df[col_i]
                col_j_test_df = test_df[col_j] 

                # multiply, convert back to h2o
                train_df[str(col_i + '|' + col_j)] = col_i_train_df.values*col_j_train_df.values
                test_df[str(col_i + '|' + col_j)] = col_i_test_df.values*col_j_test_df.values
                
    print('Done.')
    
    # convert back to h2o
    
    print('Converting to H2OFrame ...')
    
    training_frame = h2o.H2OFrame(train_df)
    training_frame.columns = list(train_df)
    test_frame = h2o.H2OFrame(test_df)
    test_frame.columns = list(test_df)
    
    print('Done.')
    print()
    
    # conserve memory 
    del train_df
    del test_df 
    
    return training_frame, test_frame


In [49]:
len(encoded_nums)

81

In [52]:
train.shape

(1001, 133)

In [56]:
train2, _ = feature_combiner(train, test, encoded_nums)
valid2, test2 = feature_combiner(valid, test, encoded_nums)

Combining: MasVnrType_Tencode (1/81) ...
Combining: Id (2/81) ...
Combining: TotRmsAbvGrd (3/81) ...
Combining: BsmtCond_Tencode (4/81) ...
Combining: PoolArea (5/81) ...
Combining: TotalBsmtSF (6/81) ...
Combining: KitchenQual_Tencode (7/81) ...
Combining: ExterQual_Tencode (8/81) ...
Combining: Alley_Tencode (9/81) ...
Combining: BsmtExposure_Tencode (10/81) ...
Combining: Functional_Tencode (11/81) ...
Combining: Condition2_Tencode (12/81) ...
Combining: SaleCondition_Tencode (13/81) ...
Combining: YearBuilt_Tencode (14/81) ...
Combining: LandContour_Tencode (15/81) ...
Combining: HeatingQC_Tencode (16/81) ...
Combining: BsmtQual_Tencode (17/81) ...
Combining: Neighborhood_Tencode (18/81) ...
Combining: LotShape_Tencode (19/81) ...
Combining: Fence_Tencode (20/81) ...
Combining: BsmtFullBath (21/81) ...
Combining: ExterCond_Tencode (22/81) ...
Combining: GarageYrBlt (23/81) ...
Combining: LowQualFinSF (24/81) ...
Combining: Heating_Tencode (25/81) ...
Combining: MoSold_Tencode (26/8

In [57]:
encoded_combined_nums, cats = get_type_lists(frame=train2)

Numeric = ['LotShape_Tencode|BsmtHalfBath', 'BsmtFullBath|PoolQC_Tencode', 'PoolArea|LotShape_Tencode', 'BedroomAbvGr|BsmtFinSF2', 'KitchenQual_Tencode|OpenPorchSF', 'Id|SaleCondition_Tencode', 'KitchenQual_Tencode|BsmtFinSF2', 'BsmtExposure_Tencode|ExterCond_Tencode', 'BsmtFinSF1|CentralAir_Tencode', 'Id', 'TotRmsAbvGrd', 'BsmtFullBath|Exterior1st_Tencode', 'BsmtCond_Tencode', 'MiscFeature_Tencode|FireplaceQu_Tencode', 'SaleCondition_Tencode|RoofStyle_Tencode', 'Fence_Tencode|BldgType_Tencode', 'BsmtCond_Tencode|PavedDrive_Tencode', 'MoSold_Tencode|HouseStyle_Tencode', 'BsmtHalfBath|EnclosedPorch', 'YearRemodAdd_Tencode|LotArea', 'SaleCondition_Tencode|BsmtHalfBath', 'OverallQual_Tencode|BsmtFinType2_Tencode', 'Fence_Tencode|BedroomAbvGr', 'BsmtCond_Tencode|LotArea', 'Id|HalfBath', 'ExterQual_Tencode|MiscFeature_Tencode', 'RoofMatl_Tencode|Electrical_Tencode', 'TotRmsAbvGrd|BsmtCond_Tencode', 'KitchenQual_Tencode', 'GarageArea|BsmtFinSF2', 'FireplaceQu_Tencode|BsmtUnfSF', 'OverallQual

In [58]:
train2.shape

(1001, 3373)

In [59]:
# check number of created variables is correct
# 1 id column, 290)) combined variables
print(train.shape == (1001, sum(range(1, 290), (290 + 43 + 1 + 1))))
print(train.shape)
print(test.shape == (1459, sum(range(1, 290), (290 + 43 + 1 + 1))))
print(test.shape)

False
(1001, 133)
False
(1459, 133)
