In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# seaborn is a data visualization library based on matplotlib but with interactive features
import seaborn as sns
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_log_error

%matplotlib inline

In [2]:
# import the data
train = pd.read_csv('./data/train.csv')
test= pd.read_csv('./data/test.csv')

In [3]:
train.columns

Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
       'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig',
       'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType',
       'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',
       'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',
       'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual',
       'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1',
       'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating',
       'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF',
       'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',
       'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType',
       'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual',
       'GarageCond', 'PavedDrive

In [42]:
# check null values
train.isnull().sum().sort_values().tail(35)

MSSubClass      0
PavedDrive      0
GarageArea      0
TotRmsAbvGrd    0
BedroomAbvGr    0
FullBath        0
GrLivArea       0
1stFlrSF        0
CentralAir      0
TotalBsmtSF     0
Foundation      0
ExterQual       0
YearBuilt       0
OverallCond     0
OverallQual     0
HouseStyle      0
BldgType        0
Condition1      0
Neighborhood    0
LandSlope       0
LotConfig       0
LandContour     0
LotShape        0
LotArea         0
LotFrontage     0
MSZoning        0
SalePrice       0
Remodeled       0
dtype: int64

In [5]:
# Select useful columns
train = train[['MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea',
        'LotShape', 'LandContour', 'LotConfig',
       'LandSlope', 'Neighborhood', 'Condition1', 'BldgType',
       'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',
       'ExterQual', 'Foundation', 'TotalBsmtSF', 'CentralAir', '1stFlrSF',
       'GrLivArea', 'FullBath',
       'BedroomAbvGr', 'TotRmsAbvGrd', 'GarageArea',
       'PavedDrive', 'SalePrice']]

In [6]:
# check the shape
train.shape

(1460, 28)

In [7]:
# remove outliers
train = train.drop([934, 1298, 249, 313, 335, 706, 691, 1182, 1169, 1046, 1298, 523, 691, 635, 581, 1061, 1190],axis = 0)

In [8]:
# check that outliers were remove
train.shape

(1445, 28)

In [9]:
# calculate the mean of lotfrontrage to fill null values
mean = np.mean(train["LotFrontage"])

In [10]:
mean

69.31008403361345

In [11]:
# remove null values by adding the mean
train["LotFrontage"] = train["LotFrontage"].fillna(value=mean)

In [12]:
#check that there is no more null values
train.isnull().sum().sort_values().tail(10)

Neighborhood    0
LandSlope       0
LotConfig       0
LandContour     0
LotShape        0
LotArea         0
LotFrontage     0
MSZoning        0
PavedDrive      0
SalePrice       0
dtype: int64

In [13]:
# create remodeled feature which is a yes or no if the house was remodeled
train["Remodeled"] = ["Yes" if val == True else "No" for val in train["YearBuilt"] == train["YearRemodAdd"]]

In [14]:
train.shape

(1445, 29)

In [15]:
# Remove year reamodeled of add because we are using remodeled
train = train.drop("YearRemodAdd", axis=1)

In [16]:
train.shape

(1445, 28)

In [17]:
train.isnull().sum().sort_values().tail(2)

SalePrice    0
Remodeled    0
dtype: int64

In [18]:
# Separate numeric features to do scaling
numeric_features = train[["LotFrontage", "LotArea", "YearBuilt", "TotalBsmtSF", "1stFlrSF", "GrLivArea", "GarageArea",
                         "BedroomAbvGr", "FullBath", "TotRmsAbvGrd", "MSSubClass", "OverallQual", "OverallCond"]]
numeric_features.head(2)

Unnamed: 0,LotFrontage,LotArea,YearBuilt,TotalBsmtSF,1stFlrSF,GrLivArea,GarageArea,BedroomAbvGr,FullBath,TotRmsAbvGrd,MSSubClass,OverallQual,OverallCond
0,65.0,8450,2003,856,856,1710,548,3,2,8,60,7,5
1,80.0,9600,1976,1262,1262,1262,460,3,2,6,20,6,8


In [19]:
# Separate categorical features to get the categorical columns
categorical_features = train.drop(numeric_features.columns, axis =1)
categorical_features = categorical_features.drop("SalePrice", axis =1)
categorical_features.head(2)

Unnamed: 0,MSZoning,LotShape,LandContour,LotConfig,LandSlope,Neighborhood,Condition1,BldgType,HouseStyle,ExterQual,Foundation,CentralAir,PavedDrive,Remodeled
0,RL,Reg,Lvl,Inside,Gtl,CollgCr,Norm,1Fam,2Story,Gd,PConc,Y,Y,Yes
1,RL,Reg,Lvl,FR2,Gtl,Veenker,Feedr,1Fam,1Story,TA,CBlock,Y,Y,Yes


In [20]:
# Create scaler
scaler = MinMaxScaler()

In [21]:
# fit data
scaler.fit(numeric_features)

  return self.partial_fit(X, y)


MinMaxScaler(copy=True, feature_range=(0, 1))

In [22]:
# update the numeric_features
numeric_features = pd.DataFrame(data=scaler.fit_transform(numeric_features),
                       columns=numeric_features.columns,
                       index=numeric_features.index)

  return self.partial_fit(X, y)


In [23]:
numeric_features.head(2)

Unnamed: 0,LotFrontage,LotArea,YearBuilt,TotalBsmtSF,1stFlrSF,GrLivArea,GarageArea,BedroomAbvGr,FullBath,TotRmsAbvGrd,MSSubClass,OverallQual,OverallCond
0,0.273292,0.102935,0.949275,0.266999,0.180373,0.420281,0.44918,0.5,0.666667,0.6,0.235294,0.666667,0.5
1,0.36646,0.119492,0.753623,0.393637,0.320663,0.283445,0.377049,0.5,0.666667,0.4,0.0,0.555556,0.875


In [24]:
# separate the labels
labels = train["SalePrice"]

In [25]:
# scale labels so weights don't go too big
labels = labels / 1000

In [26]:
labels.head(2)

0    208.5
1    181.5
Name: SalePrice, dtype: float64

In [27]:
# join data to xcerate the data separation
all_data = pd.concat([categorical_features, numeric_features], axis=1)
all_data.head()

Unnamed: 0,MSZoning,LotShape,LandContour,LotConfig,LandSlope,Neighborhood,Condition1,BldgType,HouseStyle,ExterQual,...,TotalBsmtSF,1stFlrSF,GrLivArea,GarageArea,BedroomAbvGr,FullBath,TotRmsAbvGrd,MSSubClass,OverallQual,OverallCond
0,RL,Reg,Lvl,Inside,Gtl,CollgCr,Norm,1Fam,2Story,Gd,...,0.266999,0.180373,0.420281,0.44918,0.5,0.666667,0.6,0.235294,0.666667,0.5
1,RL,Reg,Lvl,FR2,Gtl,Veenker,Feedr,1Fam,1Story,TA,...,0.393637,0.320663,0.283445,0.377049,0.5,0.666667,0.4,0.0,0.555556,0.875
2,RL,IR1,Lvl,Inside,Gtl,CollgCr,Norm,1Fam,2Story,Gd,...,0.286962,0.202488,0.443494,0.498361,0.5,0.666667,0.4,0.235294,0.666667,0.5
3,RL,IR1,Lvl,Corner,Gtl,Crawfor,Norm,1Fam,2Story,TA,...,0.235808,0.216655,0.422419,0.52623,0.5,0.333333,0.5,0.294118,0.666667,0.5
4,RL,IR1,Lvl,FR2,Gtl,NoRidge,Norm,1Fam,2Story,Gd,...,0.357143,0.280235,0.569334,0.685246,0.666667,0.666667,0.7,0.235294,0.777778,0.5


In [28]:
# Separate the data
x_train, x_validation, y_train, y_validation = train_test_split(all_data, labels, test_size=0.2)

In [29]:
x_train.head(4)

Unnamed: 0,MSZoning,LotShape,LandContour,LotConfig,LandSlope,Neighborhood,Condition1,BldgType,HouseStyle,ExterQual,...,TotalBsmtSF,1stFlrSF,GrLivArea,GarageArea,BedroomAbvGr,FullBath,TotRmsAbvGrd,MSSubClass,OverallQual,OverallCond
214,RL,IR1,Lvl,FR2,Gtl,CollgCr,Norm,1Fam,2Story,TA,...,0.21491,0.122668,0.323152,0.245082,0.5,0.333333,0.4,0.235294,0.555556,0.75
1348,RL,IR3,Low,Inside,Gtl,SawyerW,Norm,1Fam,1Story,Gd,...,0.462258,0.400829,0.354307,0.421311,0.5,0.666667,0.3,0.0,0.666667,0.5
477,RL,Reg,Lvl,Inside,Gtl,NridgHt,Norm,1Fam,2Story,Ex,...,0.671553,0.599516,0.705254,0.568852,0.5,0.666667,0.7,0.235294,0.888889,0.5
662,RL,Reg,Lvl,Corner,Gtl,NAmes,Norm,1Fam,1Story,TA,...,0.434186,0.365584,0.323152,0.472131,0.333333,0.333333,0.3,0.0,0.555556,0.25


### Create Feature Columns

In [30]:
numeric_features.columns

Index(['LotFrontage', 'LotArea', 'YearBuilt', 'TotalBsmtSF', '1stFlrSF',
       'GrLivArea', 'GarageArea', 'BedroomAbvGr', 'FullBath', 'TotRmsAbvGrd',
       'MSSubClass', 'OverallQual', 'OverallCond'],
      dtype='object')

In [31]:
lot_frontage = tf.feature_column.numeric_column("LotFrontage")
lot_area = tf.feature_column.numeric_column("LotArea")
year_built = tf.feature_column.numeric_column("YearBuilt")
total_bsmt_sf = tf.feature_column.numeric_column("TotalBsmtSF")
first_floor_sf = tf.feature_column.numeric_column("1stFlrSF")
gr_liv_area = tf.feature_column.numeric_column("GrLivArea")
garage_area = tf.feature_column.numeric_column("GarageArea")
bedroom_abv_gr = tf.feature_column.numeric_column("BedroomAbvGr")
full_bath = tf.feature_column.numeric_column("FullBath")
tot_rms_abv_gr = tf.feature_column.numeric_column("TotRmsAbvGrd")
ms_sub_class = tf.feature_column.numeric_column("MSSubClass")
overall_qual = tf.feature_column.numeric_column("OverallQual")
overall_cond = tf.feature_column.numeric_column("OverallCond")

In [32]:
categorical_features.columns

Index(['MSZoning', 'LotShape', 'LandContour', 'LotConfig', 'LandSlope',
       'Neighborhood', 'Condition1', 'BldgType', 'HouseStyle', 'ExterQual',
       'Foundation', 'CentralAir', 'PavedDrive', 'Remodeled'],
      dtype='object')

In [33]:
ms_zoning = tf.feature_column.categorical_column_with_hash_bucket("MSZoning", hash_bucket_size=50)
lot_shape = tf.feature_column.categorical_column_with_hash_bucket("LotShape", hash_bucket_size=40)
land_contour = tf.feature_column.categorical_column_with_hash_bucket("LandContour", hash_bucket_size=40)
lot_config = tf.feature_column.categorical_column_with_hash_bucket("LotConfig", hash_bucket_size=50)
land_slope = tf.feature_column.categorical_column_with_hash_bucket("LandSlope", hash_bucket_size=30)
neighborhood = tf.feature_column.categorical_column_with_hash_bucket("Neighborhood", hash_bucket_size=250)
condition_1 = tf.feature_column.categorical_column_with_hash_bucket("Condition1", hash_bucket_size=90)
bldg_type = tf.feature_column.categorical_column_with_hash_bucket("BldgType", hash_bucket_size=50)
house_style = tf.feature_column.categorical_column_with_hash_bucket("HouseStyle", hash_bucket_size=80)
exter_qual = tf.feature_column.categorical_column_with_hash_bucket("ExterQual", hash_bucket_size=40)
foundation = tf.feature_column.categorical_column_with_hash_bucket("Foundation", hash_bucket_size=60)
central_air = tf.feature_column.categorical_column_with_hash_bucket("CentralAir", hash_bucket_size=20)
paved_drive = tf.feature_column.categorical_column_with_hash_bucket("PavedDrive", hash_bucket_size=30)
remodeled = tf.feature_column.categorical_column_with_hash_bucket("Remodeled", hash_bucket_size=20)

In [34]:
e_ms_zoning = tf.feature_column.embedding_column(ms_zoning, dimension=5)
e_lot_shape = tf.feature_column.embedding_column(lot_shape, dimension=4)
e_land_contour = tf.feature_column.embedding_column(land_contour, dimension=4)
e_lot_config = tf.feature_column.embedding_column(lot_config, dimension=5)
e_land_slope = tf.feature_column.embedding_column(land_slope, dimension=3)
e_neighborhood = tf.feature_column.embedding_column(neighborhood, dimension=25)
e_condition_1 = tf.feature_column.embedding_column(condition_1, dimension=9)
e_bldg_type = tf.feature_column.embedding_column(bldg_type, dimension=5)
e_house_style = tf.feature_column.embedding_column(house_style, dimension=8)
e_exter_qual = tf.feature_column.embedding_column(exter_qual, dimension=4)
e_foundation = tf.feature_column.embedding_column(foundation, dimension=6)
e_central_air = tf.feature_column.embedding_column(central_air, dimension=2)
e_paved_drive = tf.feature_column.embedding_column(paved_drive, dimension=3)
e_remodeled = tf.feature_column.embedding_column(remodeled, dimension=2)

In [35]:
feature_columns = [lot_frontage, lot_area, year_built, total_bsmt_sf, first_floor_sf, gr_liv_area, garage_area,
                  ms_sub_class, e_ms_zoning, e_lot_shape, e_land_contour, e_lot_config, e_land_slope, e_neighborhood,
                  e_condition_1, e_bldg_type, e_house_style, overall_qual, overall_cond, e_exter_qual, e_foundation,
                  e_central_air, full_bath, bedroom_abv_gr, tot_rms_abv_gr, e_paved_drive, e_remodeled]

In [36]:
x_train.columns

Index(['MSZoning', 'LotShape', 'LandContour', 'LotConfig', 'LandSlope',
       'Neighborhood', 'Condition1', 'BldgType', 'HouseStyle', 'ExterQual',
       'Foundation', 'CentralAir', 'PavedDrive', 'Remodeled', 'LotFrontage',
       'LotArea', 'YearBuilt', 'TotalBsmtSF', '1stFlrSF', 'GrLivArea',
       'GarageArea', 'BedroomAbvGr', 'FullBath', 'TotRmsAbvGrd', 'MSSubClass',
       'OverallQual', 'OverallCond'],
      dtype='object')

In [37]:
train_func = tf.estimator.inputs.pandas_input_fn(x=x_train, y=y_train,
                                                batch_size=100,
                                                num_epochs=10000,
                                                shuffle=True)

In [38]:
validation_func = tf.estimator.inputs.pandas_input_fn(x=x_validation, y=y_validation,
                                                     batch_size=100,
                                                     num_epochs=1,
                                                     shuffle=False)

In [39]:
model = tf.estimator.DNNRegressor(hidden_units=[1024, 128, 32],
                                 feature_columns=feature_columns,
                                 model_dir="./model_check_points/",
                                 dropout=0.5,
                                 config=tf.estimator.RunConfig(save_summary_steps=100,
                                                              save_checkpoints_steps=10000))

INFO:tensorflow:Using config: {'_model_dir': './model_check_points/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 10000, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f828bb8b710>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [40]:
model.train(input_fn=train_func, steps=100000)

Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model_check_points/model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Saving checkpoints for 1000 into ./model_check_points/model.ckpt.
INFO:tensorflow:loss = 412538.88, step = 1001
INFO:tensorflow:global_step/sec: 45.0674
INFO:tensorflow:loss = 274498.84, step = 1101 (2.221 sec)
INFO:tensorflow:global_step/sec: 66.8371
INFO:tensorflow:loss = 474259.4, step = 1201 (1.496 sec)
INFO:tensorflow:global_step/sec: 68.0558
INFO:tensorflow:loss = 214963.83, step = 1301 (1.470 sec)
INFO:tensor

INFO:tensorflow:loss = 245318.38, step = 7901 (1.453 sec)
INFO:tensorflow:global_step/sec: 67.9988
INFO:tensorflow:loss = 205608.23, step = 8001 (1.470 sec)
INFO:tensorflow:global_step/sec: 68.0586
INFO:tensorflow:loss = 245116.03, step = 8101 (1.470 sec)
INFO:tensorflow:global_step/sec: 68.6419
INFO:tensorflow:loss = 307383.22, step = 8201 (1.457 sec)
INFO:tensorflow:global_step/sec: 68.2577
INFO:tensorflow:loss = 250696.16, step = 8301 (1.465 sec)
INFO:tensorflow:global_step/sec: 67.0402
INFO:tensorflow:loss = 239887.02, step = 8401 (1.492 sec)
INFO:tensorflow:global_step/sec: 68.5273
INFO:tensorflow:loss = 268462.1, step = 8501 (1.459 sec)
INFO:tensorflow:global_step/sec: 68.593
INFO:tensorflow:loss = 267431.25, step = 8601 (1.458 sec)
INFO:tensorflow:global_step/sec: 67.68
INFO:tensorflow:loss = 352092.0, step = 8701 (1.477 sec)
INFO:tensorflow:global_step/sec: 69.2821
INFO:tensorflow:loss = 307439.8, step = 8801 (1.444 sec)
INFO:tensorflow:global_step/sec: 69.9889
INFO:tensorflow:

INFO:tensorflow:loss = 376244.72, step = 16101 (1.478 sec)
INFO:tensorflow:global_step/sec: 69.1951
INFO:tensorflow:loss = 187655.84, step = 16201 (1.445 sec)
INFO:tensorflow:global_step/sec: 66.8518
INFO:tensorflow:loss = 229035.0, step = 16301 (1.496 sec)
INFO:tensorflow:global_step/sec: 67.7281
INFO:tensorflow:loss = 259783.78, step = 16401 (1.477 sec)
INFO:tensorflow:global_step/sec: 68.4927
INFO:tensorflow:loss = 260194.31, step = 16501 (1.459 sec)
INFO:tensorflow:global_step/sec: 70.5325
INFO:tensorflow:loss = 194748.42, step = 16601 (1.418 sec)
INFO:tensorflow:global_step/sec: 69.1287
INFO:tensorflow:loss = 228536.38, step = 16701 (1.447 sec)
INFO:tensorflow:global_step/sec: 67.6183
INFO:tensorflow:loss = 361886.38, step = 16801 (1.479 sec)
INFO:tensorflow:global_step/sec: 67.835
INFO:tensorflow:loss = 218689.66, step = 16901 (1.475 sec)
INFO:tensorflow:global_step/sec: 68.816
INFO:tensorflow:loss = 220557.05, step = 17001 (1.453 sec)
INFO:tensorflow:global_step/sec: 67.5831
INF

INFO:tensorflow:global_step/sec: 68.6857
INFO:tensorflow:loss = 268929.66, step = 24301 (1.456 sec)
INFO:tensorflow:global_step/sec: 67.7605
INFO:tensorflow:loss = 296684.06, step = 24401 (1.476 sec)
INFO:tensorflow:global_step/sec: 67.4746
INFO:tensorflow:loss = 229287.36, step = 24501 (1.483 sec)
INFO:tensorflow:global_step/sec: 67.7688
INFO:tensorflow:loss = 242121.3, step = 24601 (1.475 sec)
INFO:tensorflow:global_step/sec: 66.822
INFO:tensorflow:loss = 260358.0, step = 24701 (1.497 sec)
INFO:tensorflow:global_step/sec: 66.7376
INFO:tensorflow:loss = 178480.25, step = 24801 (1.500 sec)
INFO:tensorflow:global_step/sec: 66.7729
INFO:tensorflow:loss = 224149.0, step = 24901 (1.497 sec)
INFO:tensorflow:global_step/sec: 66.3052
INFO:tensorflow:loss = 229110.72, step = 25001 (1.508 sec)
INFO:tensorflow:global_step/sec: 65.921
INFO:tensorflow:loss = 213290.28, step = 25101 (1.517 sec)
INFO:tensorflow:global_step/sec: 66.8697
INFO:tensorflow:loss = 186207.94, step = 25201 (1.495 sec)
INFO:

INFO:tensorflow:loss = 274645.88, step = 32401 (1.454 sec)
INFO:tensorflow:global_step/sec: 68.8379
INFO:tensorflow:loss = 195557.23, step = 32501 (1.453 sec)
INFO:tensorflow:global_step/sec: 68.6672
INFO:tensorflow:loss = 208992.12, step = 32601 (1.456 sec)
INFO:tensorflow:global_step/sec: 69.3498
INFO:tensorflow:loss = 148317.64, step = 32701 (1.442 sec)
INFO:tensorflow:global_step/sec: 66.4613
INFO:tensorflow:loss = 306634.53, step = 32801 (1.505 sec)
INFO:tensorflow:global_step/sec: 67.7909
INFO:tensorflow:loss = 216745.56, step = 32901 (1.475 sec)
INFO:tensorflow:global_step/sec: 68.8716
INFO:tensorflow:loss = 218049.38, step = 33001 (1.452 sec)
INFO:tensorflow:global_step/sec: 67.0623
INFO:tensorflow:loss = 164107.31, step = 33101 (1.491 sec)
INFO:tensorflow:global_step/sec: 67.0633
INFO:tensorflow:loss = 192913.45, step = 33201 (1.490 sec)
INFO:tensorflow:global_step/sec: 68.397
INFO:tensorflow:loss = 275949.7, step = 33301 (1.462 sec)
INFO:tensorflow:global_step/sec: 68.1239
IN

INFO:tensorflow:global_step/sec: 68.4724
INFO:tensorflow:loss = 198244.03, step = 40701 (1.460 sec)
INFO:tensorflow:global_step/sec: 69.1858
INFO:tensorflow:loss = 401327.28, step = 40801 (1.445 sec)
INFO:tensorflow:global_step/sec: 68.242
INFO:tensorflow:loss = 397949.25, step = 40901 (1.465 sec)
INFO:tensorflow:Saving checkpoints for 41000 into ./model_check_points/model.ckpt.
INFO:tensorflow:global_step/sec: 62.4845
INFO:tensorflow:loss = 282729.25, step = 41001 (1.600 sec)
INFO:tensorflow:global_step/sec: 67.3825
INFO:tensorflow:loss = 283076.16, step = 41101 (1.485 sec)
INFO:tensorflow:global_step/sec: 68.6994
INFO:tensorflow:loss = 197919.86, step = 41201 (1.456 sec)
INFO:tensorflow:global_step/sec: 67.9514
INFO:tensorflow:loss = 228545.69, step = 41301 (1.471 sec)
INFO:tensorflow:global_step/sec: 67.7503
INFO:tensorflow:loss = 198423.27, step = 41401 (1.476 sec)
INFO:tensorflow:global_step/sec: 67.6977
INFO:tensorflow:loss = 186347.67, step = 41501 (1.477 sec)
INFO:tensorflow:gl

INFO:tensorflow:loss = 178909.81, step = 48801 (1.447 sec)
INFO:tensorflow:global_step/sec: 68.4878
INFO:tensorflow:loss = 264370.94, step = 48901 (1.459 sec)
INFO:tensorflow:global_step/sec: 70.5853
INFO:tensorflow:loss = 223472.73, step = 49001 (1.417 sec)
INFO:tensorflow:global_step/sec: 70.8726
INFO:tensorflow:loss = 219941.9, step = 49101 (1.411 sec)
INFO:tensorflow:global_step/sec: 68.6429
INFO:tensorflow:loss = 210809.94, step = 49201 (1.457 sec)
INFO:tensorflow:global_step/sec: 68.828
INFO:tensorflow:loss = 206734.8, step = 49301 (1.453 sec)
INFO:tensorflow:global_step/sec: 68.8752
INFO:tensorflow:loss = 190606.88, step = 49401 (1.453 sec)
INFO:tensorflow:global_step/sec: 69.4855
INFO:tensorflow:loss = 199024.0, step = 49501 (1.438 sec)
INFO:tensorflow:global_step/sec: 69.4474
INFO:tensorflow:loss = 201350.72, step = 49601 (1.440 sec)
INFO:tensorflow:global_step/sec: 68.3945
INFO:tensorflow:loss = 226791.72, step = 49701 (1.462 sec)
INFO:tensorflow:global_step/sec: 68.6106
INFO

INFO:tensorflow:global_step/sec: 69.3917
INFO:tensorflow:loss = 131159.7, step = 57001 (1.441 sec)
INFO:tensorflow:global_step/sec: 69.5093
INFO:tensorflow:loss = 245131.88, step = 57101 (1.439 sec)
INFO:tensorflow:global_step/sec: 68.2792
INFO:tensorflow:loss = 161703.23, step = 57201 (1.465 sec)
INFO:tensorflow:global_step/sec: 68.8565
INFO:tensorflow:loss = 215804.84, step = 57301 (1.452 sec)
INFO:tensorflow:global_step/sec: 67.6035
INFO:tensorflow:loss = 122011.875, step = 57401 (1.479 sec)
INFO:tensorflow:global_step/sec: 68.1326
INFO:tensorflow:loss = 90934.8, step = 57501 (1.468 sec)
INFO:tensorflow:global_step/sec: 70.4451
INFO:tensorflow:loss = 123847.82, step = 57601 (1.420 sec)
INFO:tensorflow:global_step/sec: 69.0185
INFO:tensorflow:loss = 190387.34, step = 57701 (1.449 sec)
INFO:tensorflow:global_step/sec: 67.7703
INFO:tensorflow:loss = 213755.6, step = 57801 (1.476 sec)
INFO:tensorflow:global_step/sec: 66.6111
INFO:tensorflow:loss = 161290.34, step = 57901 (1.501 sec)
INF

INFO:tensorflow:loss = 178690.81, step = 65101 (1.450 sec)
INFO:tensorflow:global_step/sec: 69.634
INFO:tensorflow:loss = 188691.94, step = 65201 (1.436 sec)
INFO:tensorflow:global_step/sec: 67.6508
INFO:tensorflow:loss = 215141.6, step = 65301 (1.478 sec)
INFO:tensorflow:global_step/sec: 69.1437
INFO:tensorflow:loss = 201278.44, step = 65401 (1.447 sec)
INFO:tensorflow:global_step/sec: 69.6665
INFO:tensorflow:loss = 261903.16, step = 65501 (1.436 sec)
INFO:tensorflow:global_step/sec: 66.135
INFO:tensorflow:loss = 150023.7, step = 65601 (1.511 sec)
INFO:tensorflow:global_step/sec: 67.0034
INFO:tensorflow:loss = 184712.4, step = 65701 (1.492 sec)
INFO:tensorflow:global_step/sec: 68.4814
INFO:tensorflow:loss = 196677.23, step = 65801 (1.460 sec)
INFO:tensorflow:global_step/sec: 67.9424
INFO:tensorflow:loss = 222662.73, step = 65901 (1.472 sec)
INFO:tensorflow:global_step/sec: 68.875
INFO:tensorflow:loss = 175761.22, step = 66001 (1.451 sec)
INFO:tensorflow:global_step/sec: 68.9945
INFO:t

INFO:tensorflow:global_step/sec: 55.6844
INFO:tensorflow:loss = 204876.19, step = 73301 (1.796 sec)
INFO:tensorflow:global_step/sec: 57.0508
INFO:tensorflow:loss = 185266.08, step = 73401 (1.753 sec)
INFO:tensorflow:global_step/sec: 55.8671
INFO:tensorflow:loss = 196929.36, step = 73501 (1.790 sec)
INFO:tensorflow:global_step/sec: 53.9672
INFO:tensorflow:loss = 90455.95, step = 73601 (1.853 sec)
INFO:tensorflow:global_step/sec: 53.0077
INFO:tensorflow:loss = 241974.25, step = 73701 (1.887 sec)
INFO:tensorflow:global_step/sec: 65.2755
INFO:tensorflow:loss = 203262.19, step = 73801 (1.532 sec)
INFO:tensorflow:global_step/sec: 68.6726
INFO:tensorflow:loss = 190614.14, step = 73901 (1.456 sec)
INFO:tensorflow:global_step/sec: 69.2201
INFO:tensorflow:loss = 211349.72, step = 74001 (1.444 sec)
INFO:tensorflow:global_step/sec: 67.6064
INFO:tensorflow:loss = 155127.19, step = 74101 (1.480 sec)
INFO:tensorflow:global_step/sec: 68.8588
INFO:tensorflow:loss = 189764.1, step = 74201 (1.452 sec)
IN

INFO:tensorflow:loss = 177802.3, step = 81401 (1.477 sec)
INFO:tensorflow:global_step/sec: 69.2556
INFO:tensorflow:loss = 269745.03, step = 81501 (1.444 sec)
INFO:tensorflow:global_step/sec: 67.7987
INFO:tensorflow:loss = 201718.36, step = 81601 (1.475 sec)
INFO:tensorflow:global_step/sec: 69.0297
INFO:tensorflow:loss = 190375.78, step = 81701 (1.448 sec)
INFO:tensorflow:global_step/sec: 67.8041
INFO:tensorflow:loss = 138082.73, step = 81801 (1.475 sec)
INFO:tensorflow:global_step/sec: 67.5825
INFO:tensorflow:loss = 152665.17, step = 81901 (1.480 sec)
INFO:tensorflow:global_step/sec: 67.5347
INFO:tensorflow:loss = 205925.73, step = 82001 (1.481 sec)
INFO:tensorflow:global_step/sec: 67.9259
INFO:tensorflow:loss = 144218.95, step = 82101 (1.472 sec)
INFO:tensorflow:global_step/sec: 67.8516
INFO:tensorflow:loss = 216057.36, step = 82201 (1.474 sec)
INFO:tensorflow:global_step/sec: 68.9132
INFO:tensorflow:loss = 230174.3, step = 82301 (1.451 sec)
INFO:tensorflow:global_step/sec: 69.6793
IN

INFO:tensorflow:global_step/sec: 68.3312
INFO:tensorflow:loss = 221359.45, step = 89701 (1.464 sec)
INFO:tensorflow:global_step/sec: 69.4156
INFO:tensorflow:loss = 353987.56, step = 89801 (1.441 sec)
INFO:tensorflow:global_step/sec: 69.3241
INFO:tensorflow:loss = 110333.74, step = 89901 (1.443 sec)
INFO:tensorflow:global_step/sec: 69.4783
INFO:tensorflow:loss = 169035.38, step = 90001 (1.439 sec)
INFO:tensorflow:global_step/sec: 68.772
INFO:tensorflow:loss = 157360.06, step = 90101 (1.454 sec)
INFO:tensorflow:global_step/sec: 69.0578
INFO:tensorflow:loss = 177778.0, step = 90201 (1.448 sec)
INFO:tensorflow:global_step/sec: 70.1412
INFO:tensorflow:loss = 202581.2, step = 90301 (1.425 sec)
INFO:tensorflow:global_step/sec: 68.8194
INFO:tensorflow:loss = 196669.06, step = 90401 (1.453 sec)
INFO:tensorflow:global_step/sec: 70.3657
INFO:tensorflow:loss = 182383.88, step = 90501 (1.421 sec)
INFO:tensorflow:global_step/sec: 67.8692
INFO:tensorflow:loss = 266713.22, step = 90601 (1.474 sec)
INF

INFO:tensorflow:loss = 133675.16, step = 97801 (1.461 sec)
INFO:tensorflow:global_step/sec: 69.3045
INFO:tensorflow:loss = 145474.73, step = 97901 (1.443 sec)
INFO:tensorflow:global_step/sec: 69.423
INFO:tensorflow:loss = 269570.94, step = 98001 (1.440 sec)
INFO:tensorflow:global_step/sec: 68.463
INFO:tensorflow:loss = 211796.48, step = 98101 (1.461 sec)
INFO:tensorflow:global_step/sec: 68.6715
INFO:tensorflow:loss = 172857.44, step = 98201 (1.456 sec)
INFO:tensorflow:global_step/sec: 70.4333
INFO:tensorflow:loss = 162499.86, step = 98301 (1.420 sec)
INFO:tensorflow:global_step/sec: 68.7818
INFO:tensorflow:loss = 184960.47, step = 98401 (1.454 sec)
INFO:tensorflow:global_step/sec: 68.3988
INFO:tensorflow:loss = 253571.84, step = 98501 (1.462 sec)
INFO:tensorflow:global_step/sec: 69.69
INFO:tensorflow:loss = 158140.92, step = 98601 (1.435 sec)
INFO:tensorflow:global_step/sec: 69.3377
INFO:tensorflow:loss = 143163.83, step = 98701 (1.442 sec)
INFO:tensorflow:global_step/sec: 68.0294
INFO

<tensorflow.python.estimator.canned.dnn.DNNRegressor at 0x7f828bb8b470>

### LRMSE

In [43]:
pred_generator = model.predict(validation_func)

In [44]:
predictions = list(pred_generator)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model_check_points/model.ckpt-101000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [45]:
final_predictions=[]

for pred in predictions:
    final_predictions.append(pred["predictions"])
final_predictions

[array([171.76534], dtype=float32),
 array([105.501945], dtype=float32),
 array([162.60583], dtype=float32),
 array([215.14392], dtype=float32),
 array([121.68986], dtype=float32),
 array([135.79112], dtype=float32),
 array([190.99663], dtype=float32),
 array([195.53777], dtype=float32),
 array([112.92931], dtype=float32),
 array([115.55371], dtype=float32),
 array([118.97346], dtype=float32),
 array([124.613716], dtype=float32),
 array([88.75177], dtype=float32),
 array([185.96309], dtype=float32),
 array([185.9087], dtype=float32),
 array([206.19788], dtype=float32),
 array([134.27579], dtype=float32),
 array([206.76546], dtype=float32),
 array([228.04225], dtype=float32),
 array([120.7983], dtype=float32),
 array([197.36673], dtype=float32),
 array([305.56955], dtype=float32),
 array([130.59868], dtype=float32),
 array([153.907], dtype=float32),
 array([143.92717], dtype=float32),
 array([305.5672], dtype=float32),
 array([171.5182], dtype=float32),
 array([127.10248], dtype=float32

In [46]:
mean_squared_log_error(y_validation, final_predictions)**.5

0.15742083570095253

## Predict

In [47]:
test.columns

Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
       'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig',
       'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType',
       'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',
       'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',
       'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual',
       'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1',
       'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating',
       'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF',
       'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',
       'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType',
       'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual',
       'GarageCond', 'PavedDrive

In [49]:
test.isnull().sum().sort_values().tail(35)

LandSlope          0
HouseStyle         0
GarageCars         1
GarageArea         1
KitchenQual        1
Exterior1st        1
SaleType           1
TotalBsmtSF        1
BsmtUnfSF          1
Exterior2nd        1
BsmtFinSF1         1
BsmtFinSF2         1
BsmtFullBath       2
Functional         2
Utilities          2
BsmtHalfBath       2
MSZoning           4
MasVnrArea        15
MasVnrType        16
BsmtFinType2      42
BsmtFinType1      42
BsmtExposure      44
BsmtQual          44
BsmtCond          45
GarageType        76
GarageFinish      78
GarageYrBlt       78
GarageQual        78
GarageCond        78
LotFrontage      227
FireplaceQu      730
Fence           1169
Alley           1352
MiscFeature     1408
PoolQC          1456
dtype: int64

In [50]:
test = test[['MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea',
        'LotShape', 'LandContour', 'LotConfig',
       'LandSlope', 'Neighborhood', 'Condition1', 'BldgType',
       'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',
       'ExterQual', 'Foundation', 'TotalBsmtSF', 'CentralAir', '1stFlrSF',
       'GrLivArea', 'FullBath',
       'BedroomAbvGr', 'TotRmsAbvGrd', 'GarageArea',
       'PavedDrive']]

In [53]:
test.isnull().sum().sort_values().tail(10)

LotConfig        0
LandContour      0
LotShape         0
LotArea          0
HouseStyle       0
PavedDrive       0
GarageArea       1
TotalBsmtSF      1
MSZoning         4
LotFrontage    227
dtype: int64

In [55]:
garage_area_mean = np.mean(test["GarageArea"])
garage_area_mean

472.76886145404666

In [57]:
total_basement_mean = np.mean(test["TotalBsmtSF"])
total_basement_mean

1046.1179698216736

In [58]:
test["LotFrontage"] = test["LotFrontage"].fillna(value=mean)

In [59]:
test["GarageArea"] = test["GarageArea"].fillna(value=garage_area_mean)

In [60]:
test["TotalBsmtSF"] = test["TotalBsmtSF"].fillna(value=total_basement_mean)

In [63]:
test.isnull().sum().sort_values().tail(3)

OverallQual    0
PavedDrive     0
MSZoning       4
dtype: int64

In [76]:
test["MSZoning"].value_counts()

RL         1114
RM          242
FV           74
C (all)      15
RH           10
Name: MSZoning, dtype: int64

In [77]:
test["MSZoning"] = test["MSZoning"].fillna(value='RL')

In [78]:
test.isnull().sum().sort_values().tail(3)

MSZoning       0
OverallQual    0
PavedDrive     0
dtype: int64

In [79]:
test["Remodeled"] = ["Yes" if val == True else "No" for val in test["YearBuilt"] == test["YearRemodAdd"]]

In [80]:
test.shape

(1459, 28)

In [81]:
test = test.drop("YearRemodAdd", axis=1)

In [82]:
test.shape

(1459, 27)

In [83]:
numeric_features_test = test[["LotFrontage", "LotArea", "YearBuilt", "TotalBsmtSF", "1stFlrSF", "GrLivArea", "GarageArea",
                         "BedroomAbvGr", "FullBath", "TotRmsAbvGrd", "MSSubClass", "OverallQual", "OverallCond"]]
numeric_features_test.head(2)

Unnamed: 0,LotFrontage,LotArea,YearBuilt,TotalBsmtSF,1stFlrSF,GrLivArea,GarageArea,BedroomAbvGr,FullBath,TotRmsAbvGrd,MSSubClass,OverallQual,OverallCond
0,80.0,11622,1961,882.0,896,896,730.0,2,1,5,20,5,6
1,81.0,14267,1958,1329.0,1329,1329,312.0,3,1,6,20,6,6


In [84]:
categorical_features_test = test.drop(numeric_features_test.columns, axis =1)
categorical_features_test.head(2)

Unnamed: 0,MSZoning,LotShape,LandContour,LotConfig,LandSlope,Neighborhood,Condition1,BldgType,HouseStyle,ExterQual,Foundation,CentralAir,PavedDrive,Remodeled
0,RH,Reg,Lvl,Inside,Gtl,NAmes,Feedr,1Fam,1Story,TA,CBlock,Y,Y,Yes
1,RL,IR1,Lvl,Corner,Gtl,NAmes,Norm,1Fam,1Story,TA,CBlock,Y,Y,Yes


In [85]:
scaler_test = MinMaxScaler()

In [87]:
scaler_test.fit(numeric_features_test)

  return self.partial_fit(X, y)


MinMaxScaler(copy=True, feature_range=(0, 1))

In [88]:
numeric_features_test = pd.DataFrame(data=scaler_test.fit_transform(numeric_features_test),
                       columns=numeric_features_test.columns,
                       index=numeric_features_test.index)

  return self.partial_fit(X, y)


In [89]:
numeric_features_test.head(2)

Unnamed: 0,LotFrontage,LotArea,YearBuilt,TotalBsmtSF,1stFlrSF,GrLivArea,GarageArea,BedroomAbvGr,FullBath,TotRmsAbvGrd,MSSubClass,OverallQual,OverallCond
0,0.329609,0.184147,0.625954,0.173111,0.104309,0.104309,0.490591,0.333333,0.25,0.166667,0.0,0.444444,0.625
1,0.335196,0.232124,0.603053,0.260844,0.196672,0.196672,0.209677,0.5,0.25,0.25,0.0,0.555556,0.625


In [90]:
all_data_test = pd.concat([categorical_features_test, numeric_features_test], axis=1)
all_data_test.head()

Unnamed: 0,MSZoning,LotShape,LandContour,LotConfig,LandSlope,Neighborhood,Condition1,BldgType,HouseStyle,ExterQual,...,TotalBsmtSF,1stFlrSF,GrLivArea,GarageArea,BedroomAbvGr,FullBath,TotRmsAbvGrd,MSSubClass,OverallQual,OverallCond
0,RH,Reg,Lvl,Inside,Gtl,NAmes,Feedr,1Fam,1Story,TA,...,0.173111,0.104309,0.104309,0.490591,0.333333,0.25,0.166667,0.0,0.444444,0.625
1,RL,IR1,Lvl,Corner,Gtl,NAmes,Norm,1Fam,1Story,TA,...,0.260844,0.196672,0.196672,0.209677,0.5,0.25,0.25,0.0,0.555556,0.625
2,RL,IR1,Lvl,Inside,Gtl,Gilbert,Norm,1Fam,2Story,TA,...,0.182139,0.111135,0.260666,0.323925,0.5,0.5,0.25,0.235294,0.444444,0.5
3,RL,IR1,Lvl,Inside,Gtl,Gilbert,Norm,1Fam,2Story,TA,...,0.181747,0.110708,0.255333,0.31586,0.5,0.5,0.333333,0.235294,0.555556,0.625
4,RL,IR1,HLS,Inside,Gtl,StoneBr,Norm,TwnhsE,1Story,Gd,...,0.251227,0.18622,0.18622,0.340054,0.333333,0.5,0.166667,0.588235,0.777778,0.5


In [91]:
prediction_func = tf.estimator.inputs.pandas_input_fn(x=all_data_test,
                                                     batch_size=100,
                                                     num_epochs=1,
                                                     shuffle=False)

In [92]:
test_pred_generator = model.predict(prediction_func)

In [93]:
predictions = list(test_pred_generator)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model_check_points/model.ckpt-101000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [94]:
final_predictions=[]

for pred in predictions:
    final_predictions.append(pred["predictions"])
final_predictions

[array([107.83897], dtype=float32),
 array([134.12859], dtype=float32),
 array([146.7189], dtype=float32),
 array([154.82939], dtype=float32),
 array([133.64737], dtype=float32),
 array([152.32927], dtype=float32),
 array([145.00967], dtype=float32),
 array([144.50931], dtype=float32),
 array([140.95186], dtype=float32),
 array([106.15374], dtype=float32),
 array([151.8174], dtype=float32),
 array([83.23521], dtype=float32),
 array([84.27758], dtype=float32),
 array([124.88801], dtype=float32),
 array([101.51646], dtype=float32),
 array([259.9756], dtype=float32),
 array([200.10599], dtype=float32),
 array([207.36736], dtype=float32),
 array([209.18513], dtype=float32),
 array([473.23697], dtype=float32),
 array([206.72832], dtype=float32),
 array([161.50142], dtype=float32),
 array([164.15039], dtype=float32),
 array([149.7111], dtype=float32),
 array([159.4969], dtype=float32),
 array([159.92809], dtype=float32),
 array([225.69885], dtype=float32),
 array([228.22646], dtype=float32),

In [98]:
len(final_predictions)

1459

In [99]:
results = pd.DataFrame(final_predictions, columns=["SalePrice"], dtype=np.float32)

In [100]:
results.head()

Unnamed: 0,SalePrice
0,107.838966
1,134.128586
2,146.718903
3,154.829391
4,133.647369


In [101]:
results.index +=1461
results.index.name = "Id"

In [102]:
results.head()

Unnamed: 0_level_0,SalePrice
Id,Unnamed: 1_level_1
1461,107.838966
1462,134.128586
1463,146.718903
1464,154.829391
1465,133.647369


In [103]:
results = results * 1000

In [104]:
results.head()

Unnamed: 0_level_0,SalePrice
Id,Unnamed: 1_level_1
1461,107838.96875
1462,134128.578125
1463,146718.90625
1464,154829.390625
1465,133647.375


In [105]:
results.to_csv("./results2.csv")