In [77]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew
from scipy.stats import zscore
from scipy.special import boxcox1p
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import Lasso, LassoCV
import tensorflow as tf
import re

def train_input_fn(ds, label, num_epochs, training=True, batch_size=10):
    """An input function for training or evaluating"""    
    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices((dict(ds), label))

    # Shuffle and repeat if you are in training mode.
    if training:
        dataset = dataset.shuffle(len_train).repeat(num_epochs)
    
    return dataset.batch(batch_size)

def validation_input_fn(ds, label, num_epochs):
    """An input function for training or evaluating"""    
    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices((dict(ds), label))

    dataset = dataset.repeat(num_epochs)
    
    return dataset.batch(1)

def eval_input_fn(ds, num_epochs):
    """An input function for training or evaluating"""    
    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices(dict(ds))

    dataset = dataset.repeat(num_epochs)
    
    return dataset.batch(1)

train=pd.read_csv("house-prices/train.csv")
test=pd.read_csv("house-prices/test.csv")
len_train=train.shape[0]
houses=pd.concat([train,test], sort=False)

for col in ('Alley','Utilities','MasVnrType','BsmtQual','BsmtCond','BsmtExposure','BsmtFinType1',
            'BsmtFinType2','Electrical','FireplaceQu','GarageType','GarageFinish','GarageQual','GarageCond',
           'PoolQC','Fence','MiscFeature'):
    train[col]=train[col].fillna('None')
    test[col]=test[col].fillna('None')
    
for col in ('MSZoning','Exterior1st','Exterior2nd','KitchenQual','SaleType','Functional'):
    train[col]=train[col].fillna(train[col].mode()[0])
    test[col]=test[col].fillna(train[col].mode()[0])

for col in ('MasVnrArea','BsmtFinSF1','BsmtFinSF2','BsmtUnfSF','TotalBsmtSF','BsmtFullBath','BsmtHalfBath','GarageYrBlt','GarageCars','GarageArea'):
    train[col]=train[col].fillna(0)
    test[col]=test[col].fillna(0)

train['LotFrontage']=train['LotFrontage'].fillna(train['LotFrontage'].mean())
test['LotFrontage']=test['LotFrontage'].fillna(train['LotFrontage'].mean())

train.drop(['GarageArea','1stFlrSF','TotRmsAbvGrd','2ndFlrSF'], axis=1, inplace=True)
test.drop(['GarageArea','1stFlrSF','TotRmsAbvGrd','2ndFlrSF'], axis=1, inplace=True)

train['LotFrontage']=train['LotFrontage'].fillna(train['LotFrontage'].mean())
test['LotFrontage']=test['LotFrontage'].fillna(train['LotFrontage'].mean())

train = train[train['GrLivArea']<4000]

len_train=train.shape[0]

houses=pd.concat([train,test], sort=False)

houses['MSSubClass']=houses['MSSubClass'].astype(str)

skew=houses.select_dtypes(include=['int','float']).apply(lambda x: skew(x.dropna())).sort_values(ascending=False)
skew_df=pd.DataFrame({'Skew':skew})
skewed_df=skew_df[(skew_df['Skew']>0.5)|(skew_df['Skew']<-0.5)]

train=houses[:len_train]
test=houses[len_train:]

lam=0.1
for col in ('MiscVal', 'PoolArea', 'LotArea', 'LowQualFinSF', '3SsnPorch',
       'KitchenAbvGr', 'BsmtFinSF2', 'EnclosedPorch', 'ScreenPorch',
       'BsmtHalfBath', 'MasVnrArea', 'OpenPorchSF', 'WoodDeckSF',
       'LotFrontage', 'GrLivArea', 'BsmtFinSF1', 'BsmtUnfSF', 'Fireplaces',
       'HalfBath', 'TotalBsmtSF', 'BsmtFullBath', 'OverallCond', 'YearBuilt',
       'GarageYrBlt'):
    train[col]=boxcox1p(train[col],lam)
    test[col]=boxcox1p(test[col],lam)
    
train['SalePrice']=np.log(train['SalePrice'])

houses=pd.concat([train,test], sort=False)

#Proper column names
tf_regex=re.compile('[A-Za-z0-9.][A-Za-z0-9_.\\-/]*')
for col in houses.columns:
    mo=tf_regex.findall(col)
    houses.rename(columns = {col:''.join(mo)}, inplace = True) 
    
training_df=houses[:len_train]
testing_df=houses[len_train:]

#Personal Data Modification and Adjustment
#----------------------------------------------------
training_labels=training_df['SalePrice']
testing_labels=testing_df['SalePrice']
training_df.drop(['SalePrice','Id'], axis=1,inplace=True)
testing_df.drop(['SalePrice','Id'], axis=1,inplace=True)

#Building Feature Columns

tf_feature_columns=[]
categorical_columns={}

for col in training_df.select_dtypes(include=['float','int']):
    tf_feature_columns.append(tf.feature_column.numeric_column(col))

for col in training_df.select_dtypes(include=['object']):
    categorical_columns[col]=houses[col].unique()
    
for key, values in categorical_columns.items():
    vocabulary_feature_column = tf.feature_column.categorical_column_with_vocabulary_list(
        key=key,
        vocabulary_list=values)
    indicator_column = tf.feature_column.indicator_column(vocabulary_feature_column)
    tf_feature_columns.append(indicator_column)
    
#zscore columns
for col in ['LotFrontage', 'LotArea', 'OverallQual', 'OverallCond',
        'MasVnrArea', 'BsmtFinSF1', 'BsmtUnfSF', 'TotalBsmtSF',
        'GrLivArea', 'GarageYrBlt', 'GarageCars',
        'WoodDeckSF', 'OpenPorchSF']:
    training_df[col]=zscore(training_df[col])
    testing_df[col]=zscore(testing_df[col])
    
validation_df=training_df[728:]
training_df=training_df[:728]
validation_labels=training_labels[728:]
training_labels=training_labels[:728]    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_gui

In [112]:
estimator = tf.estimator.DNNRegressor(
    feature_columns=tf_feature_columns,
    hidden_units=[1024, 512,256,128],
    optimizer=tf.train.ProximalAdagradOptimizer(
      learning_rate=0.001,
      l1_regularization_strength=0.001
    ))

train_spec=tf.estimator.TrainSpec(input_fn=lambda:train_input_fn(training_df,training_labels,100))
eval_spec = tf.estimator.EvalSpec(input_fn=lambda:validation_input_fn(validation_df,validation_labels,1))

tf.estimator.train_and_evaluate(
    estimator=estimator,
    train_spec=train_spec,
    eval_spec=eval_spec
)

results=estimator.evaluate(input_fn=lambda:validation_input_fn(validation_df,validation_labels,1))

results

W0901 20:32:31.527804 140487420921664 estimator.py:1811] Using temporary folder as model directory: /tmp/tmpelki_nmh


{'average_loss': 0.023675624,
 'label/mean': 12.01604,
 'loss': 0.023675624,
 'prediction/mean': 12.059729,
 'global_step': 7280}

In [113]:
predictions=estimator.predict(input_fn=lambda:eval_input_fn(testing_df,1))
idi=1461
data=[]
for pre_dict in predictions:
    data.append([idi,np.exp(pre_dict['predictions'][0])])
    idi+=1
    

data=pd.DataFrame(data, columns=['Id','SalePrice'])

data.to_csv('HousePredictions2.csv', index=None,header=True)

[[1461, 11.873117],
 array([            inf, 121463.00818353]),
 [1463, 12.079979],
 [1464, 12.059605],
 [1465, 12.11497],
 [1466, 12.075922],
 [1467, 11.985489],
 [1468, 12.084193],
 [1469, 12.031033],
 [1470, 11.915199],
 [1471, 12.078215],
 [1472, 11.928314],
 [1473, 11.921216],
 [1474, 11.93084],
 [1475, 11.965329],
 [1476, 12.242445],
 [1477, 12.167918],
 [1478, 12.161111],
 [1479, 12.204445],
 [1480, 12.221526],
 [1481, 12.15078],
 [1482, 12.1492405],
 [1483, 12.116211],
 [1484, 12.130819],
 [1485, 12.041662],
 [1486, 12.09664],
 [1487, 12.132649],
 [1488, 12.089973],
 [1489, 12.153415],
 [1490, 12.109482],
 [1491, 12.131927],
 [1492, 11.863215],
 [1493, 12.03104],
 [1494, 12.042598],
 [1495, 12.068479],
 [1496, 12.114048],
 [1497, 12.1436405],
 [1498, 12.134251],
 [1499, 12.111699],
 [1500, 12.108462],
 [1501, 12.096973],
 [1502, 12.082088],
 [1503, 12.211459],
 [1504, 12.112414],
 [1505, 12.11787],
 [1506, 11.971339],
 [1507, 12.091598],
 [1508, 12.009553],
 [1509, 11.9970665],

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,5.203879,14.699651,Pave,,Reg,Lvl,AllPub,...,0.0,,,,0.0,2,2008,WD,Normal,12.247694
1,2,20,RL,5.518456,15.016794,Pave,,Reg,Lvl,AllPub,...,0.0,,,,0.0,5,2007,WD,Normal,12.109011
2,3,60,RL,5.271613,15.416697,Pave,,IR1,Lvl,AllPub,...,0.0,,,,0.0,9,2008,WD,Normal,12.317167
3,4,70,RL,5.084572,15.003735,Pave,,IR1,Lvl,AllPub,...,0.0,,,,0.0,2,2006,WD,Abnorml,11.849398
4,5,60,RL,5.593439,16.026454,Pave,,IR1,Lvl,AllPub,...,0.0,,,,0.0,12,2008,WD,Normal,12.429216
