#### ID : AI23
#### Submission Date : 29th December, 2017


In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import make_scorer
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

In [2]:
def rmsle(y_pred, y_true): 
    
    return np.sqrt(np.square(np.log(y_pred + 1) - np.log(y_true + 1)).mean())

In [3]:
train_df=pd.read_csv('train.csv')
test_df=pd.read_csv('test.csv')

In [4]:
ntrain = train_df.shape[0]
ntest = test_df.shape[0]
y_train = train_df.SalePrice.values
all_data = pd.concat((train_df, test_df)).reset_index(drop=True)
all_data.drop(['SalePrice'], axis=1, inplace=True)
print("df size is : {}".format(all_data.shape))
print(train_df.shape,test_df.shape)

df size is : (2919, 80)
(1460, 81) (1459, 80)


In [5]:
all_data = all_data.fillna(0)
total = all_data.isnull().sum().sort_values(ascending=False)
percent = (all_data.isnull().sum()/all_data.isnull().count()).sort_values(ascending=False)
missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
missing_data

Unnamed: 0,Total,Percent
YrSold,0,0.0
YearRemodAdd,0,0.0
ExterCond,0,0.0
ExterQual,0,0.0
Exterior1st,0,0.0
Exterior2nd,0,0.0
Fence,0,0.0
FireplaceQu,0,0.0
Fireplaces,0,0.0
Foundation,0,0.0


In [6]:
all_data['MSSubClass'] = all_data['MSSubClass'].astype('category')
all_data['OverallCond'] = all_data['OverallCond'].astype('category')
all_data['YrSold'] = all_data['YrSold'].astype('category')
all_data['MoSold'] = all_data['MoSold'].astype('category')


In [7]:
categoric_feats = list(test_df.dtypes[test_df.dtypes == "object"].index)
for items in categoric_feats:
    all_data[items]=all_data[items].astype('category').cat.codes
len(categoric_feats)

43

In [8]:
all_data.dtypes.unique()

array([dtype('int64'), dtype('int8'), dtype('float64'), category], dtype=object)

In [9]:
train_data = all_data[:ntrain]
test_data = all_data[ntrain:]
print(train_data.shape,test_data.shape)

(1460, 80) (1459, 80)


In [10]:
scaler=StandardScaler()
X_train_scaled=scaler.fit_transform(train_data)
X_test_scaled=scaler.fit_transform(test_data)
y_train=np.log(y_train)

In [11]:
layer=[]
a=0
for i in range(1,4):
    for j in [10,20,30,40,50]:
        a=j
        layer.append(tuple((np.ones(i)*j).astype(int)))
len(layer)

15

In [12]:
ann=MLPRegressor(hidden_layer_sizes=(40,20,10),max_iter=1000,verbose=False)
param_grid={'activation':['relu','identity'],
            'solver':['lbfgs','adam'],
            'learning_rate':['constant','adaptive'],
            'alpha':[0.0001],
            'tol':[.0001]}
#grid = GridSearchCV(ann, param_grid, cv=5, scoring=make_scorer(mean_squared_error),n_jobs = -1)
grid = GridSearchCV(ann, param_grid, cv=10, scoring='r2')

grid.fit(X_train_scaled,y_train)
print(grid.best_score_)
print(grid.best_params_)
print(grid.best_estimator_)

0.836477582821
{'activation': 'identity', 'alpha': 0.0001, 'learning_rate': 'adaptive', 'solver': 'lbfgs', 'tol': 0.0001}
MLPRegressor(activation='identity', alpha=0.0001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(40, 20, 10), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=1000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='lbfgs', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)


In [14]:
test_y_grid = pd.DataFrame()
test_y_grid['Id'] = test_data['Id']
test_y_grid['SalePrice'] = np.exp(grid.predict(X_test_scaled))
test_y_grid.to_csv('submission_grid.csv',index=0)