#### ID : AI23
#### Submission Date : 29th December, 2017


In [61]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import make_scorer
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

In [2]:
def rmsle(y_pred, y_true): 
    
    return np.sqrt(np.square(np.log(y_pred + 1) - np.log(y_true + 1)).mean())

In [62]:
train_df=pd.read_csv('train.csv')
test_df=pd.read_csv('test.csv')
train_ID = train_df['Id']
test_ID = test_df['Id']
train_df.shape

(1460, 81)

In [63]:
ntrain = train_df.shape[0]
ntest = test_df.shape[0]
y_train = train_df.SalePrice.values
all_data = pd.concat((train_df, test_df)).reset_index(drop=True)
all_data.drop(['SalePrice'], axis=1, inplace=True)
print("all_data size is : {}".format(all_data.shape))
print(train_df.shape,test_df.shape)

all_data size is : (2919, 80)
(1460, 81) (1459, 80)


In [64]:
all_data = all_data.fillna(0)
total = all_data.isnull().sum().sort_values(ascending=False)
percent = (all_data.isnull().sum()/all_data.isnull().count()).sort_values(ascending=False)
missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
missing_data

Unnamed: 0,Total,Percent
YrSold,0,0.0
YearRemodAdd,0,0.0
ExterCond,0,0.0
ExterQual,0,0.0
Exterior1st,0,0.0
Exterior2nd,0,0.0
Fence,0,0.0
FireplaceQu,0,0.0
Fireplaces,0,0.0
Foundation,0,0.0


In [65]:
all_data['MSSubClass'] = all_data['MSSubClass'].astype('category')
all_data['OverallCond'] = all_data['OverallCond'].astype('category')
all_data['YrSold'] = all_data['YrSold'].astype('category')
all_data['MoSold'] = all_data['MoSold'].astype('category')


In [66]:
categoric_feats = list(test_df.dtypes[test_df.dtypes == "object"].index)
for items in categoric_feats:
    all_data[items]=all_data[items].astype('category').cat.codes
len(categoric_feats)

43

In [67]:
all_data.dtypes.unique()

array([dtype('int64'), dtype('int8'), dtype('float64'), category], dtype=object)

In [68]:
all_data.drop(['Id'], axis=1, inplace=True)
train_data = all_data[:ntrain]
test_data = all_data[ntrain:]
print(train_data.shape,test_data.shape)

(1460, 79) (1459, 79)


In [69]:
scaler=StandardScaler()
X_train_scaled=scaler.fit_transform(train_data)
X_test_scaled=scaler.fit_transform(test_data)
y_train=np.log(y_train)

In [11]:
layer=[]
a=0
for i in range(1,4):
    for j in [10,20,30,40,50]:
        a=j
        layer.append(tuple((np.ones(i)*j).astype(int)))
len(layer)

15

In [70]:
ann=MLPRegressor(hidden_layer_sizes=(40,20,10),max_iter=1000,verbose=False)
param_grid={'activation':['relu','identity'],
            'solver':['lbfgs','adam'],
            'learning_rate':['constant','adaptive'],
            'alpha':[0.0001],
            'tol':[.0001]}
#grid = GridSearchCV(ann, param_grid, cv=5, scoring=make_scorer(mean_squared_error),n_jobs = -1)
grid = GridSearchCV(ann, param_grid, cv=10, scoring='r2')

grid.fit(X_train_scaled,y_train)
print(grid.best_score_)
print(grid.best_params_)
print(grid.best_estimator_)

0.656191008525
{'activation': 'relu', 'alpha': 0.0001, 'learning_rate': 'adaptive', 'solver': 'lbfgs', 'tol': 0.0001}
MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(40, 20, 10), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=1000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='lbfgs', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)


In [72]:
test_y_grid = pd.DataFrame()
test_y_grid['Id'] = test_ID
test_y_grid['SalePrice'] = np.exp(grid.predict(X_test_scaled))
test_y_grid.to_csv('submission_grid.csv',index=0)

#### CNN

In [11]:
import keras
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.constraints import maxnorm
from keras.optimizers import SGD
from keras.layers.convolutional import Conv2D, Conv1D
from keras.layers.convolutional import MaxPooling2D,MaxPooling1D
from keras.utils import np_utils
from keras.layers import Embedding
from keras.utils.np_utils import to_categorical

Using TensorFlow backend.


In [35]:
def CNN(train_x_scaled_sample):
    model = Sequential()
    model.add(Conv1D(200, 1,strides=1, activation='relu', input_shape=(1,train_x_scaled_sample.shape[1])))
    model.add(Conv1D(200, 1,strides =1, activation='relu'))
    model.add(MaxPooling1D(1))
    model.add(Conv1D(100,1,strides=1, activation='relu'))
    model.add(Conv1D(100, 1,strides=1 ,activation='relu'))
    model.add(MaxPooling1D(1))
    model.add(Dropout(0.5))
    model.add(Dense(1,activation='relu'))
    return model
 

In [41]:
x=X_train_scaled
x_reshaped = np.reshape(x,(x.shape[0],1,x.shape[1]))
x_reshaped.shape

(1460, 1, 80)

In [42]:
y=y_train
y_reshape = np.reshape(y,(y.shape[0],1,-1))
y_reshape.shape

(1460, 1, 1)

In [38]:
model = CNN(x)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_5 (Conv1D)            (None, 1, 200)            16200     
_________________________________________________________________
conv1d_6 (Conv1D)            (None, 1, 200)            40200     
_________________________________________________________________
max_pooling1d_3 (MaxPooling1 (None, 1, 200)            0         
_________________________________________________________________
conv1d_7 (Conv1D)            (None, 1, 100)            20100     
_________________________________________________________________
conv1d_8 (Conv1D)            (None, 1, 100)            10100     
_________________________________________________________________
max_pooling1d_4 (MaxPooling1 (None, 1, 100)            0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 1, 100)            0         
__________

In [39]:
model.compile(loss='mean_squared_error', optimizer='adam')

In [40]:
nb_epochs = 250
model.fit(x_reshaped,y_reshape,epochs=nb_epochs,validation_split=0.1,verbose=1,batch_size=1)

ValueError: Error when checking input: expected conv1d_5_input to have 3 dimensions, but got array with shape (1460, 80)

In [28]:
test_x=X_test_scaled
test_x_reshape = np.reshape(test_x,(test_x.shape[0],1,test_x.shape[1]))
y_pred_cnn_reshape = model.predict(test_x_reshape)
y_pred_cnn_reshape.shape

(1459, 1, 1)

In [29]:
y_pred_cnn = np.reshape(y_pred_cnn_reshape,(-1,1))
y_pred_cnn.shape

(1459, 1)

In [30]:
y_pred_cnn = np.exp(y_pred_cnn)

In [31]:
y_pred_cnn

array([[ 120300.2890625],
       [ 182200.421875 ],
       [ 190931.46875  ],
       ..., 
       [ 167185.828125 ],
       [ 131926.59375  ],
       [ 236464.03125  ]], dtype=float32)

In [32]:
test_y_cnn = pd.DataFrame()
test_y_cnn['Id'] = test_data['Id']
test_y_cnn['SalePrice'] = y_pred_cnn

In [34]:
test_y_cnn.to_csv('submission_cnn.csv',index=0)