In [1]:
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn import svm
from sklearn.impute import SimpleImputer
# NOTE(review): this is a private module path; newer scikit-learn releases
# expose the class as `sklearn.neighbors.NearestCentroid` — confirm against
# the installed version.
from sklearn.neighbors.nearest_centroid import NearestCentroid
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor

In [2]:
# Load the raw CSVs. `y` is the regression target; the feature frames drop
# the row identifier (and, for train, the target itself).
train = pd.read_csv('train.csv')
y = train['SalePrice']
non_imputed_train = train.drop(columns=['Id', 'SalePrice'])
non_imputed_test = pd.read_csv('test.csv').drop(columns='Id')

In [3]:
# Work on copies so the un-imputed frames stay available unchanged.
train, test = non_imputed_train.copy(), non_imputed_test.copy()

# Columns are chosen from the *training* frame only, and the same list is
# applied to both frames, so train and test receive identical indicator
# columns (a column with gaps only in test gets no indicator).
has_missing = non_imputed_train.isnull().any()
cols_with_missing = non_imputed_train.columns[has_missing]
for col in cols_with_missing:
    flag_name = col + '_was_missing'
    train[flag_name] = train[col].isnull()
    test[flag_name] = test[col].isnull()

In [4]:
# Preview the first five training rows, including the new indicator columns.
train.head(5)

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,Electrical_was_missing,FireplaceQu_was_missing,GarageType_was_missing,GarageYrBlt_was_missing,GarageFinish_was_missing,GarageQual_was_missing,GarageCond_was_missing,PoolQC_was_missing,Fence_was_missing,MiscFeature_was_missing
0,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,...,False,True,False,False,False,False,False,True,True,True
1,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,...,False,False,False,False,False,False,False,True,True,True
2,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,Inside,...,False,False,False,False,False,False,False,True,True,True
3,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,Corner,...,False,False,False,False,False,False,False,True,True,True
4,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,FR2,...,False,False,False,False,False,False,False,True,True,True


In [5]:
# Preview the first five test rows, including the new indicator columns.
test.head(5)

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,Electrical_was_missing,FireplaceQu_was_missing,GarageType_was_missing,GarageYrBlt_was_missing,GarageFinish_was_missing,GarageQual_was_missing,GarageCond_was_missing,PoolQC_was_missing,Fence_was_missing,MiscFeature_was_missing
0,20,RH,80.0,11622,Pave,,Reg,Lvl,AllPub,Inside,...,False,True,False,False,False,False,False,True,False,True
1,20,RL,81.0,14267,Pave,,IR1,Lvl,AllPub,Corner,...,False,True,False,False,False,False,False,True,True,False
2,60,RL,74.0,13830,Pave,,IR1,Lvl,AllPub,Inside,...,False,False,False,False,False,False,False,True,False,True
3,60,RL,78.0,9978,Pave,,IR1,Lvl,AllPub,Inside,...,False,False,False,False,False,False,False,True,True,True
4,120,RL,43.0,5005,Pave,,IR1,HLS,AllPub,Inside,...,False,True,False,False,False,False,False,True,True,True


In [6]:
# One-hot encode the categorical columns of each frame independently.
train = pd.get_dummies(train)
test = pd.get_dummies(test)

# Give `test` exactly train's columns, in train's order; columns that exist
# only in test are dropped (same effect as the previous left-join align).
# A column present in train but absent from test is a dummy for a category
# that never occurs in test, so its correct value is 0. Leaving it as NaN
# would let the later imputation step replace it with a training-set
# statistic instead. `fill_value` only applies to newly created columns, so
# genuine NaNs in existing columns are still left for the imputer.
# NOTE(review): assumes every train-only column is a dummy column — confirm
# test.csv has the same raw feature columns as train.csv.
test = test.reindex(columns=train.columns, fill_value=0)

In [7]:
# Learn the fill values from the training data only (imputer left at its
# default settings), then apply that same fitted imputer to both sets so
# no test-set statistics are used.
my_imputer = SimpleImputer().fit(train)
train = my_imputer.transform(train)
test = my_imputer.transform(test)

In [8]:
# Bayesian ridge regression with default hyperparameters: fit on the
# imputed training matrix, then predict the test set.
model_br = linear_model.BayesianRidge().fit(train, y)
predict_br = model_br.predict(test)

In [9]:
# Support-vector regression. SVMs are not scale invariant, so the features
# are standardized first; the scaler is fitted on train only and the same
# transform is applied to test.
svr_scaler = StandardScaler().fit(train)
model_svr = svm.SVR()
model_svr.fit(svr_scaler.transform(train), y)
predict_svr = model_svr.predict(svr_scaler.transform(test))
# NOTE(review): SVR is still left at its default C / epsilon, which are not
# tuned to the magnitude of the SalePrice target — consider tuning them or
# transforming the target.



In [10]:
# Nearest-centroid model with default settings: fit on the imputed training
# matrix, then predict the test set.
# NOTE(review): NearestCentroid is a classifier, while `y` is the SalePrice
# column, so each distinct price is presumably treated as its own class —
# confirm this is intended rather than a regressor.
model_nc = NearestCentroid().fit(train, y)
predict_nc = model_nc.predict(test)

In [11]:
# Small multi-layer perceptron (two hidden layers of 5 and 2 units, L-BFGS
# solver, fixed seed for reproducibility).
# SalePrice is a continuous target, so the regressor variant is used; a
# classifier would treat every distinct price as a separate class.
# Hyperparameters are unchanged from the previous classifier configuration.
model_mlp = MLPRegressor(
    solver='lbfgs',
    alpha=1e-5,
    hidden_layer_sizes=(5, 2),
    random_state=1,
)
model_mlp.fit(train, y)
predict_mlp = model_mlp.predict(test)

In [12]:
# Submission Ids are synthesized as a consecutive run starting at 1461,
# one per test row.
# NOTE(review): the real `Id` column of test.csv was dropped at load time;
# this assumes the file's Ids are exactly 1461, 1462, ... in row order —
# confirm.
first_id = 1461
indices = np.arange(first_id, first_id + len(test))

In [13]:
# Write the Bayesian-ridge predictions as a two-column submission file.
# Overwrite ('w') rather than append: in append mode every re-run of the
# notebook would add another header line and another full set of rows to
# the existing file.
put = pd.DataFrame({'Id': indices, 'SalePrice': predict_br})
put.to_csv('predict_br.csv', mode='w', index=False)

In [14]:
# Write the SVR predictions as a two-column submission file.
# Overwrite ('w') rather than append: in append mode every re-run of the
# notebook would add another header line and another full set of rows to
# the existing file.
put = pd.DataFrame({'Id': indices, 'SalePrice': predict_svr})
put.to_csv('predict_svr.csv', mode='w', index=False)

In [15]:
# Write the nearest-centroid predictions as a two-column submission file.
# Overwrite ('w') rather than append: in append mode every re-run of the
# notebook would add another header line and another full set of rows to
# the existing file.
put = pd.DataFrame({'Id': indices, 'SalePrice': predict_nc})
put.to_csv('predict_nc.csv', mode='w', index=False)

In [16]:
# Write the MLP predictions as a two-column submission file.
# Overwrite ('w') rather than append: in append mode every re-run of the
# notebook would add another header line and another full set of rows to
# the existing file.
put = pd.DataFrame({'Id': indices, 'SalePrice': predict_mlp})
put.to_csv('predict_mlp.csv', mode='w', index=False)