In [872]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRFRegressor
from sklearn.svm import SVR
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.feature_selection import mutual_info_regression


In [864]:
data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

In [865]:
numerical_features = data.select_dtypes(include=['int64','float64']).columns
categorical_features = data.select_dtypes(include=['category','bool','object']).columns

In [866]:
# X = data[['OverallQual','GrLivArea','GarageCars','GarageArea','TotalBsmtSF','1stFlrSF','FullBath','TotRmsAbvGrd','YearBuilt','YearRemodAdd','GarageYrBlt','MasVnrType',
# 'Fireplaces','BsmtFinSF1','LotFrontage','WoodDeckSF','2ndFlrSF','OpenPorchSF']]
# test_data = test_data[['OverallQual','GrLivArea','GarageCars','GarageArea','TotalBsmtSF','1stFlrSF','FullBath','TotRmsAbvGrd','YearBuilt','YearRemodAdd','GarageYrBlt','MasVnrType',
# 'Fireplaces','BsmtFinSF1','LotFrontage','WoodDeckSF','2ndFlrSF','OpenPorchSF']]

X = data.drop('SalePrice',axis=1)
y = data['SalePrice']



In [867]:
numerical_features = X.select_dtypes(include=['int64','float64']).columns
categorical_features = X.select_dtypes(include=['category','bool','object']).columns

In [868]:
X_train,X_validation,y_train,y_validation = train_test_split(X,y,test_size=0.2,random_state=0)

In [869]:
numerical_pipe = Pipeline(
    steps=[
        ('imputer',SimpleImputer(strategy='median')),
        ('scaler',StandardScaler())
    ]
)

categorical_pipe = Pipeline(
    steps=[
        ('imputer',SimpleImputer(strategy='most_frequent')),
        ('onehot',OneHotEncoder(handle_unknown='ignore'))
    ]
)

In [870]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num',numerical_pipe,numerical_features),
        ('cat',categorical_pipe,categorical_features)
    ]
)

In [871]:
reg = LinearRegression()
rf = RandomForestRegressor()
xgb = XGBRFRegressor()
svr = SVR()
models = [reg,rf,xgb,svr]
best_accuracy = 10000000000000
best_model = None
for model in models:
    pipe = Pipeline(
        steps=[
            ('preprocessor',preprocessor),
            ('model',model)
        ]
    )
    pipe.fit(X_train,y_train)
    y_val = pipe.predict(X_validation)
    acc = mean_squared_error(y_validation,y_val)
    if acc < best_accuracy:
        best_accuracy = acc
        best_model = pipe
print(f"The best model is {best_model} with squared mean loss of {best_accuracy}")
y_pred = best_model.predict(test_data)
result = pd.DataFrame(
    {
        "Id":list(range(1461,1461+len(y_pred))),
        "SalePrice" : y_pred
    }
    )
result.to_csv("submission.csv",index=False) 

The best model is Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('num',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(strategy='median')),
                                                                  ('scaler',
                                                                   StandardScaler())]),
                                                  Index(['Id', 'MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual',
       'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1',
       'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrS...
       'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation',
       'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',
       'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual',
       'Functional', 'FireplaceQu', 'GarageType'