# Install packages

In [None]:
# Optional: uncomment to install the pinned versions of the third-party
# packages imported below (catboost, scikit-learn).
# %pip install catboost==1.0.6
# %pip install scikit-learn==0.24

# Import packages

In [None]:
import pandas as pd
import numpy as np
import pickle

from sklearn.metrics import mean_squared_error

from catboost import CatBoostRegressor , Pool

# Load train/dev data

In [None]:
# Read the train and dev CSV files into DataFrames, then preview the first
# training rows as the cell output.
train_csv, dev_csv = '../data/W207_train_FE.csv', '../data/W207_dev_FE.csv'
df_train_FE = pd.read_csv(train_csv)
df_dev_FE = pd.read_csv(dev_csv)
df_train_FE.head()

# Trim the features

In [None]:
# Column the model is trained to predict.
col_target = 'FE_SalePrice_Per_IndoorArea'

# Columns passed to CatBoost as `cat_features`.
# The first 25 entries have no "FEC_<name>_dual" companion in the feature list;
# the remaining 35 (from 'ExterQual' onwards) each have one.
col_cat_final = [
    'MSSubClass', 'MSZoning', 'Street', 'LandContour', 'Utilities',
    'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2',
    'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st',
    'Exterior2nd', 'MasVnrType', 'Foundation', 'Heating', 'CentralAir',
    'Electrical', 'GarageType', 'SaleType', 'SaleCondition', 'MiscFeature',
    'ExterQual', 'LotShape', 'ExterCond', 'BsmtQual', 'BsmtCond',
    'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'HeatingQC', 'Functional',
    'KitchenQual', 'GarageQual', 'GarageFinish', 'GarageCond', 'PavedDrive',
    'PoolQC', 'Fence', 'FireplaceQu', 'Alley',
    'OverallQual', 'OverallCond', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
    'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces',
    'GarageCars', 'YearBuilt', 'YearRemodAdd', 'MoSold', 'YrSold',
    'GarageYrBlt',
]

# Columns that open the feature list; none of them appears in col_cat_final.
# NOTE(review): 'FEN_Area_comp6' is absent between comp5 and comp7 - confirm
# this omission is intentional.
_plain_features = [
    'MiscVal', 'LotFrontage', 'FEN_IndoorArea',
    'FEN_YearAfterRemode', 'FEN_YearAfterbuilt',
    'FEN_TotRmsAbvGrd_Per_GrLivArea',
    'FEN_BsmtTotalbath', 'FEN_GrLivTotalbath',
    'FEN_Area_comp0', 'FEN_Area_comp1', 'FEN_Area_comp2', 'FEN_Area_comp3',
    'FEN_Area_comp4', 'FEN_Area_comp5', 'FEN_Area_comp7',
]

# "FEC_<name>_dual" columns that close the feature list. The first 35 mirror
# col_cat_final[25:]; the last 19 have no raw counterpart in col_cat_final.
_dual_features = [
    'FEC_ExterQual_dual', 'FEC_LotShape_dual', 'FEC_ExterCond_dual',
    'FEC_BsmtQual_dual', 'FEC_BsmtCond_dual', 'FEC_BsmtExposure_dual',
    'FEC_BsmtFinType1_dual', 'FEC_BsmtFinType2_dual', 'FEC_HeatingQC_dual',
    'FEC_Functional_dual', 'FEC_KitchenQual_dual', 'FEC_GarageQual_dual',
    'FEC_GarageFinish_dual', 'FEC_GarageCond_dual', 'FEC_PavedDrive_dual',
    'FEC_PoolQC_dual', 'FEC_Fence_dual', 'FEC_FireplaceQu_dual',
    'FEC_Alley_dual', 'FEC_OverallQual_dual', 'FEC_OverallCond_dual',
    'FEC_BsmtFullBath_dual', 'FEC_BsmtHalfBath_dual', 'FEC_FullBath_dual',
    'FEC_HalfBath_dual', 'FEC_BedroomAbvGr_dual', 'FEC_KitchenAbvGr_dual',
    'FEC_TotRmsAbvGrd_dual', 'FEC_Fireplaces_dual', 'FEC_GarageCars_dual',
    'FEC_YearBuilt_dual', 'FEC_YearRemodAdd_dual', 'FEC_MoSold_dual',
    'FEC_YrSold_dual', 'FEC_GarageYrBlt_dual',
    'FEC_LotArea_dual', 'FEC_MasVnrArea_dual', 'FEC_BsmtFinSF1_dual',
    'FEC_BsmtFinSF2_dual', 'FEC_BsmtUnfSF_dual', 'FEC_TotalBsmtSF_dual',
    'FEC_1stFlrSF_dual', 'FEC_2ndFlrSF_dual', 'FEC_LowQualFinSF_dual',
    'FEC_GrLivArea_dual', 'FEC_GarageArea_dual', 'FEC_WoodDeckSF_dual',
    'FEC_OpenPorchSF_dual', 'FEC_EnclosedPorch_dual', 'FEC_3SsnPorch_dual',
    'FEC_ScreenPorch_dual', 'FEC_PoolArea_dual', 'FEC_MiscVal_dual',
    'FEC_LotFrontage_dual',
]

# Full model input, in order: plain columns, categorical columns, dual columns.
feature_list = _plain_features + col_cat_final + _dual_features
       


# Evaluation metric used to check the model's predictions.
def rmse(validation, target):
    """Return the root-mean-squared error between two sets of values.

    Both arguments are forwarded, in order, to
    ``sklearn.metrics.mean_squared_error``; the square root of that result
    is returned.
    """
    mse = mean_squared_error(validation, target)
    return np.sqrt(mse)

In [None]:
# Feature frames for the train and dev splits, restricted to the selected columns.
X_train = df_train_FE[feature_list]
X_valid = df_dev_FE[feature_list]

# Matching targets, flattened to 1-D NumPy arrays.
y_train = df_train_FE[col_target].to_numpy().ravel()
y_valid = df_dev_FE[col_target].to_numpy().ravel()

# Build the model

In [None]:
# Wrap features and labels in CatBoost Pools, marking the categorical columns.
# Note: `test_pool` is built from the dev split (X_valid / y_valid).
train_pool = Pool(data=X_train, label=y_train, cat_features=col_cat_final)
test_pool = Pool(data=X_valid, label=y_valid, cat_features=col_cat_final)

In [None]:
# Hyper-parameters forwarded to CatBoostRegressor via ``**param``.
#
# 'random_seed' used to be listed twice in this literal (42, then 4). Python
# keeps the last value of a repeated key, so the first entry was dead and
# training has always run with seed 4. Only the effective entry is kept, which
# leaves results unchanged.
param = {
    'boosting_type': 'Plain',
    'iterations': 700,
    'depth': 5,
    'l2_leaf_reg': 5.464804976966055,
    'learning_rate': 0.08943915485210002,
    'logging_level': 'Silent',
    'loss_function': 'RMSE',
    'max_ctr_complexity': 6,
    'random_seed': 4,
    'task_type': 'CPU',
}

In [None]:
# Build the regressor from `param` and train it on the training pool with
# per-iteration output turned off.
model = CatBoostRegressor(**param)
model.fit(train_pool, verbose=0)

In [None]:
# The model is fit on `col_target`, so its output is multiplied by
# FEN_IndoorArea before being compared against SalePrice (training split).
train_raw_preds = model.predict(train_pool)
preds = train_raw_preds * X_train['FEN_IndoorArea']
rmse(df_train_FE['SalePrice'], preds)

In [None]:
# Same evaluation on the dev split: scale the model output by FEN_IndoorArea,
# then compare against SalePrice.
dev_raw_preds = model.predict(test_pool)
preds = dev_raw_preds * X_valid['FEN_IndoorArea']
rmse(df_dev_FE['SalePrice'], preds)

# Save the model

In [None]:
# Serialize the fitted model to disk. The context manager guarantees the file
# handle is flushed and closed, even if pickling raises.
with open("./model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)