In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [None]:
# Load the preprocessed Ames training set. keep_default_na=False together with
# an empty na_values list tells pandas to treat no string as missing, so every
# cell is read literally (presumably so labels such as "NA" survive as real
# category values -- TODO confirm against the preprocessing step).
df_train = pd.read_csv(
    'AmesHousingPreprocessedTrain.csv',
    keep_default_na=False,
    na_values=[],
)

In [None]:
# Separate the regression target ('Sale Price') from the predictors.
# y_train is a plain NumPy array; X_train keeps every other column of df_train
# in its original order (drop returns a new frame, df_train is left untouched).
y_train = df_train['Sale Price'].to_numpy()
X_train = df_train.drop('Sale Price', axis=1)

In [None]:
# Hyper-parameter search for the gradient-boosted regressor.
#
# The estimator is pinned to one thread (n_jobs=1) because GridSearchCV below
# already runs 4 fits in parallel. Leaving XGBoost at its default thread count
# would make every worker compete for the same cores; the XGBoost docs warn
# that this thread contention slows both layers down.
xgb_model = xgb.XGBRegressor(
    objective='reg:squarederror',
    random_state=1,
    n_jobs=1,
)

# 5 x 5 x 4 x 4 = 400 candidate settings.
param_grid = {
    'subsample': np.linspace(0.05, 1, num=5),         # row fraction sampled per tree
    'colsample_bytree': np.linspace(0.05, 1, num=5),  # column fraction sampled per tree
    'max_depth': [2, 4, 6, 8],
    'n_estimators': [100, 200, 400, 800],
}

# Each candidate is scored with shuffled 5-fold CV (2000 fits in total). The
# fold assignment is seeded so repeated runs compare candidates on the same
# splits. Scores are negated MSE, so "higher is better" holds for the search.
# NOTE: the name `clf` is kept (although this is a regressor) because later
# cells read clf.best_estimator_ / best_params_ / best_score_.
clf = GridSearchCV(
    xgb_model,
    param_grid,
    verbose=2,
    cv=KFold(n_splits=5, shuffle=True, random_state=2),
    scoring='neg_mean_squared_error',
    n_jobs=4,
)

clf.fit(X_train, y_train)

In [None]:
# Take the winning estimator from the grid search. `refit` was left at its
# default (True per the scikit-learn docs), so GridSearchCV has already
# retrained this estimator on all of X_train with the best parameters.
model = clf.best_estimator_

In [None]:
# Show the hyper-parameter combination with the best mean cross-validated score.
clf.best_params_

In [None]:
# Mean cross-validated score of the best combination. The search is scored with
# 'neg_mean_squared_error', so this value is the negated MSE: it is <= 0,
# closer to zero is better, and -clf.best_score_ is the MSE itself.
clf.best_score_

In [None]:
# Predict on the same rows the model was fitted on. Anything computed from
# y_pred is therefore an in-sample (training) figure, not a hold-out estimate.
y_pred = model.predict(X_train)

In [None]:
# Mean absolute error between the training targets and the model's predictions
# for those same training rows (in-sample, so expect an optimistic value).
mean_absolute_error(y_true=y_train, y_pred=y_pred)

In [None]:
# Mean squared error between the training targets and the model's predictions
# for those same training rows (in-sample, so expect an optimistic value).
mean_squared_error(y_true=y_train, y_pred=y_pred)

In [None]:
# Estimator's built-in score on the training data; for scikit-learn style
# regressors this is the coefficient of determination (R^2). In-sample figure.
model.score(X=X_train, y=y_train)

In [None]:
# Persist the tuned model to 'ames_model.json' (path is relative to the
# notebook's working directory).
model.save_model(fname='ames_model.json')