In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split, GridSearchCV

In [None]:
# Load the pre-processed Ames table.
# An empty `na_values` list combined with `keep_default_na=False` switches off
# pandas' built-in NA markers, so strings such as "NA" are kept as literal
# values instead of being converted to NaN.
# NOTE(review): presumably "NA" is a meaningful category in this dataset -- confirm.
df = pd.read_csv(
    'ames_processed.csv',
    na_values=[],
    keep_default_na=False,
)

In [None]:
# Separate the regression target from the predictors.
# `y` is the 'Sale Price' column pulled out through `.values`;
# `X` is the same frame with that one column removed.
y = df['Sale Price'].values
X = df.drop(['Sale Price'], axis='columns')

In [None]:
# Hold out 25% of the rows for evaluation; the fixed `random_state` makes the
# partition identical on every Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [None]:
# Base learner: gradient-boosted trees trained with a squared-error objective.
xgb_model = xgb.XGBRegressor(objective='reg:squarederror')

# Hyper-parameter grid: 5 x 5 x 4 x 4 = 400 candidate combinations.
param_grid = {
    # Fraction of training rows sampled for each tree.
    'subsample': np.linspace(0.05, 1, num=5),
    # Fraction of columns sampled for each tree.
    'colsample_bytree': np.linspace(0.05, 1, num=5),
    'max_depth': [2, 4, 6, 8],
    'n_estimators': [50, 100, 200, 400],
}

# Exhaustive search over the grid. Neither `cv` nor `scoring` is passed, so
# scikit-learn's defaults apply (default CV splitter, the estimator's own
# `score` method as the metric). Candidate fits are spread over 4 workers.
clf = GridSearchCV(
    estimator=xgb_model,
    param_grid=param_grid,
    verbose=2,
    n_jobs=4,
)

clf.fit(X_train, y_train)

In [None]:
# Keep a handle on the winning estimator from the grid search. `refit` is not
# overridden above, so this is the best configuration as exposed by
# GridSearchCV after fitting on (X_train, y_train).
model = clf.best_estimator_

In [None]:
# Display the hyper-parameter combination that achieved the best
# cross-validated score in the grid search.
clf.best_params_

In [None]:
# Display the cross-validated score of the best candidate. No `scoring` was
# passed to GridSearchCV, so the metric is the estimator's default `score`
# (presumably R^2 for a regressor -- confirm against the scikit-learn docs).
clf.best_score_

In [None]:
# Coefficient of determination, R^2 = 1 - SS_res / SS_tot.
#
# The original cell scored the model only on the rows it was trained on, which
# gives an optimistic, in-sample number and leaves the held-out split from
# `train_test_split` unused. Both figures are reported here: the training R^2
# (same `y_pred`, `u`, `v` as before) and the R^2 on the untouched test rows,
# which is the honest estimate of generalisation.

# --- In-sample (training) R^2 -------------------------------------------
y_pred = model.predict(X_train)
u = ((y_train - y_pred) ** 2).sum()          # residual sum of squares
v = ((y_train - y_train.mean()) ** 2).sum()  # total sum of squares
r2_train = 1 - (u / v)

# --- Held-out (test) R^2 ------------------------------------------------
y_pred_test = model.predict(X_test)
u_test = ((y_test - y_pred_test) ** 2).sum()
v_test = ((y_test - y_test.mean()) ** 2).sum()
r2_test = 1 - (u_test / v_test)

# Last expression is the cell output: both scores side by side.
pd.Series({'train': r2_train, 'test': r2_test}, name='R^2')

In [None]:
# Persist the selected model to 'ames_model.json' in the working directory
# so it can be reloaded without repeating the grid search.
model.save_model('ames_model.json')