# Setup

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xgboost
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error
from joblib import dump

In [None]:
# Load the pre-saved training arrays from disk. Both are later passed to the
# estimators' fit() calls: the first as the feature input, the second as the
# regression target.
# NOTE(review): shapes/dtypes of the arrays are not visible here -- confirm
# against whatever notebook produced the .npy files.
X_train_all_indices, y_train = (
    np.load('../data/X_train_all_indices.npy'),
    np.load('../data/y_train.npy'),
)

# Model Training

In [None]:
# Coarse hyper-parameter sweep: 3 * 2 * 2 * 3 * 3 = 108 candidate
# combinations, each scored with 3-fold cross-validation (324 fits, plus the
# final refit GridSearchCV performs by default).
param_grid = dict(
    learning_rate=[0.01, 0.05, 0.1],
    colsample_bytree=[0.75, 1.0],
    reg_lambda=[0.5, 1.0],
    n_estimators=[250, 500, 750],
    max_depth=[5, 10, None],  # None defers to the library's default depth
)

# Base estimator trained with an absolute-error objective, which matches the
# MAE-based scoring used to rank candidates below.
model_all_indices = xgboost.XGBRegressor(objective='reg:absoluteerror')

grid_search = GridSearchCV(
    estimator=model_all_indices,
    param_grid=param_grid,
    scoring='neg_mean_absolute_error',  # higher (closer to 0) is better
    cv=3,
    n_jobs=-1,  # use every available processor for the search
    verbose=1,
)
grid_search.fit(X_train_all_indices, y_train)

In [None]:
# Report the winning combination from the grid search run in the previous cell.
print(f"Best parameters: {grid_search.best_params_}")

# GridSearchCV refits by default (refit=True): after the search it trains a
# clone of the base estimator (objective='reg:absoluteerror') with
# best_params_ on the full training set and exposes it as best_estimator_.
# Taking that model directly keeps the saved artifact in sync with the
# parameters printed above, instead of relying on hand-copied literals that
# silently go stale when the data or the grid changes, and avoids training
# the same model a second time.
model_all_indices = grid_search.best_estimator_

# Persist the tuned model; this overwrites any previously saved file at the
# same path.
dump(model_all_indices, '../models/xgboost.joblib')

In [None]:
# Second sweep: 3 * 2 * 2 * 3 * 1 = 36 candidate combinations, each scored
# with 3-fold cross-validation (108 fits, plus the default final refit).
# Compared with a 0.01-0.1 learning-rate range this grid explores larger
# learning rates, more trees, stronger L2 regularisation and smaller column
# subsamples, with max_depth left at the library default.
param_grid = dict(
    learning_rate=[0.1, 0.25, 0.5],
    colsample_bytree=[0.5, 0.75],
    reg_lambda=[1.0, 5.0],
    n_estimators=[750, 1000, 1250],
    max_depth=[None],  # None defers to the library's default depth
)

# Fresh, unfitted base estimator with an absolute-error objective.
model_all_indices = xgboost.XGBRegressor(objective='reg:absoluteerror')

grid_search = GridSearchCV(
    estimator=model_all_indices,
    param_grid=param_grid,
    scoring='neg_mean_absolute_error',  # higher (closer to 0) is better
    cv=3,
    n_jobs=-1,  # use every available processor for the search
    verbose=1,
)
grid_search.fit(X_train_all_indices, y_train)

In [None]:
# Report the winning combination from the grid search run in the previous cell.
print(f"Best parameters: {grid_search.best_params_}")

# GridSearchCV refits by default (refit=True): after the search it trains a
# clone of the base estimator (objective='reg:absoluteerror') with
# best_params_ on the full training set and exposes it as best_estimator_.
# Taking that model directly keeps the saved artifact in sync with the
# parameters printed above, instead of relying on hand-copied literals that
# silently go stale when the data or the grid changes, and avoids training
# the same model a second time.
model_all_indices = grid_search.best_estimator_

# Persist the tuned model; this overwrites any previously saved file at the
# same path.
dump(model_all_indices, '../models/xgboost.joblib')

In [None]:
# Third sweep: learning rate, tree count and depth are pinned to a single
# value each, so only column subsampling and L2 regularisation vary.
# 1 * 3 * 4 * 1 * 1 = 12 candidate combinations, each scored with 3-fold
# cross-validation (36 fits, plus the default final refit).
param_grid = dict(
    learning_rate=[0.1],
    colsample_bytree=[0.3, 0.4, 0.5],
    reg_lambda=[2.5, 5.0, 10.0, 20.0],
    n_estimators=[750],
    max_depth=[None],  # None defers to the library's default depth
)

# Fresh, unfitted base estimator with an absolute-error objective.
model_all_indices = xgboost.XGBRegressor(objective='reg:absoluteerror')

grid_search = GridSearchCV(
    estimator=model_all_indices,
    param_grid=param_grid,
    scoring='neg_mean_absolute_error',  # higher (closer to 0) is better
    cv=3,
    n_jobs=-1,  # use every available processor for the search
    verbose=1,
)
grid_search.fit(X_train_all_indices, y_train)

In [None]:
# Report the winning combination from the grid search run in the previous cell.
print(f"Best parameters: {grid_search.best_params_}")

# GridSearchCV refits by default (refit=True): after the search it trains a
# clone of the base estimator (objective='reg:absoluteerror') with
# best_params_ on the full training set and exposes it as best_estimator_.
# Taking that model directly keeps the saved artifact in sync with the
# parameters printed above, instead of relying on hand-copied literals that
# silently go stale when the data or the grid changes, and avoids training
# the same model a second time.
model_all_indices = grid_search.best_estimator_

# Persist the tuned model; this overwrites any previously saved file at the
# same path.
dump(model_all_indices, '../models/xgboost.joblib')

In [None]:
# Fourth sweep: everything except the L2 regularisation strength is pinned,
# so this only compares three reg_lambda values (3 candidates, each scored
# with 3-fold cross-validation: 9 fits, plus the default final refit).
param_grid = dict(
    learning_rate=[0.1],
    colsample_bytree=[0.5],
    reg_lambda=[20.0, 40.0, 60.0],
    n_estimators=[750],
    max_depth=[None],  # None defers to the library's default depth
)

# Fresh, unfitted base estimator with an absolute-error objective.
model_all_indices = xgboost.XGBRegressor(objective='reg:absoluteerror')

grid_search = GridSearchCV(
    estimator=model_all_indices,
    param_grid=param_grid,
    scoring='neg_mean_absolute_error',  # higher (closer to 0) is better
    cv=3,
    n_jobs=-1,  # use every available processor for the search
    verbose=1,
)
grid_search.fit(X_train_all_indices, y_train)

In [None]:
# Report the winning combination from the grid search run in the previous cell.
print(f"Best parameters: {grid_search.best_params_}")

# GridSearchCV refits by default (refit=True): after the search it trains a
# clone of the base estimator (objective='reg:absoluteerror') with
# best_params_ on the full training set and exposes it as best_estimator_.
# Taking that model directly keeps the saved artifact in sync with the
# parameters printed above, instead of relying on hand-copied literals that
# silently go stale when the data or the grid changes, and avoids training
# the same model a second time.
model_all_indices = grid_search.best_estimator_

# Persist the tuned model; this overwrites any previously saved file at the
# same path.
dump(model_all_indices, '../models/xgboost.joblib')