# Extreme Gradient Boosting (XGBoost)


### Initialization and Hyperparameter Grid Search

In [3]:
from xgboost import XGBRegressor
from utility import start, train, display
import numpy as np

# Load the dataset through the project helper, which hands back the
# train/test partitions plus the underlying frame.
X_train, X_test, y_train, y_test, df = start("encoded_data_04_13.csv")

# Candidate values for each hyperparameter of the search.
n_estimators_list = [100, 500, 2000]
max_depth_list = [3, 6, 7]
learning_rate_list = [0.01, 0.03, 0.25]

# Every combination, in the order three nested loops would visit them
# (n_estimators outermost, learning_rate innermost).
param_grid = [
    (n, depth, rate)
    for n in n_estimators_list
    for depth in max_depth_list
    for rate in learning_rate_list
]

# Lowest `mae` seen so far and the settings that produced it.
best_params = {}
best_score = np.inf

for n_estimators, max_depth, learning_rate in param_grid:
    print(f"Training with n_estimators={n_estimators}, max_depth={max_depth}, learning_rate={learning_rate}")

    # Fresh regressor for this combination; fitting and scoring are delegated
    # to the project's `train` helper.
    model = XGBRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        learning_rate=learning_rate,
    )
    # NOTE(review): `mape_train` is bound twice in this unpack, so the 7th
    # value returned by `train` is overwritten by the 8th. Confirm against
    # `utility.train` which element is really the training MAPE.
    _, _, y_test_original, mae, mape, mae_train, mape_train, mape_train = train(
        model, X_train, X_test, y_train, y_test
    )

    # Keep this combination only when it strictly improves on the best MAE.
    # NOTE(review): `mae` is presumably the test-set error; if so, the chosen
    # hyperparameters are tuned on the test split -- confirm.
    if mae < best_score:
        best_score = mae
        best_params = {'n_estimators': n_estimators, 'max_depth': max_depth, 'learning_rate': learning_rate}

    print("Mean Absolute Percentage Error (MAPE):", round(mape, 2), "%. Training MAPE:", round(mape_train, 2), "%")

            


Training with n_estimators=100, max_depth=3, learning_rate=0.01
Training Set Scores:
Mean Absolute Error (MAE): 3.31 M
Mean Absolute Percentage Error (MAPE): 106.12 %

Test Set Scores:
Mean Absolute Error (MAE): 3.35 M
Mean Absolute Percentage Error (MAPE): 108.35 %
Mean Absolute Percentage Error (MAE): 3347894.925931291%
----------------------------------------------
Training with n_estimators=100, max_depth=3, learning_rate=0.03
Training Set Scores:
Mean Absolute Error (MAE): 2.51 M
Mean Absolute Percentage Error (MAPE): 71.23 %

Test Set Scores:
Mean Absolute Error (MAE): 2.55 M
Mean Absolute Percentage Error (MAPE): 72.42 %
Mean Absolute Percentage Error (MAE): 2546347.9365881104%
----------------------------------------------
Training with n_estimators=100, max_depth=3, learning_rate=0.25
Training Set Scores:
Mean Absolute Error (MAE): 1.93 M
Mean Absolute Percentage Error (MAPE): 57.25 %

Test Set Scores:
Mean Absolute Error (MAE): 1.97 M
Mean Absolute Percentage Error (MAPE): 58

In [16]:
# Report the best hyperparameter combination found by the grid search.
print(best_params)
# `best_score` is assigned from `mae` in the search loop, so it is a mean
# absolute error, not a percentage error. A single print avoids the stray
# "None" that wrapping one print() call in another emits.
print(f"Mean Absolute Error (MAE): {best_score}")

{'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.01}
Mean Absolute Percentage Error (MAE): 4707411.87951453
None
