**Testing Gradient_Boosting_Optimization.py**

In [6]:
import pandas as pd
import sys, os, random
import importlib
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.metrics import make_scorer, recall_score
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

#Class Import
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../.."))) #Allow for imports from src
from src.models import Gradient_Boosting_Optimization
importlib.reload(Gradient_Boosting_Optimization) #Ensures file is uptodate!
from src.models.Gradient_Boosting_Optimization import Gradient_Boosting_Optimization

#Set Seed
os.environ["PYTHONHASHSEED"] = "1945"
random.seed(1945)
np.random.seed(1945)

In [3]:
#Complied data of convoys
#Routes examined are HX, SC, OB, ON, ONS
df = pd.read_csv('/Users/matthewplambeck/Desktop/Convoy Predictor/data/processed/Complete_Convoy_Data.csv')
df = df.drop(columns=['Unnamed: 0'])
df.shape #Test

(1174, 21)

In [4]:
#Drop unecessary/redundent features
df = df.drop(columns=['Convoy Number', 'Number of Ships Sunk', 'Depart_Date', 'Arrival/Dispersal Date', 'Number of Escorts Sunk', \
                         'Number of Stragglers Sunk', 'Total Tons of Ships Sunk', 'Escort Sink Percentage', 'Straggler Sink Percentage'])
df.reset_index(drop=True).head(3)

Unnamed: 0,Number of Ships,Number of Escort Ships,Number of Stragglers,Total Tons of Convoy,Overall Sink Percentage,Avg Number of U-Boats in Atlantic,Escort Ratio,Time At Sea (Days),Month,Year,Previous Month Avg Sink %,Approx. Sighting Range
0,5.0,2.0,0.0,22877.0,0.0,6.0,0.4,3.0,9.0,1939.0,0.0,12.1902
1,5.0,2.0,0.0,22967.0,0.0,6.0,0.4,5.0,9.0,1939.0,0.0,12.1902
2,7.0,4.0,0.0,21293.0,0.0,6.0,0.571429,5.0,9.0,1939.0,0.0,14.434062


In [5]:
#Convert Overall Sink Percentage to binary 1( High)
df['Risk'] = (df['Overall Sink Percentage'] > 0).astype(int) 
#Risk is binary based off whether a ship was sunk while in a convoy:  (0 = No Ships Sunk, 1 = At Least One Ship Sunk)
X = np.array(df.drop(columns=['Overall Sink Percentage', 'Risk'])) #Remove Overall Sink Percentage as it leaks data
y = df['Risk'].values #Prediction value

In [7]:

gb_params = {
    "learning_rate": [0.01, 0.1],
    "n_estimators": [150, 300],
    "max_depth": [3, 5],
    "subsample": [0.8, 1.0],
    "min_samples_leaf": [1, 5]}

optimizer = Gradient_Boosting_Optimization(
    model=GradientBoostingClassifier(random_state=1945),
    parameter_grid=gb_params,
    cv_folds=5,
    positive_label=1,
    optimize_scoring="recall",
    auto_calibrate_threshold=True,
    threshold_beta=2.0,)


In [8]:
#Train Test Split
optimizer.train_test_split(X, y, train_size=0.8, random_state=1945)
train_scores, test_scores = optimizer.k_folds(stratified=True)


Average Train Score: 0.9212 ± 0.0059
Average Test Score: 0.8073 ± 0.0319


In [9]:
#Optimize
optimizer.optimize(scoring=None, fit_params={})



Best Hyperparameters Found:
{'learning_rate': 0.1, 'max_depth': 5, 'min_samples_leaf': 5, 'n_estimators': 150, 'subsample': 0.8}
Best Cross-Validation Score: 0.4230


In [13]:
#Evaluate
results = optimizer.evaluate(show_plots=False)
results["recall"], results["f2_score"], results["decision_threshold"]

Applied custom decision threshold: 0.350

GradientBoostingClassifier Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.89      0.88       185
           1       0.56      0.50      0.53        50

    accuracy                           0.81       235
   macro avg       0.71      0.70      0.70       235
weighted avg       0.80      0.81      0.80       235


ROC AUC Score: 0.7830
Matthews Correlation Coefficient (MCC): 0.4076
Balanced Accuracy: 0.6959
Confusion Matrix:
          Predicted 0  Predicted 1
Actual 0          165           20
Actual 1           25           25
Recall (positive=1): 0.5000
F2 Score: 0.5102
False Negatives: 25
GradientBoostingClassifier Confusion Matrix (values only):
[[165  20]
 [ 25  25]]


(0.5, 0.5102040816326531, 0.35)

In [11]:
optimizer.set_decision_threshold(0.35) #Test manual threshold for compairson
threshold_results = optimizer.evaluate(show_plots=False)

Applied custom decision threshold: 0.350

GradientBoostingClassifier Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.89      0.88       185
           1       0.56      0.50      0.53        50

    accuracy                           0.81       235
   macro avg       0.71      0.70      0.70       235
weighted avg       0.80      0.81      0.80       235


ROC AUC Score: 0.7830
Matthews Correlation Coefficient (MCC): 0.4076
Balanced Accuracy: 0.6959
Confusion Matrix:
          Predicted 0  Predicted 1
Actual 0          165           20
Actual 1           25           25
Recall (positive=1): 0.5000
F2 Score: 0.5102
False Negatives: 25
GradientBoostingClassifier Confusion Matrix (values only):
[[165  20]
 [ 25  25]]


In [16]:
#TODO Fix this
xgb_params = {
    "eta": [0.05, 0.1],
    "max_depth": [3, 6],
    "subsample": [0.8, 1.0],
    "colsample_bytree": [0.7, 1.0],
    "gamma": [0, 1],
    "scale_pos_weight": [1, 3],}

xgb_optimizer = Gradient_Boosting_Optimization(
    model=XGBClassifier(
        objective="binary:logistic",
        eval_metric="logloss",
        use_label_encoder=False,
        random_state=1945,
        n_estimators=500),
    parameter_grid=xgb_params,
    positive_label=1,
    optimize_scoring="recall",
    auto_calibrate_threshold=True,
    threshold_beta=2.0,)

xgb_optimizer.train_test_split(X, y, train_size=0.8, random_state=1945)
xgb_optimizer.optimize(
    fit_params={
        "eval_set": [(xgb_optimizer.X_test, xgb_optimizer.y_test)],
        "early_stopping_rounds": 25,
        "verbose": False,})
xgb_results = xgb_optimizer.evaluate(show_plots=False)

ValueError: 
All the 320 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
320 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniforge/base/envs/Python-ML/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/homebrew/Caskroom/miniforge/base/envs/Python-ML/lib/python3.10/site-packages/xgboost/core.py", line 705, in inner_f
    return func(**kwargs)
TypeError: XGBClassifier.fit() got an unexpected keyword argument 'early_stopping_rounds'


In [None]:
#Old Results (Tested in Algorithm_Test_2 Notebook)
# GradientBoostingClassifier Evaluation:

# Classification Report:
#               precision    recall  f1-score   support

#            0       0.86      0.95      0.90       185
#            1       0.71      0.44      0.54        50

#     accuracy                           0.84       235
#    macro avg       0.79      0.70      0.72       235
# weighted avg       0.83      0.84      0.83       235


# ROC AUC Score: 0.8351
# Matthews Correlation Coefficient (MCC): 0.4733
# Balanced Accuracy: 0.6957
# GradientBoostingClassifier Confusion Matrix (values only):
# [[176   9]
#  [ 28  22]]

#New Results from above:

# Applied custom decision threshold: 0.350

# GradientBoostingClassifier Evaluation:

# Classification Report:
#               precision    recall  f1-score   support

#            0       0.87      0.89      0.88       185
#            1       0.56      0.50      0.53        50

#     accuracy                           0.81       235
#    macro avg       0.71      0.70      0.70       235
# weighted avg       0.80      0.81      0.80       235


# ROC AUC Score: 0.7830
# Matthews Correlation Coefficient (MCC): 0.4076
# Balanced Accuracy: 0.6959
# Confusion Matrix:
#           Predicted 0  Predicted 1
# Actual 0          165           20
# Actual 1           25           25
# Recall (positive=1): 0.5000
# F2 Score: 0.5102
# False Negatives: 25
# GradientBoostingClassifier Confusion Matrix (values only):
# [[165  20]
#  [ 25  25]]

**Further Testing as Results did not improve as I had hoped :(**

In [18]:
#
# Baseline ML_Class_1

#Class Import
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../.."))) #Allow for imports from src
from src.models import ML_Class_1
importlib.reload(ML_Class_1) #Ensures file is uptodate!
from src.models.ML_Class_1 import Model_Tester as LegacyTester

legacy = LegacyTester(
    model=GradientBoostingClassifier(random_state=1945),
    parameter_grid=None,
    cv_folds=5)

legacy.train_test_split(X, y, train_size=0.8, random_state=1945)
legacy.optimize()  #no grid search
legacy_results = legacy.evaluate(show_plots=False)
print("Legacy recall:", legacy_results["classification_report"]["1"]["recall"])



GradientBoostingClassifier Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.95      0.90       185
           1       0.71      0.44      0.54        50

    accuracy                           0.84       235
   macro avg       0.79      0.70      0.72       235
weighted avg       0.83      0.84      0.83       235


ROC AUC Score: 0.8351
Matthews Correlation Coefficient (MCC): 0.4733
Balanced Accuracy: 0.6957
GradientBoostingClassifier Confusion Matrix (values only):
[[176   9]
 [ 28  22]]
Legacy recall: 0.44


In [19]:
#Gradient_Boosting_Optimization, default GB (no grid or threshold)

gb_default = Gradient_Boosting_Optimization(
    model=GradientBoostingClassifier(random_state=1945),
    parameter_grid=None,
    cv_folds=5,
    positive_label=1,
    optimize_scoring="recall",
    auto_calibrate_threshold=False)  #No threshold optimization


gb_default.train_test_split(X, y, train_size=0.8, random_state=1945)
gb_default.optimize()
gb_default_results = gb_default.evaluate(show_plots=False)
print("GB default recall:", gb_default_results["classification_report"]["1"]["recall"])



GradientBoostingClassifier Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.95      0.90       185
           1       0.71      0.44      0.54        50

    accuracy                           0.84       235
   macro avg       0.79      0.70      0.72       235
weighted avg       0.83      0.84      0.83       235


ROC AUC Score: 0.8351
Matthews Correlation Coefficient (MCC): 0.4733
Balanced Accuracy: 0.6957
Confusion Matrix:
          Predicted 0  Predicted 1
Actual 0          176            9
Actual 1           28           22
Recall (positive=1): 0.4400
F2 Score: 0.4762
False Negatives: 28
GradientBoostingClassifier Confusion Matrix (values only):
[[176   9]
 [ 28  22]]
GB default recall: 0.44


In [20]:
#Gradient_Boosting_Optimization, grid-search & F2 threshold optimization
gb_grid = Gradient_Boosting_Optimization(
    model=GradientBoostingClassifier(random_state=1945),
    parameter_grid={
        "learning_rate": [0.01, 0.05, 0.1],
        "n_estimators": [150, 250, 350],
        "max_depth": [3, 4, 5],
        "min_samples_leaf": [1, 3, 5],
        "subsample": [0.8, 1.0]},
    cv_folds=5,
    positive_label=1,
    optimize_scoring="recall",
    auto_calibrate_threshold=True,
    threshold_beta=2.0)

gb_grid.train_test_split(X, y, train_size=0.8, random_state=1945)
gb_grid.optimize()
gb_grid_results = gb_grid.evaluate(show_plots=False)

print("Baseline recall:", legacy_results["classification_report"]["1"]["recall"])
print("Default GB recall:", gb_default_results["classification_report"]["1"]["recall"])
print("Grid-search recall:", gb_grid_results["recall"])
print("Grid search best params:", gb_grid.best_model.get_params())
print("Grid search decision threshold:", gb_grid_results["decision_threshold"])



Best Hyperparameters Found:
{'learning_rate': 0.1, 'max_depth': 4, 'min_samples_leaf': 3, 'n_estimators': 350, 'subsample': 1.0}
Best Cross-Validation Score: 0.4432
Applied custom decision threshold: 0.737

GradientBoostingClassifier Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.96      0.89       185
           1       0.68      0.30      0.42        50

    accuracy                           0.82       235
   macro avg       0.76      0.63      0.66       235
weighted avg       0.80      0.82      0.79       235


ROC AUC Score: 0.7835
Matthews Correlation Coefficient (MCC): 0.3683
Balanced Accuracy: 0.6311
Confusion Matrix:
          Predicted 0  Predicted 1
Actual 0          178            7
Actual 1           35           15
Recall (positive=1): 0.3000
F2 Score: 0.3378
False Negatives: 35
GradientBoostingClassifier Confusion Matrix (values only):
[[178   7]
 [ 35  15]]
Baseline recall: 0.44
Default GB rec

In [21]:
#Threshold Comparison: 
thresholds = [0.20, 0.25, 0.30, 0.35, 0.40, 0.45]
rows = []

for th in thresholds:
    gb_grid.set_decision_threshold(th)
    res = gb_grid.evaluate(show_plots=False)
    rows.append({
        "threshold": th,
        "recall": res["recall"],
        "precision": res["classification_report"]["1"]["precision"],
        "false_negatives": res["false_negatives"]})

gb_grid.set_decision_threshold(None) 
pd.DataFrame(rows)


Applied custom decision threshold: 0.200

GradientBoostingClassifier Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.88      0.89       185
           1       0.57      0.62      0.60        50

    accuracy                           0.82       235
   macro avg       0.73      0.75      0.74       235
weighted avg       0.83      0.82      0.82       235


ROC AUC Score: 0.7835
Matthews Correlation Coefficient (MCC): 0.4822
Balanced Accuracy: 0.7478
Confusion Matrix:
          Predicted 0  Predicted 1
Actual 0          162           23
Actual 1           19           31
Recall (positive=1): 0.6200
F2 Score: 0.6102
False Negatives: 19
GradientBoostingClassifier Confusion Matrix (values only):
[[162  23]
 [ 19  31]]
Applied custom decision threshold: 0.250

GradientBoostingClassifier Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.89      0.8

Unnamed: 0,threshold,recall,precision,false_negatives
0,0.2,0.62,0.574074,19
1,0.25,0.56,0.583333,22
2,0.3,0.54,0.586957,23
3,0.35,0.5,0.625,25
4,0.4,0.48,0.631579,26
5,0.45,0.48,0.666667,26


In [22]:
#TODO Look into threshold differences and also print format