In [1]:
#Imports
import numpy as np
import pandas as pd
import sys, os, random
import importlib
from sklearn.preprocessing import StandardScaler
from pathlib import Path
from datetime import datetime
import json
import joblib

#Warning Suppression
import warnings
warnings.filterwarnings("ignore", module="skopt") #Ignore scikit-optimize warning print lines
from scipy.linalg import LinAlgWarning
warnings.filterwarnings("ignore", category=LinAlgWarning) #For QDA
#For LightGBM. NOTE(review): no module filter is given, so this hides every UserWarning, not only LightGBM's.
warnings.filterwarnings("ignore", category=UserWarning) #For LightGBM

#Class Import
#The path is built from os.getcwd(), so this assumes the kernel's working directory sits two levels below the project root.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../.."))) #Allow for imports from src
from src.models import ML_Class_2
importlib.reload(ML_Class_2) #Ensures file is up to date!
#Imported after the reload so the name is bound to the freshly reloaded class.
from src.models.ML_Class_2 import Model_Tester_V2

#Utils Import
from src.models.model_artifacts import (get_artifact_dir, load_model, load_models, save_model, save_models,)
#Directory used for every saved/loaded model in this notebook (joined with "<key>.joblib" in refresh_model).
ARTIFACT_DIR = get_artifact_dir("algorithm_test_3")
from src.models import model_specs
importlib.reload(model_specs) #Ensures file is up to date!
#Imported after the reload so MODEL_SPECS reflects the current contents of model_specs.
from src.models.model_specs import MODEL_SPECS
from src.models.perf_utils import track_performance

#Set Seed
#NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting it here cannot change
#hashing in the already-running kernel; it is only inherited by child processes. Confirm this is intended.
os.environ["PYTHONHASHSEED"] = "1945"
random.seed(1945)
np.random.seed(1945)

### Models Tested:

| Key | Algorithm | Library |
|:----|:-----------|:---------|
| **dt** | Decision Tree Classifier | scikit-learn |
| **rf** | Random Forest Classifier | scikit-learn |
| **et** | Extra Trees Classifier | scikit-learn |
| **bag** | Bagging Classifier (Tree Base) | scikit-learn |
| **gb** | Gradient Boosting Classifier | scikit-learn |
| **ada** | AdaBoost Classifier | scikit-learn |
| **qda** | Quadratic Discriminant Analysis | scikit-learn |
| **xgb** | XGBoost Classifier | xgboost |
| **xgbrf** | XGBoost Random Forest | xgboost |
| **lgbm** | LightGBM Classifier | lightgbm |
| **cat** | CatBoost Classifier | catboost |

 **Note:**  
Preliminary algorithm tests were run in Algorithm_Test_2. This notebook performs further hyperparameter optimization and compares the resulting models.

In [2]:
# --- Data Loading and Preprocessing ---

#Compiled data of convoys
#Routes examined are HX, SC, OB, ON, ONS
#NOTE(review): absolute, machine-specific path; point it at a configurable data directory before sharing.
df = pd.read_csv('/Users/matthewplambeck/Desktop/Convoy Predictor/data/processed/Complete_Convoy_Data.csv')
df = df.drop(columns=['Unnamed: 0'])
#Drop unnecessary/redundant features
df = df.drop(columns=['Convoy Number', 'Number of Ships Sunk', 'Depart_Date', 'Arrival/Dispersal Date', 'Number of Escorts Sunk', \
                         'Number of Stragglers Sunk', 'Total Tons of Ships Sunk', 'Escort Sink Percentage', 'Straggler Sink Percentage'])
#Convert Overall Sink Percentage to a binary target.
#Risk is binary based off whether a ship was sunk while in a convoy:  (0 = No Ships Sunk, 1 = At Least One Ship Sunk)
df['Risk'] = (df['Overall Sink Percentage'] > 0).astype(int)
#Feature frame: remove Overall Sink Percentage (it leaks the target) and the target itself.
feature_frame = df.drop(columns=['Overall Sink Percentage', 'Risk'])
#Feature names for later feature analysis.
#They are taken from the same frame as X so each name lines up with one column of X.
#The previous `feature_names[:-1]` was a bare expression whose result was discarded,
#which left 'Overall Sink Percentage' in the list and made it one entry longer than X.
feature_names = list(feature_frame)
X = np.array(feature_frame)
y = df['Risk'].values #Prediction value
#Guard against names and columns drifting apart again.
assert len(feature_names) == X.shape[1], "feature_names must match the columns of X"

In [3]:
# --- Threshold Calibration Helpers ---
#RETRAIN_MODELS controls whether refresh_model reruns the hyperparameter search:
#  True  -> always rerun the full search.
#  False -> reload the last optimized estimator when a saved artifact exists,
#           recalibrate the decision threshold, and re-evaluate the model so
#           metrics stay in sync with the new beta value.
RETRAIN_MODELS = False

def prepare_tester(model_key, *, scaler=None, cv_folds=None):
    """Build a Model_Tester_V2 for one entry of MODEL_SPECS and split the data.

    Args:
        model_key: Key into MODEL_SPECS; the spec's "estimator", "grid_large"
            and "config" entries are passed to the tester.
        scaler: Optional scaler forwarded to the tester unchanged.
        cv_folds: Number of CV folds. When falsy, the spec's "cv_folds" entry
            is used, falling back to 5 if the spec has none.

    Returns:
        The tester, after an 80/20 train/test split of the notebook-level
        X and y with random_state=1945. If the tester's parameter grid is a
        callable, it is replaced by the result of calling it with y_train.
    """
    model_spec = MODEL_SPECS[model_key]
    n_folds = cv_folds if cv_folds else model_spec.get("cv_folds", 5)

    tester = Model_Tester_V2(
        model=model_spec["estimator"],
        scaler=scaler,
        parameter_grid=model_spec["grid_large"],
        cv_folds=n_folds,
        feature_names=feature_names,
        model_config=model_spec["config"],
    )
    tester.train_test_split(X, y, train_size=0.8, random_state=1945)

    #Some specs supply the grid as a function of the training labels; resolve it now.
    grid = tester.parameter_grid
    if callable(grid):
        tester.parameter_grid = grid(tester.y_train)
    return tester

def refresh_model(model_key, tester, *, method="halving", scoring="recall",
                  perf_label=None, optimize_kwargs=None):
    """Optimize or reload a tester, then recalibrate, evaluate and save it.

    The hyperparameter search runs when RETRAIN_MODELS is truthy or when
    ARTIFACT_DIR/"<model_key>.joblib" does not exist; otherwise the saved
    model is loaded with assign_to=tester. In both cases the tester's
    threshold is recalibrated, the tester is evaluated without plots, and
    it is saved back to ARTIFACT_DIR.

    Args:
        model_key: Name used for the artifact file and for load/save calls.
        tester: Tester object to optimize or populate.
        method: Search method forwarded to tester.optimize.
        scoring: Scoring name forwarded to tester.optimize.
        perf_label: When truthy, the search is wrapped with
            track_performance(perf_label).
        optimize_kwargs: Extra keyword arguments for tester.optimize.

    Returns:
        Whatever tester.evaluate(show_plots=False) returns.
    """
    extra_kwargs = optimize_kwargs if optimize_kwargs else {}
    cached_artifact = ARTIFACT_DIR / f"{model_key}.joblib"

    if not RETRAIN_MODELS and cached_artifact.exists():
        #A saved model is available and retraining was not requested: reuse it.
        load_model(model_key, directory=ARTIFACT_DIR, assign_to=tester)
    else:
        def _run_opt():
            tester.optimize(scoring=scoring, method=method, **extra_kwargs)

        runner = track_performance(perf_label)(_run_opt) if perf_label else _run_opt
        runner()

    #Recalibrate on every call so reloaded models pick up the current threshold settings.
    tester._calibrate_threshold()
    metrics = tester.evaluate(show_plots=False)
    save_model(model_key, tester, directory=ARTIFACT_DIR)
    return metrics


**Start of Algorithm Tests**

In [4]:
#Decision Tree
#Build a tester from the "dt" spec, then run or reload the halving search via refresh_model.
dt = prepare_tester(model_key="dt")
dt_results = refresh_model(
    model_key="dt",
    tester=dt,
    method="halving",
    perf_label="dt_optimize",
)

Applied decision threshold: 0.4493

DecisionTreeClassifier Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.49      0.64       185
           1       0.31      0.84      0.45        50

    accuracy                           0.57       235
   macro avg       0.61      0.67      0.55       235
weighted avg       0.79      0.57      0.60       235


ROC AUC Score: 0.7449
Matthews Correlation Coefficient (MCC): 0.2751
Balanced Accuracy: 0.6659
DecisionTreeClassifier Confusion Matrix:
[[91 94]
 [ 8 42]]


In [25]:
#Random Forest
#Build a tester from the "rf" spec, then run or reload the halving search via refresh_model.
rf = prepare_tester(model_key="rf")
rf_results = refresh_model(
    model_key="rf",
    tester=rf,
    method="halving",
    perf_label="rf_optimize",
)

Applied decision threshold: 0.3932

RandomForestClassifier Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.85      0.88       185
           1       0.56      0.70      0.62        50

    accuracy                           0.82       235
   macro avg       0.74      0.78      0.75       235
weighted avg       0.84      0.82      0.83       235


ROC AUC Score: 0.8128
Matthews Correlation Coefficient (MCC): 0.5145
Balanced Accuracy: 0.7770
RandomForestClassifier Confusion Matrix:
[[158  27]
 [ 15  35]]


In [26]:
#Extra Trees
#Build a tester from the "et" spec, then run or reload the halving search via refresh_model.
et = prepare_tester(model_key="et")
et_results = refresh_model(
    model_key="et",
    tester=et,
    method="halving",
    perf_label="et_optimize",
)

Applied decision threshold: 0.5302

ExtraTreesClassifier Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.89      0.89       185
           1       0.60      0.64      0.62        50

    accuracy                           0.83       235
   macro avg       0.75      0.76      0.76       235
weighted avg       0.84      0.83      0.84       235


ROC AUC Score: 0.8123
Matthews Correlation Coefficient (MCC): 0.5156
Balanced Accuracy: 0.7632
ExtraTreesClassifier Confusion Matrix:
[[164  21]
 [ 18  32]]


In [27]:
#Bagging Classifier
#Build a tester from the "bag" spec, then run or reload the halving search via refresh_model.
bag = prepare_tester(model_key="bag")
bag_results = refresh_model(
    model_key="bag",
    tester=bag,
    method="halving",
    perf_label="bag_optimize",
)

Applied decision threshold: 0.4586

BaggingClassifier Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.79      0.84       185
           1       0.47      0.70      0.56        50

    accuracy                           0.77       235
   macro avg       0.69      0.74      0.70       235
weighted avg       0.81      0.77      0.78       235


ROC AUC Score: 0.8214
Matthews Correlation Coefficient (MCC): 0.4310
Balanced Accuracy: 0.7446
BaggingClassifier Confusion Matrix:
[[146  39]
 [ 15  35]]


In [5]:
#Gradient Boosting Classifier
#Build a tester from the "gb" spec, then run or reload the halving search via refresh_model.
gb = prepare_tester(model_key="gb")
gb_results = refresh_model(
    model_key="gb",
    tester=gb,
    method="halving",
    perf_label="gb_optimize",
)

Applied decision threshold: 0.4570

GradientBoostingClassifier Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.92      0.89       185
           1       0.59      0.44      0.51        50

    accuracy                           0.82       235
   macro avg       0.73      0.68      0.70       235
weighted avg       0.80      0.82      0.81       235


ROC AUC Score: 0.8036
Matthews Correlation Coefficient (MCC): 0.4033
Balanced Accuracy: 0.6795
GradientBoostingClassifier Confusion Matrix:
[[170  15]
 [ 28  22]]


In [29]:
#AdaBoost
#Build a tester from the "ada" spec, then run or reload the halving search via refresh_model.
ada = prepare_tester(model_key="ada")
ada_results = refresh_model(
    model_key="ada",
    tester=ada,
    method="halving",
    perf_label="ada_optimize",
)

Applied decision threshold: 0.1875

AdaBoostClassifier Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.55      0.69       185
           1       0.34      0.86      0.49        50

    accuracy                           0.61       235
   macro avg       0.64      0.70      0.59       235
weighted avg       0.81      0.61      0.65       235


ROC AUC Score: 0.7818
Matthews Correlation Coefficient (MCC): 0.3334
Balanced Accuracy: 0.7030
AdaBoostClassifier Confusion Matrix:
[[101  84]
 [  7  43]]


In [30]:
#QuadraticDiscriminantAnalysis
#The only model in this notebook that is given a scaler (StandardScaler).
qda_scaler = StandardScaler()
qda = prepare_tester(model_key="qda", scaler=qda_scaler)
qda_results = refresh_model(
    model_key="qda",
    tester=qda,
    method="halving",
    perf_label="qda_optimize",
)

Applied decision threshold: 0.1770

QuadraticDiscriminantAnalysis Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.52      0.67       185
           1       0.33      0.88      0.48        50

    accuracy                           0.60       235
   macro avg       0.64      0.70      0.58       235
weighted avg       0.81      0.60      0.63       235


ROC AUC Score: 0.7897
Matthews Correlation Coefficient (MCC): 0.3335
Balanced Accuracy: 0.7022
QuadraticDiscriminantAnalysis Confusion Matrix:
[[97 88]
 [ 6 44]]


In [31]:
#XGBoost
#Build a tester from the "xgb" spec, then run or reload the halving search via refresh_model.
xgb = prepare_tester(model_key="xgb")
xgb_results = refresh_model(
    model_key="xgb",
    tester=xgb,
    method="halving",
    perf_label="xgb_optimize",
)

Applied decision threshold: 0.6045

XGBClassifier Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.89      0.88       185
           1       0.53      0.46      0.49        50

    accuracy                           0.80       235
   macro avg       0.70      0.68      0.68       235
weighted avg       0.79      0.80      0.79       235


ROC AUC Score: 0.7670
Matthews Correlation Coefficient (MCC): 0.3725
Balanced Accuracy: 0.6759
XGBClassifier Confusion Matrix:
[[165  20]
 [ 27  23]]


In [32]:
#XGBoost Random Forest
#NOTE(review): XGBRFClassifier is not referenced anywhere in this cell; the estimator comes
#from MODEL_SPECS. If the import is still needed, it belongs in the top import cell.
from xgboost import XGBRFClassifier

xgbrf = prepare_tester(model_key="xgbrf")
xgbrf_results = refresh_model(
    model_key="xgbrf",
    tester=xgbrf,
    method="halving",
    perf_label="xgbrf_optimize",
)

Applied decision threshold: 0.5835

XGBRFClassifier Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.68      0.78       185
           1       0.39      0.74      0.51        50

    accuracy                           0.69       235
   macro avg       0.65      0.71      0.64       235
weighted avg       0.80      0.69      0.72       235


ROC AUC Score: 0.7942
Matthews Correlation Coefficient (MCC): 0.3506
Balanced Accuracy: 0.7105
XGBRFClassifier Confusion Matrix:
[[126  59]
 [ 13  37]]


In [33]:
#LightGBM
#Differs from the other cells: 3 CV folds and a random search with n_iter=32 instead of halving.
lgbm = prepare_tester(model_key="lgbm", cv_folds=3)
lgbm_search_kwargs = {"n_iter": 32}
lgbm_results = refresh_model(
    model_key="lgbm",
    tester=lgbm,
    method="random",
    perf_label="lgbm_optimize",
    optimize_kwargs=lgbm_search_kwargs,
)

Applied decision threshold: 0.1794

LGBMClassifier Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.84      0.85       185
           1       0.46      0.52      0.49        50

    accuracy                           0.77       235
   macro avg       0.67      0.68      0.67       235
weighted avg       0.78      0.77      0.77       235


ROC AUC Score: 0.7469
Matthews Correlation Coefficient (MCC): 0.3437
Balanced Accuracy: 0.6789
LGBMClassifier Confusion Matrix:
[[155  30]
 [ 24  26]]


In [35]:
#CatBoost
#Build a tester from the "cat" spec, then run or reload the halving search via refresh_model.
cat = prepare_tester(model_key="cat")
cat_results = refresh_model(
    model_key="cat",
    tester=cat,
    method="halving",
    perf_label="cat_optimize",
)

Applied decision threshold: 0.0381

CatBoostClassifier Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.76      0.82       185
           1       0.43      0.66      0.52        50

    accuracy                           0.74       235
   macro avg       0.66      0.71      0.67       235
weighted avg       0.79      0.74      0.76       235


ROC AUC Score: 0.7717
Matthews Correlation Coefficient (MCC): 0.3681
Balanced Accuracy: 0.7111
CatBoostClassifier Confusion Matrix:
[[141  44]
 [ 17  33]]


In [None]:
# --- Test Of Loading Trained Models ---
#Smoke test for the artifact round trip: build a fresh "dt" tester, load the saved
#"dt" artifact from ARTIFACT_DIR with assign_to pointing at that tester, then evaluate it.
#NOTE(review): unlike refresh_model, the threshold is not recalibrated here — confirm that
#load_model restores the stored threshold.
dt_reload = prepare_tester(model_key="dt")
load_model(
    "dt",
    directory=ARTIFACT_DIR,
    assign_to=dt_reload,
)
dt_reload_results = dt_reload.evaluate(show_plots=False)

In [None]:
# --- Method Used for Optimization and Original Tests ---

# #Decision Tree

# spec = MODEL_SPECS["dt"]
# dt = Model_Tester_V2(
#     model=spec["estimator"],
#     parameter_grid=spec["grid_large"],
#     cv_folds=5,
#     feature_names=feature_names,
#     model_config=spec["config"],)
    
# dt.train_test_split(X, y, train_size=0.8, random_state=1945)

# @track_performance("dt_optimize")
# def run_dt_opt():
#     dt.optimize(scoring="recall", method='halving')

# run_dt_opt()
# dt_results = dt.evaluate(show_plots=False)

# save_model("dt", dt, directory=ARTIFACT_DIR)

In [4]:
# --- Comparison To GB Model in Classification_Test_2.ipynb ---
#Same "gb" spec and train/test split as the other cells, but tuned with an exhaustive
#grid search. The recorded run of this search took about 63 minutes, so it is guarded by
#the same RETRAIN_MODELS / saved-artifact check that refresh_model applies to the other
#models; previously it reran unconditionally on every execution of the notebook.

spec = MODEL_SPECS["gb"]
gb2 = Model_Tester_V2(
    model=spec["estimator"],
    parameter_grid=spec["grid_large"],
    cv_folds=5,
    feature_names=feature_names,
    model_config=spec["config"],)

gb2.train_test_split(X, y, train_size=0.8, random_state=1945)

@track_performance("gb2_optimize")
def run_gb_opt():
    """Run the grid search on gb2, scoring by recall."""
    gb2.optimize(scoring="recall", method='grid')

#save_model("gb2", ...) writes to ARTIFACT_DIR/"gb2.joblib" (see the recorded cell output).
gb2_artifact = ARTIFACT_DIR / "gb2.joblib"
if RETRAIN_MODELS or not gb2_artifact.exists():
    run_gb_opt()
    gb2_results = gb2.evaluate(show_plots=False)
    save_model("gb2", gb2, directory=ARTIFACT_DIR)
else:
    #Reuse the stored model instead of repeating the search (same load-then-evaluate
    #pattern as the dt reload test cell).
    load_model("gb2", directory=ARTIFACT_DIR, assign_to=gb2)
    gb2_results = gb2.evaluate(show_plots=False)


Optimization Method: Grid
Best Hyperparameters Found:
{'learning_rate': 0.12, 'max_depth': 3, 'max_features': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 300, 'subsample': 0.7}
Best Cross-Validation Recall: 0.4677
Performance Stats:
gb2_optimize completed in 63.33mins | ΔRSS -168.00 MB | CPU 90.5%
Applied decision threshold: 0.3930

GradientBoostingClassifier Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.90      0.90       185
           1       0.62      0.58      0.60        50

    accuracy                           0.83       235
   macro avg       0.75      0.74      0.75       235
weighted avg       0.83      0.83      0.83       235


ROC AUC Score: 0.8248
Matthews Correlation Coefficient (MCC): 0.4939
Balanced Accuracy: 0.7414
GradientBoostingClassifier Confusion Matrix:
[[167  18]
 [ 21  29]]


PosixPath('/Users/matthewplambeck/Desktop/Convoy Predictor/artifacts/algorithm_test_3/gb2.joblib')