In [None]:
from sklearn.metrics import mean_squared_error, classification_report, confusion_matrix, r2_score


from sklearn_evaluation.plot import grid_search  # For plotting results from grid search
from tabulate import tabulate

from imblearn.metrics import geometric_mean_score
import joblib
# Data handling and manipulation
import pandas as pd  # Data analysis and manipulation tool
import numpy as np  # Numerical operations on arrays and matrices


In [5]:
base_model = joblib.load('./Data/gridSearch_rf.pickle')
balanced_model = joblib.load('./Data/gridSearch_rf_balanced.pickle')
imbalanced_model = joblib.load('./Data/gridSearch_rf_imbalanced.pickle')

best_models: dict = {"base": base_model.best_estimator_, "balanced": balanced_model.best_estimator_, "imbalanced": imbalanced_model.best_estimator_}


In [3]:
X_test = pd.read_pickle('Data/X_test.pickle')
y_test = pd.read_pickle('Data/y_test.pickle')
X_train = pd.read_pickle('Data/X_train.pickle')
y_train= pd.read_pickle('Data/y_train.pickle')

In [18]:


for key, model in best_models.items():
    y_test_pred = model.predict(X_test)
    report = classification_report(y_test, y_test_pred, output_dict=True)  # Output as a dictionary

    # Convert the dictionary into rows for the table
    rows = []
    for metric, values in report.items():
        if isinstance(values, dict):  # Handle metrics like precision, recall, f1-score
            rows.append([metric] + list(values.values()))
        else:  # Handle single metrics like accuracy
            rows.append([metric, values])
            
    # Create headers: dynamically based on dictionary keys
    headers = ["Metric"] + (list(report["weighted avg"].keys()) if "weighted avg" in report else [])

    print(f"Test Data Model: {key}  Geometric Mean: {geometric_mean_score(y_test, y_test_pred)}")
    print(tabulate(rows, headers=headers, tablefmt="grid"))  # Use "grid" format for better visuals
    print("\n")



Test Data Model: base  Geometric Mean: 0.7149113728705784
+--------------+-------------+----------+------------+-----------+
| Metric       |   precision |   recall |   f1-score |   support |
| 0            |    0.932919 | 0.992948 |   0.961998 |     13614 |
+--------------+-------------+----------+------------+-----------+
| 1            |    0.914818 | 0.514728 |   0.658786 |      2003 |
+--------------+-------------+----------+------------+-----------+
| accuracy     |    0.931613 |          |            |           |
+--------------+-------------+----------+------------+-----------+
| macro avg    |    0.923869 | 0.753838 |   0.810392 |     15617 |
+--------------+-------------+----------+------------+-----------+
| weighted avg |    0.930598 | 0.931613 |   0.923109 |     15617 |
+--------------+-------------+----------+------------+-----------+


Test Data Model: balanced  Geometric Mean: 0.8092142691682905
+--------------+-------------+----------+------------+-----------+
| Metri

In [21]:


for key, model in best_models.items():
    y_train_pred = model.predict(X_train)
    report = classification_report(y_train, y_train_pred, output_dict=True)  # Output as a dictionary

    # Convert the dictionary into rows for the table
    rows = []
    for metric, values in report.items():
        if isinstance(values, dict):  # Handle metrics like precision, recall, f1-score
            rows.append([metric] + list(values.values()))
        else:  # Handle single metrics like accuracy
            rows.append([metric, values])
            
    # Create headers: dynamically based on dictionary keys
    headers = ["Metric"] + (list(report["weighted avg"].keys()) if "weighted avg" in report else [])

    print(f"Train Data Model: {key}  Geometric Mean: {geometric_mean_score(y_train, y_train_pred)}")
    print(tabulate(rows, headers=headers, tablefmt="grid"))  # Use "grid" format for better visuals
    print("\n")



Train Data Model: base  Geometric Mean: 0.7046304342072924
+--------------+-------------+----------+------------+-----------+
| Metric       |   precision |   recall |   f1-score |   support |
| 0            |    0.934536 | 0.993399 |   0.963069 |     54838 |
+--------------+-------------+----------+------------+-----------+
| 1            |    0.913293 | 0.499803 |   0.646052 |      7629 |
+--------------+-------------+----------+------------+-----------+
| accuracy     |    0.933117 |          |            |           |
+--------------+-------------+----------+------------+-----------+
| macro avg    |    0.923915 | 0.746601 |   0.804561 |     62467 |
+--------------+-------------+----------+------------+-----------+
| weighted avg |    0.931942 | 0.933117 |   0.924352 |     62467 |
+--------------+-------------+----------+------------+-----------+


Train Data Model: balanced  Geometric Mean: 0.8072605103076389
+--------------+-------------+----------+------------+-----------+
| Met