In [24]:
from sklearn.metrics import mean_squared_error, classification_report, confusion_matrix, r2_score


from sklearn_evaluation.plot import grid_search  # For plotting results from grid search
from tabulate import tabulate

from imblearn.metrics import geometric_mean_score
import joblib
# Data handling and manipulation
import pandas as pd  # Data analysis and manipulation tool
import numpy as np  # Numerical operations on arrays and matrices


In [25]:
# Load the three persisted search objects from disk, in a fixed order.
# NOTE: joblib.load unpickles arbitrary objects - only load files you trust.
base_model, balanced_model, imbalanced_model = (
    joblib.load(pickle_path)
    for pickle_path in (
        './Data/gridSearch_rf.pickle',
        './Data/gridSearch_rf_balanced.pickle',
        './Data/gridSearch_rf_imbalanced.pickle',
    )
)

# Map a short label to the `best_estimator_` of each loaded object
# (presumably fitted grid searches, judging by the file names - confirm).
best_models: dict = {
    label: search.best_estimator_
    for label, search in (
        ("base", base_model),
        ("balanced", balanced_model),
        ("imbalanced", imbalanced_model),
    )
}


In [26]:
# Read the pickled test and train splits (features X, targets y) in one pass;
# the files are read in the order listed below.
X_test, y_test, X_train, y_train = (
    pd.read_pickle(split_path)
    for split_path in (
        'Data/X_test.pickle',
        'Data/y_test.pickle',
        'Data/X_train.pickle',
        'Data/y_train.pickle',
    )
)

In [27]:


# Print a classification report table (plus geometric mean) for every model
# in `best_models`, evaluated on the test split.
for key, model in best_models.items():
    y_test_pred = model.predict(X_test)
    report = classification_report(y_test, y_test_pred, output_dict=True)  # Output as a dictionary

    # Column names are taken from the "weighted avg" entry when it exists.
    columns = list(report["weighted avg"].keys()) if "weighted avg" in report else []
    headers = ["Metric"] + columns

    # Convert the dictionary into rows for the table
    rows = []
    for metric, values in report.items():
        if isinstance(values, dict):  # Per-class and averaged entries: one value per column
            rows.append([metric] + list(values.values()))
        elif "f1-score" in columns:
            # Scalar entry (accuracy). Appending it right after the label would
            # render it under the "precision" header, which mislabels it. Put it
            # under "f1-score" with the total support instead, as scikit-learn's
            # own text report does. None cells are rendered blank by tabulate.
            cells = [None] * len(columns)
            cells[columns.index("f1-score")] = values
            if "support" in columns:
                cells[columns.index("support")] = report["weighted avg"]["support"]
            rows.append([metric] + cells)
        else:  # No known column layout: keep the scalar next to its label
            rows.append([metric, values])

    print(f"Test Data Model: {key}  Geometric Mean: {geometric_mean_score(y_test, y_test_pred)}")
    print(tabulate(rows, headers=headers, tablefmt="grid"))  # Use "grid" format for better visuals
    print("\n")



Test Data Model: base  Geometric Mean: 0.6629396740699007
+--------------+-------------+----------+------------+-----------+
| Metric       |   precision |   recall |   f1-score |   support |
| 0            |    0.931778 | 0.994691 |   0.962207 |     41440 |
+--------------+-------------+----------+------------+-----------+
| 1            |    0.915677 | 0.441835 |   0.596058 |      5407 |
+--------------+-------------+----------+------------+-----------+
| accuracy     |    0.930881 |          |            |           |
+--------------+-------------+----------+------------+-----------+
| macro avg    |    0.923727 | 0.718263 |   0.779133 |     46847 |
+--------------+-------------+----------+------------+-----------+
| weighted avg |    0.92992  | 0.930881 |   0.919947 |     46847 |
+--------------+-------------+----------+------------+-----------+


Test Data Model: balanced  Geometric Mean: 0.7880439262304331
+--------------+-------------+----------+------------+-----------+
| Metri

In [28]:


# Print a classification report table (plus geometric mean) for every model
# in `best_models`, evaluated on the training split.
for key, model in best_models.items():
    y_train_pred = model.predict(X_train)
    report = classification_report(y_train, y_train_pred, output_dict=True)  # Output as a dictionary

    # Column names are taken from the "weighted avg" entry when it exists.
    columns = list(report["weighted avg"].keys()) if "weighted avg" in report else []
    headers = ["Metric"] + columns

    # Convert the dictionary into rows for the table
    rows = []
    for metric, values in report.items():
        if isinstance(values, dict):  # Per-class and averaged entries: one value per column
            rows.append([metric] + list(values.values()))
        elif "f1-score" in columns:
            # Scalar entry (accuracy). Appending it right after the label would
            # render it under the "precision" header, which mislabels it. Put it
            # under "f1-score" with the total support instead, as scikit-learn's
            # own text report does. None cells are rendered blank by tabulate.
            cells = [None] * len(columns)
            cells[columns.index("f1-score")] = values
            if "support" in columns:
                cells[columns.index("support")] = report["weighted avg"]["support"]
            rows.append([metric] + cells)
        else:  # No known column layout: keep the scalar next to its label
            rows.append([metric, values])

    print(f"Train Data Model: {key}  Geometric Mean: {geometric_mean_score(y_train, y_train_pred)}")
    print(tabulate(rows, headers=headers, tablefmt="grid"))  # Use "grid" format for better visuals
    print("\n")



Train Data Model: base  Geometric Mean: 0.6699044446931706
+--------------+-------------+----------+------------+-----------+
| Metric       |   precision |   recall |   f1-score |   support |
| 0            |    0.93214  | 0.99463  |   0.962372 |    165545 |
+--------------+-------------+----------+------------+-----------+
| 1            |    0.917256 | 0.451195 |   0.604861 |     21842 |
+--------------+-------------+----------+------------+-----------+
| accuracy     |    0.931287 |          |            |           |
+--------------+-------------+----------+------------+-----------+
| macro avg    |    0.924698 | 0.722912 |   0.783616 |    187387 |
+--------------+-------------+----------+------------+-----------+
| weighted avg |    0.930405 | 0.931287 |   0.9207   |    187387 |
+--------------+-------------+----------+------------+-----------+


Train Data Model: balanced  Geometric Mean: 0.7901590994106724
+--------------+-------------+----------+------------+-----------+
| Met