In [None]:
# conda env create --force -f environment.yml

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import scipy

from typing import List

from sklearn.calibration import calibration_curve
from sklearn.isotonic import IsotonicRegression
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPRegressor

In [None]:
# Apply the "seaborn-v0_8-deep" matplotlib style sheet to every figure created below.
plt.style.use("seaborn-v0_8-deep")

# Load data

In [None]:
# Predictions for the test split and the validation split ("_val" suffix) of one ensemble.
# Later cells read the columns "ensemble_probs" and "y" from both frames, the per-model
# "probs N" columns (after renaming), and "h", "gender" and "age" from the test frame.
# NOTE(review): paths are relative to the notebook's working directory — confirm the
# delivery folder is present before running.
median_ensemble_test = pd.read_csv("./delivery_1_cv_v7_seeds/split_v7_2021_test_cc_dispatcher_precision_recall_harmonic_mean_median_ensemble_individual_predictions.csv")
median_ensemble_val = pd.read_csv("./delivery_1_cv_v7_seeds/split_v7_2021_test_cc_dispatcher_precision_recall_harmonic_mean_median_ensemble_individual_predictions_val.csv")

In [None]:
# Predictions of all ensembles for the test and validation ("_val" suffix) splits.
# The "Ensemble of ensembles" section reads the columns "ensemble {i} probs" and
# "ensemble {i} preds" for i = 1..11 from these frames.
all_ensembles_test = pd.read_csv("./delivery_1_cv_v7_seeds/split_v7_2021_test_cc_dispatcher_precision_recall_harmonic_mean_predictions.csv")
all_ensembles_val = pd.read_csv("./delivery_1_cv_v7_seeds/split_v7_2021_test_cc_dispatcher_precision_recall_harmonic_mean_predictions_val.csv")

In [None]:
# Renumber the member-model columns from 20..24 to 1..5 ("logits 20" -> "logits 1",
# ..., "probs 24" -> "probs 5") so later cells can address them as "probs 1".."probs 5".
column_map = {
    f"{kind} {20 + offset}": f"{kind} {offset + 1}"
    for kind in ("logits", "probs")
    for offset in range(5)
}

# Apply the renaming to both splits and add the hard decision at a 0.5 threshold.
for frame in (median_ensemble_test, median_ensemble_val):
    frame.rename(columns=column_map, inplace=True)
    frame["ensemble_preds"] = frame["ensemble_probs"] > 0.5

median_ensemble_test

In [None]:
def plot_histogram(arrays: List[np.ndarray], labels: List, **kwargs):
    """Overlay one histogram per array on a single new axis.

    Args:
        arrays: Sequences of values to histogram; one histogram is drawn per entry.
        labels: Legend labels, paired with ``arrays`` via ``zip`` (extra entries on
            either side are ignored).
        **kwargs: Optional settings: ``bins`` (default 50), ``alpha`` (default 0.5),
            ``density`` (default False) and ``yscale`` (default "log").

    Returns:
        The ``(fig, ax)`` pair of the created figure.
    """
    n_bins = kwargs.get("bins", 50)
    opacity = kwargs.get("alpha", 0.5)
    use_density = kwargs.get("density", False)

    fig, ax = plt.subplots(figsize=(6.4, 4.8))
    for values, name in zip(arrays, labels):
        ax.hist(values, bins=n_bins, alpha=opacity, label=name, density=use_density)

    ax.set_yscale(kwargs.get("yscale", "log"))
    ax.set_xlabel("Predicted probability")
    # The y axis shows a normalised density or raw counts depending on `density`.
    ax.set_ylabel("Density" if use_density else "Count")
    ax.legend()
    return fig, ax

In [None]:
def plot_calibration_curve(targets: List[np.ndarray], model_probs: List[np.ndarray], labels=None, **kwargs):
    """Plot one reliability (calibration) curve per (target, probability) pair.

    Args:
        targets: Ground-truth labels, either a single array-like or a list of them.
        model_probs: Predicted probabilities, either a single array-like or a list.
            If only one of ``targets`` / ``model_probs`` has length 1, it is repeated
            to match the other.
        labels: Legend labels for the curves. ``None`` (the default) draws the curves
            without legend entries; a single string is treated as one label. If fewer
            labels than curves are given, only the first ``len(labels)`` curves are
            drawn (``zip`` stops at the shortest input) — callers use this to show a
            subset of the supplied curves.
        **kwargs: ``n_bins`` (default 20) and ``strategy`` (default "uniform") are
            forwarded to ``sklearn.calibration.calibration_curve``.

    Returns:
        The ``(fig, ax)`` pair of the created figure.

    Raises:
        AssertionError: If ``targets`` and ``model_probs`` have different lengths and
            neither has length 1.
    """
    if not isinstance(targets, list):
        targets = [targets]
    if not isinstance(model_probs, list):
        model_probs = [model_probs]

    if len(model_probs) != len(targets):
        assert len(model_probs) == 1 or len(targets) == 1, "Number of models and targets must be equal or 1"
        if len(model_probs) == 1:
            model_probs = model_probs * len(targets)
        else:
            targets = targets * len(model_probs)

    if labels is None:
        # Previously the default crashed in zip(); draw every curve unlabelled instead.
        labels = [None] * len(model_probs)
    elif isinstance(labels, str):
        # A bare string would otherwise be iterated character by character.
        labels = [labels]

    fig, ax = plt.subplots(figsize=(6.4, 4.8))
    ax.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")

    n_bins = kwargs.get("n_bins", 20)
    strategy = kwargs.get("strategy", "uniform")
    for target, probs, label in zip(targets, model_probs, labels):
        prob_true, prob_pred = calibration_curve(target, probs, n_bins=n_bins, strategy=strategy)
        ax.plot(prob_pred, prob_true, marker="o", markersize=3, label=label)

    ax.set_xlabel("Mean predicted probability")
    ax.set_ylabel("Fraction of positives")
    ax.legend()
    return fig, ax

In [None]:
# Uncalibrated test-split comparison: the ensemble against the first member model.
uncalibrated_probs = [median_ensemble_test[column] for column in ("ensemble_probs", "probs 1")]

fig, ax = plot_calibration_curve(
    targets=median_ensemble_test["y"],
    model_probs=uncalibrated_probs,
    labels=["Ensemble", "Single model"],
    n_bins=20,
)
fig.savefig("calibration_curve_ensemble_and_single_model_uncalibrated.pdf", bbox_inches="tight")

fig, ax = plot_histogram(arrays=uncalibrated_probs, labels=["Ensemble", "Model 1"], bins=30)
fig.savefig("histogram_ensemble_and_single_model.pdf", bbox_inches="tight")

In [None]:
# Uncalibrated test-split curves for the five member models followed by the ensemble.
member_columns = [f"probs {k}" for k in range(1, 6)] + ["ensemble_probs"]
member_probs = [median_ensemble_test[column] for column in member_columns]
member_labels = ["Individual model"] * 5 + ["Ensemble"]

fig, ax = plot_calibration_curve(
    targets=median_ensemble_test["y"],
    model_probs=member_probs,
    labels=member_labels,
    n_bins=20,
)
fig.savefig("calibration_curve_ensemble_and_all_models_uncalibrated.pdf", bbox_inches="tight")
plot_histogram(arrays=member_probs, labels=member_labels, alpha=0.3)

In [None]:
# Calibration and score distribution per "gender" code; the plots label "M" as Male
# and "K" as Female.
is_male = median_ensemble_test["gender"] == "M"
is_female = median_ensemble_test["gender"] == "K"

gender_masks = [is_male, is_female]
gender_labels = ["Male", "Female"]
gender_probs = [median_ensemble_test["ensemble_probs"][mask] for mask in gender_masks]

plot_calibration_curve(
    targets=[median_ensemble_test["y"][mask] for mask in gender_masks],
    model_probs=gender_probs,
    labels=gender_labels,
    n_bins=20,
)
plot_histogram(arrays=gender_probs, labels=gender_labels)

In [None]:
# Split the test set on the "h" column (labelled "call-taker recognition" in the plots).
# NOTE(review): `~condition` is a logical NOT only if "h" is a boolean column — confirm dtype.
condition = median_ensemble_test["h"]

recognition_masks = [condition, ~condition]
recognition_labels = ["Call-taker recognition", "No call-taker recognition"]
recognition_probs = [median_ensemble_test["ensemble_probs"][mask] for mask in recognition_masks]

plot_calibration_curve(
    targets=[median_ensemble_test["y"][mask] for mask in recognition_masks],
    model_probs=recognition_probs,
    labels=recognition_labels,
    n_bins=20,
)
plot_histogram(arrays=recognition_probs, labels=recognition_labels)

In [None]:
# Histogram of ensemble probabilities for the rows selected by `condition`, drawn on a
# linear count axis instead of the default log axis.
# Relies on `condition` from an earlier cell (the "h" column when cells are run in order).
plot_histogram(
    arrays=[median_ensemble_test["ensemble_probs"][condition]],
    labels=["Call-taker recognition"],
    yscale="linear"
)

In [None]:
# Split the test set by whether the "h" column equals the thresholded ensemble
# prediction ("ensemble_preds").
condition = median_ensemble_test["h"] == median_ensemble_test["ensemble_preds"]

agreement_masks = [condition, ~condition]
agreement_labels = ["Model/call-taker agreement", "Model/call-taker disagreement"]
agreement_probs = [median_ensemble_test["ensemble_probs"][mask] for mask in agreement_masks]

plot_calibration_curve(
    targets=[median_ensemble_test["y"][mask] for mask in agreement_masks],
    model_probs=agreement_probs,
    labels=agreement_labels,
    n_bins=20,
)
plot_histogram(arrays=agreement_probs, labels=agreement_labels)

In [None]:
# Split the test set at age 65: rows with age >= 65 versus all remaining rows.
is_old = median_ensemble_test["age"] >= 65

age_masks = [is_old, ~is_old]
age_labels = ["65+", "18-65"]
age_probs = [median_ensemble_test["ensemble_probs"][mask] for mask in age_masks]

plot_calibration_curve(
    targets=[median_ensemble_test["y"][mask] for mask in age_masks],
    model_probs=age_probs,
    labels=age_labels,
    n_bins=20,
)
plot_histogram(arrays=age_probs, labels=age_labels)

In [None]:
# Split the test set by the ground-truth target "y" and compare the score distributions
# of positives and negatives.
# The comparison with 1 yields a boolean mask whether "y" is stored as bool or as 0/1,
# so `~condition` is always a logical NOT.
condition = median_ensemble_test["y"] == 1

outcome_masks = [condition, ~condition]
# The labels describe the actual split (the target), not model/call-taker agreement.
outcome_labels = ["Positive (y = 1)", "Negative (y = 0)"]
outcome_probs = [median_ensemble_test["ensemble_probs"][mask] for mask in outcome_masks]

# Each subset contains a single class, so these curves sit at 1 (positives) and 0 (negatives).
plot_calibration_curve(
    targets=[median_ensemble_test["y"][mask] for mask in outcome_masks],
    model_probs=outcome_probs,
    labels=outcome_labels,
    n_bins=20,
)
plot_histogram(arrays=outcome_probs, labels=outcome_labels)

# Calibration

## Platt scaling

In [None]:
# Platt scaling: fit an unregularised one-feature logistic regression on the validation
# split that maps an uncalibrated probability to a calibrated one, then apply it to the
# test and validation splits.
# NOTE: penalty="none" is the spelling accepted by scikit-learn < 1.4; newer releases
# expect penalty=None instead.
platt_targets_val = median_ensemble_val["y"].to_numpy()

logistic = LogisticRegression(penalty="none", fit_intercept=True)
logistic.fit(median_ensemble_val["ensemble_probs"].to_numpy()[:, np.newaxis], platt_targets_val)
ensemble_probs_logistic = logistic.predict_proba(median_ensemble_test["ensemble_probs"].to_numpy()[:, np.newaxis])[:, 1]
ensemble_probs_logistic_val = logistic.predict_proba(median_ensemble_val["ensemble_probs"].to_numpy()[:, np.newaxis])[:, 1]

# One calibrator per member model. A separate name (`logistic_i`) is used so the ensemble
# calibrator stored in `logistic` is not overwritten and stays available for inspection.
individual_probs_logistic = []
individual_probs_logistic_val = []
for i in range(1, 6):
    logistic_i = LogisticRegression(penalty="none", fit_intercept=True)
    logistic_i.fit(median_ensemble_val[f"probs {i}"].to_numpy()[:, np.newaxis], platt_targets_val)
    individual_probs_logistic.append(logistic_i.predict_proba(median_ensemble_test[f"probs {i}"].to_numpy()[:, np.newaxis])[:, 1])
    individual_probs_logistic_val.append(logistic_i.predict_proba(median_ensemble_val[f"probs {i}"].to_numpy()[:, np.newaxis])[:, 1])

In [None]:
# Fitted coefficient, intercept and solver iteration count of the model currently bound to `logistic`.
logistic.coef_, logistic.intercept_, logistic.n_iter_

In [None]:
# Range of the Platt-scaled ensemble probabilities on the test split.
ensemble_probs_logistic.min(), ensemble_probs_logistic.max()

In [None]:
# Validation-split calibration curves after Platt scaling: ensemble first, then the
# five member models.
platt_val_curves = [ensemble_probs_logistic_val] + individual_probs_logistic_val

fig, ax = plot_calibration_curve(
    targets=median_ensemble_val["y"],
    model_probs=platt_val_curves,
    labels=["Ensemble"] + ["Individual model"] * 5,
    n_bins=20,
)
plot_histogram(arrays=[ensemble_probs_logistic_val], labels=["Ensemble"])

In [None]:
# Test-split calibration curve after Platt scaling.
# Six probability arrays are passed but only one label: plot_calibration_curve zips
# curves with labels, so only the first curve (the ensemble) is drawn. To show the
# member models too, extend `labels` with `*["Individual model"] * 5`.
fig, ax = plot_calibration_curve(
    targets=median_ensemble_test["y"],
    model_probs=[ensemble_probs_logistic, *individual_probs_logistic],
    labels=["Ensemble"],
    n_bins=20,
    strategy="uniform",
    # Alternative binning: n_bins=1000, strategy="quantile"
)
fig.savefig("calibration_curve_ensemble_logistic.pdf", bbox_inches="tight")
plot_histogram(arrays=[ensemble_probs_logistic], labels=["Ensemble"])

## Isotonic

In [None]:
# Isotonic calibration: fit a monotonically increasing map from uncalibrated probability
# to target on the validation split (outputs bounded to [0, 1], out-of-range inputs
# clipped), then apply it to the test and validation splits.
isotonic_targets_val = median_ensemble_val["y"].to_numpy()
ensemble_scores_val = median_ensemble_val["ensemble_probs"].to_numpy()

isotonic = IsotonicRegression(y_min=0, y_max=1, increasing=True, out_of_bounds="clip")
isotonic.fit(ensemble_scores_val, isotonic_targets_val)
ensemble_probs_isotonic = isotonic.transform(median_ensemble_test["ensemble_probs"].to_numpy())
ensemble_probs_isotonic_val = isotonic.transform(ensemble_scores_val)

# One isotonic calibrator per member model, kept separate from the ensemble's.
individual_probs_isotonic = []
individual_probs_isotonic_val = []
for i in range(1, 6):
    member_scores_val = median_ensemble_val[f"probs {i}"].to_numpy()
    isotonic_i = IsotonicRegression(y_min=0, y_max=1, increasing=True, out_of_bounds="clip")
    isotonic_i.fit(member_scores_val, isotonic_targets_val)
    individual_probs_isotonic.append(isotonic_i.transform(median_ensemble_test[f"probs {i}"].to_numpy()))
    individual_probs_isotonic_val.append(isotonic_i.transform(member_scores_val))

In [None]:
# Inspect the uncalibrated ensemble probabilities of the validation split.
median_ensemble_val["ensemble_probs"]

In [None]:
# Distribution of the isotonic-calibrated ensemble probabilities on both splits.
plot_histogram(arrays=[ensemble_probs_isotonic, ensemble_probs_isotonic_val], labels=["Ensemble test", "Ensemble validation"])

In [None]:
# Plot the fitted isotonic map (its threshold points) against the identity line.
fig, ax = plt.subplots(figsize=(6.4, 4.8))
ax.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
ax.plot(isotonic.X_thresholds_, isotonic.y_thresholds_, "-", marker="o", markersize=3, label="Isotonic")
# Axis labels and a legend so the saved figure is readable on its own; the curves were
# already given labels, but no legend was drawn.
ax.set_xlabel("Uncalibrated probability")
ax.set_ylabel("Calibrated probability")
ax.legend()
fig.savefig("calibration_fit_isotonic.pdf", bbox_inches="tight")

# Uncalibrated validation-split scores, separated by target value.
val_pos = median_ensemble_val["ensemble_probs"][median_ensemble_val["y"] == 1]
val_neg = median_ensemble_val["ensemble_probs"][median_ensemble_val["y"] == 0]
plot_histogram(arrays=[val_neg, val_pos], labels=["Ensemble probs (val-neg)", "Ensemble probs (val-pos)"])

In [None]:
# Inspect the isotonic-calibrated ensemble probabilities of the test split.
ensemble_probs_isotonic

In [None]:
# Validation-split calibration curve after isotonic calibration.
# Only one label is passed, so plot_calibration_curve (which zips curves with labels)
# draws just the first curve, i.e. the ensemble.
fig, ax = plot_calibration_curve(
    targets=median_ensemble_val["y"],
    model_probs=[ensemble_probs_isotonic_val, *individual_probs_isotonic_val],
    labels=["Ensemble"],
    n_bins=10,
    strategy="uniform",
)
# Saved under a validation-specific name: the test-split figure is written to
# "calibration_curve_ensemble_isotonic.pdf" and would otherwise overwrite this one.
fig.savefig("calibration_curve_ensemble_isotonic_val.pdf", bbox_inches="tight")
plot_histogram(arrays=[ensemble_probs_isotonic_val], labels=["Ensemble"])

In [None]:
# Test-split calibration curve after isotonic calibration, using quantile binning with
# up to 1000 bins. Only one label is passed, so plot_calibration_curve (which zips
# curves with labels) draws just the first curve, i.e. the ensemble.
fig, ax = plot_calibration_curve(
    targets=median_ensemble_test["y"],
    model_probs=[ensemble_probs_isotonic, *individual_probs_isotonic],
    labels=["Ensemble"],
    # Alternative binning: n_bins=10, strategy="uniform"
    n_bins=1000,
    strategy="quantile",
)
fig.savefig("calibration_curve_ensemble_isotonic.pdf", bbox_inches="tight")
plot_histogram(arrays=[ensemble_probs_isotonic], labels=["Ensemble"])

In [None]:
# Inspect the validation-split frame.
median_ensemble_val

## MLP

In [None]:
# MLP calibration: regress the target on the uncalibrated probability with a small
# (8, 8, 8) ReLU network fitted on the validation split, then apply it to both splits.
#
# MLPRegressor always uses an identity output layer: fit() resets `out_activation_`, so
# assigning it beforehand had no effect and that assignment has been dropped. Because
# the raw outputs are unbounded, they are clipped to [0, 1] on both sides;
# calibration_curve rejects probabilities outside that interval.
mlp = MLPRegressor(hidden_layer_sizes=(8, 8, 8), activation="relu", solver="adam", max_iter=1000, random_state=0)
mlp.fit(median_ensemble_val["ensemble_probs"].to_numpy()[:, np.newaxis], median_ensemble_val["y"].to_numpy()[:, np.newaxis])
ensemble_probs_mlp = mlp.predict(median_ensemble_test["ensemble_probs"].to_numpy()[:, np.newaxis]).clip(0, 1)
ensemble_probs_mlp_val = mlp.predict(median_ensemble_val["ensemble_probs"].to_numpy()[:, np.newaxis]).clip(0, 1)

# One MLP calibrator per member model, kept separate from the ensemble's.
individual_probs_mlp = []
individual_probs_mlp_val = []
for i in range(1, 6):
    mlp_i = MLPRegressor(hidden_layer_sizes=(8, 8, 8), activation="relu", solver="adam", max_iter=1000, random_state=0)
    mlp_i.fit(median_ensemble_val[f"probs {i}"].to_numpy()[:, np.newaxis], median_ensemble_val["y"].to_numpy()[:, np.newaxis])
    individual_probs_mlp.append(mlp_i.predict(median_ensemble_test[f"probs {i}"].to_numpy()[:, np.newaxis]).clip(0, 1))
    individual_probs_mlp_val.append(mlp_i.predict(median_ensemble_val[f"probs {i}"].to_numpy()[:, np.newaxis]).clip(0, 1))

In [None]:
# Distribution of the MLP-calibrated ensemble outputs on both splits.
plot_histogram(arrays=[ensemble_probs_mlp, ensemble_probs_mlp_val], labels=["Ensemble test", "Ensemble validation"])

In [None]:
# Validation-split calibration curve after MLP calibration. Only one label is passed,
# so plot_calibration_curve (which zips curves with labels) draws just the first curve,
# i.e. the ensemble.
fig, ax = plot_calibration_curve(
    targets=median_ensemble_val["y"],
    model_probs=[ensemble_probs_mlp_val, *individual_probs_mlp_val],
    labels=["Ensemble"],
    n_bins=10,
    strategy="uniform",
    # Alternative binning: n_bins=1000, strategy="quantile"
)
# Not saved: the test-split cell writes "calibration_curve_ensemble_mlp.pdf".
# fig.savefig("calibration_curve_ensemble_mlp.pdf", bbox_inches="tight")
plot_histogram(arrays=[ensemble_probs_mlp_val], labels=["Ensemble"])

In [None]:
# Test-split calibration curve after MLP calibration, using quantile binning with up to
# 1000 bins. Only one label is passed, so plot_calibration_curve (which zips curves
# with labels) draws just the first curve, i.e. the ensemble.
fig, ax = plot_calibration_curve(
    targets=median_ensemble_test["y"],
    model_probs=[ensemble_probs_mlp, *individual_probs_mlp],
    labels=["Ensemble"],
    # Alternative binning: n_bins=10, strategy="uniform"
    n_bins=1000,
    strategy="quantile",
)
fig.savefig("calibration_curve_ensemble_mlp.pdf", bbox_inches="tight")
plot_histogram(arrays=[ensemble_probs_mlp], labels=["Ensemble"])

## Plot-making

In [None]:
# Test-split comparison of the uncalibrated, Platt-scaled and isotonic-calibrated
# ensemble probabilities, using quantile binning with up to 1000 bins.
fig, ax = plot_calibration_curve(
    targets=median_ensemble_test["y"],
    model_probs=[median_ensemble_test["ensemble_probs"], ensemble_probs_logistic, ensemble_probs_isotonic],
    labels=["Ensemble uncalibrated", "Ensemble logistic calibration", "Ensemble isotonic calibration"],
    # Alternative binning: n_bins=20, strategy="uniform"
    n_bins=1000,
    strategy="quantile",
)
# NOTE(review): the Platt-scaling test cell saves to this same filename, so this figure
# replaces that one on disk when the notebook is run top to bottom — confirm intended.
fig.savefig("calibration_curve_ensemble_logistic.pdf", bbox_inches="tight")
plot_histogram(arrays=[median_ensemble_test["ensemble_probs"], ensemble_probs_logistic, ensemble_probs_isotonic], labels=["Ensemble uncalibrated", "Ensemble logistic calibration", "Ensemble isotonic calibration"])

In [None]:
# Unconditionally raises, which stops a "Run All" at this cell so the cells below are
# only executed when run manually.
# NOTE(review): presumably a deliberate guard before the next section — confirm.
raise Exception()

# Ensemble of ensembles

In [None]:
# Inspect the test-split frame holding the predictions of all ensembles.
all_ensembles_test

In [None]:
# Inspect the validation-split frame holding the predictions of all ensembles.
all_ensembles_val

In [None]:
# Collect the test-split probability column and a legend label for ensembles 1..11,
# then draw one uncalibrated calibration curve per ensemble.
all_ensembles_test_probs = []
labels = []
for ensemble_number in range(1, 12):
    all_ensembles_test_probs.append(all_ensembles_test[f"ensemble {ensemble_number} probs"])
    labels.append(f"Ensemble {ensemble_number}")

plot_calibration_curve(
    targets=median_ensemble_test["y"],
    model_probs=all_ensembles_test_probs,
    labels=labels,
    n_bins=20,
)

In [None]:
def _stack_ensemble_columns(frame, kind):
    """Stack the columns "ensemble 1 <kind>" .. "ensemble 11 <kind>" of `frame` along axis 0."""
    return np.stack([frame[f"ensemble {number} {kind}"] for number in range(1, 12)])


# Resulting arrays have one row per ensemble and one column per sample.
all_ensembles_test_probs = _stack_ensemble_columns(all_ensembles_test, "probs")
all_ensembles_test_preds = _stack_ensemble_columns(all_ensembles_test, "preds")
all_ensembles_val_probs = _stack_ensemble_columns(all_ensembles_val, "probs")
all_ensembles_val_preds = _stack_ensemble_columns(all_ensembles_val, "preds")
all_ensembles_test_preds.shape

In [None]:
# Combine the stacked per-ensemble probabilities (axis 0 indexes the ensembles) into a
# single probability per sample: harmonic mean ("super ensemble") and arithmetic mean.
# The median ensemble's own probabilities are kept alongside for comparison.
med_ensemble_test = median_ensemble_test["ensemble_probs"]
# majority_vote_test = np.mean(all_ensembles_test_preds, axis=0) > 0.5
super_ensemble_probs_test = scipy.stats.hmean(all_ensembles_test_probs, axis=0)
mean_ensemble_probs_test = np.mean(all_ensembles_test_probs, axis=0)

# Same aggregates for the validation split.
med_ensemble_val = median_ensemble_val["ensemble_probs"]
# NOTE(review): if re-enabled, this should use all_ensembles_val_preds, not the test predictions.
# majority_vote_val = np.mean(all_ensembles_test_preds, axis=0) > 0.5
super_ensemble_probs_val = scipy.stats.hmean(all_ensembles_val_probs, axis=0)
mean_ensemble_probs_val = np.mean(all_ensembles_val_probs, axis=0)

In [None]:
# Compare the median ensemble with the harmonic-mean and arithmetic-mean aggregates of
# all ensembles on the test split.
# One shared label list is used for both plots: the histogram previously labelled the
# median ensemble's probabilities as "Majority vote", which did not match the plotted data.
aggregate_probs = [med_ensemble_test, super_ensemble_probs_test, mean_ensemble_probs_test]
aggregate_labels = ["Median ensemble", "Harmonic mean of all ensembles", "Mean of all ensembles"]

plot_calibration_curve(
    targets=median_ensemble_test["y"],
    model_probs=aggregate_probs,
    labels=aggregate_labels,
    n_bins=20,
)
plot_histogram(arrays=aggregate_probs, labels=aggregate_labels)

## Platt scaling

In [None]:
# Platt scaling of the harmonic-mean ensemble: fit on the validation split, apply to
# the test split.
# Rebinds `logistic` and `ensemble_probs_logistic`, replacing the values computed for
# the median ensemble in the "Calibration" section.
logistic = LogisticRegression(penalty="none", fit_intercept=True)
logistic.fit(super_ensemble_probs_val[:, np.newaxis], median_ensemble_val["y"].to_numpy())
ensemble_probs_logistic = logistic.predict_proba(super_ensemble_probs_test[:, np.newaxis])[:, 1]

In [None]:
# Fitted coefficient, intercept and solver iteration count of the model currently bound to `logistic`.
logistic.coef_, logistic.intercept_, logistic.n_iter_

In [None]:
# Range of the Platt-scaled probabilities currently stored in `ensemble_probs_logistic`.
ensemble_probs_logistic.min(), ensemble_probs_logistic.max()

In [None]:
# Test-split calibration curve and score distribution of the Platt-scaled probabilities
# currently stored in `ensemble_probs_logistic`.
plot_calibration_curve(
    targets=median_ensemble_test["y"],
    model_probs=[ensemble_probs_logistic],
    labels=["Ensemble"],
    n_bins=20,
)
plot_histogram(arrays=[ensemble_probs_logistic], labels=["Ensemble"])

In [None]:
# Platt-scaled probabilities split at age 65: rows with age >= 65 versus all remaining rows.
is_old = median_ensemble_test["age"] >= 65

age_masks = [is_old, ~is_old]
age_labels = ["65+", "18-65"]
age_probs = [ensemble_probs_logistic[mask] for mask in age_masks]

plot_calibration_curve(
    targets=[median_ensemble_test["y"][mask] for mask in age_masks],
    model_probs=age_probs,
    labels=age_labels,
    n_bins=20,
)
plot_histogram(arrays=age_probs, labels=age_labels)

## Isotonic

In [None]:
# Isotonic calibration of the harmonic-mean ensemble: fit on the validation split,
# apply to the test split.
# Rebinds `isotonic` and `ensemble_probs_isotonic`, replacing the values computed for
# the median ensemble in the "Calibration" section.
isotonic = IsotonicRegression(y_min=0, y_max=1, increasing=True, out_of_bounds="clip")
isotonic.fit(super_ensemble_probs_val, median_ensemble_val["y"])
ensemble_probs_isotonic = isotonic.transform(super_ensemble_probs_test)

In [None]:
# `ensemble_probs_isotonic` is the isotonic transform of the test-split probabilities,
# so the histogram is labelled as test data (it was previously labelled "validation").
plot_histogram(arrays=[ensemble_probs_isotonic], labels=["Ensemble test"])

In [None]:
# Plot the fitted isotonic map (its threshold points) against the identity line.
fig, ax = plt.subplots(figsize=(6.4, 4.8))
ax.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
ax.plot(isotonic.X_thresholds_, isotonic.y_thresholds_, "-", marker="o", markersize=3, label="Isotonic")
# The curves carry labels, so draw the legend and name the axes to make the figure
# readable on its own.
ax.set_xlabel("Uncalibrated probability")
ax.set_ylabel("Calibrated probability")
ax.legend()

In [None]:
# Test-split calibration curve and score distribution of the isotonic-calibrated
# probabilities currently stored in `ensemble_probs_isotonic`.
plot_calibration_curve(
    targets=median_ensemble_test["y"],
    model_probs=[ensemble_probs_isotonic],
    labels=["Ensemble"],
    n_bins=20,
)
plot_histogram(arrays=[ensemble_probs_isotonic], labels=["Ensemble"])