In [None]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell executes so that edits to them are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import kraft
import numpy as np
import pandas as pd
import sklearn as skl
from sklearn.metrics import auc, roc_curve

In [None]:
# Read only the four columns listed in `usecols` from the tab-separated file and
# drop every row that has a missing value in any of them.
# The separator is written as the "\t" escape rather than a literal tab
# character so that it stays visible and cannot be silently converted to spaces.
dataframe = pd.read_csv(
    "data/titanic.tsv", sep="\t", usecols=("sex", "age", "fare", "survived")
).dropna()

In [None]:
# Age column under the label "Age"; the Series name is later used as a dimension
# name and as part of the model label. rename() returns a new Series, so the
# object handed out by `dataframe` is left untouched.
series_0 = dataframe["age"].rename("Age")

In [None]:
# Target column under the label "Survival". rename() returns a new Series, so
# the object handed out by `dataframe` is left untouched.
series_target = dataframe["survived"].rename("Survival")

In [None]:
plot = True


def package(serieses):
    """Compute the posterior-probability grids for a group of series.

    Parameters
    ----------
    serieses : tuple
        Objects exposing ``name``, ``min()`` and ``max()`` (pandas Series in
        this notebook). Each name labels one dimension, and each minimum and
        maximum are passed to ``kraft.make_grid_1d`` to build that dimension's
        1-D grid.

    Returns
    -------
    tuple
        ``(serieses, (grid_nd, probability), (grid_nd_0, probability_0),
        (grid_nd_1, probability_1))``. The first pair is the result of
        ``kraft.get_posterior_probability``; the last two pairs are the results
        of ``kraft.target_posterior_probability`` called with ``0`` and then
        ``1`` as its third argument.

    Notes
    -----
    The module-level ``plot`` flag is forwarded to every kraft call.
    """
    names = tuple(series.name for series in serieses)

    # Stack the series and transpose: one row per sample, one column per series.
    matrix = np.asarray(serieses).T

    grids = tuple(
        kraft.make_grid_1d(series.min(), series.max(), 1 / 8, 8)
        for series in serieses
    )

    joint_grid, joint_probability = kraft.get_posterior_probability(
        matrix,
        plot=plot,
        dimension_names=names,
        grid_1ds=grids,
    )

    packaged = [serieses, (joint_grid, joint_probability)]

    # Same call twice, first with 0 and then with 1; each result is a pair.
    for number in (0, 1):
        target_grid, target_probability = kraft.target_posterior_probability(
            joint_grid,
            joint_probability,
            number,
            plot=plot,
            dimension_names=names,
        )
        packaged.append((target_grid, target_probability))

    return tuple(packaged)

In [None]:
# NOTE(review): series_1 and series_2 are not defined in any cell visible in
# this notebook - confirm they are created before this cell runs on a fresh
# kernel.
feature_serieses = (series_0, series_1, series_2)

# Maps a label (the comma-joined series names) to what package() returns for
# those series plus the target.
math = {}

# Every single feature first, then every unordered pair of features.
for serieses in (
    *((series,) for series in feature_serieses),
    *(
        (first, second)
        for position, first in enumerate(feature_serieses)
        for second in feature_serieses[position + 1 :]
    ),
):
    packaged = package(serieses + (series_target,))
    math[", ".join(series.name for series in serieses)] = packaged

In [None]:
# Maps each label in `math` to the ROC curve ("fpr", "tpr") and its area
# ("auc") obtained by scoring every sample with the value stored at its nearest
# grid point in the fourth element of the packaged tuple.
roc = {}

for name, (serieses, p_s__, p_s0__, p_s1__) in math.items():
    grid_1ds = kraft.get_grid_1ds(p_s1__[0])

    grid_nd_posterior_probabilities = kraft.shape(p_s1__[1], grid_1ds)

    # For each gridded dimension, the index of the grid point closest to every
    # sample's value, computed for all samples at once. zip() stops at the
    # shorter of the two sequences, exactly as the per-sample lookup did.
    nearest_indices = tuple(
        np.absolute(
            np.asarray(grid_1d)[np.newaxis, :] - series.values[:, np.newaxis]
        ).argmin(axis=1)
        for grid_1d, series in zip(grid_1ds, serieses)
    )

    # Integer-array indexing picks one grid cell per sample.
    vector_target = np.asarray(grid_nd_posterior_probabilities)[nearest_indices]

    # roc_curve and auc are imported explicitly in the import cell so that this
    # cell does not rely on sklearn.metrics having been loaded by another import.
    fpr, tpr, thresholds = roc_curve(
        series_target.values, kraft.normalize(vector_target, "0-1")
    )

    roc[name] = {
        "fpr": fpr,
        "tpr": tpr,
        "auc": auc(fpr, tpr),
    }

In [None]:
# Grey diagonal from (0, 0) to (1, 1), labelled "Random = 0.50".
chance_trace = {
    "name": "Random = 0.50",
    "x": (0, 1),
    "y": (0, 1),
    "mode": "lines",
    "marker": {"color": "#d8d8d8"},
}

# One line per entry of `roc`, labelled with its name and AUC to two decimals.
model_traces = [
    {
        "name": "{} = {:.2f}".format(name, curve["auc"]),
        "x": curve["fpr"],
        "y": curve["tpr"],
        "mode": "lines",
    }
    for name, curve in roc.items()
]

kraft.plot_plotly(
    {
        "layout": {
            "height": 640,
            "width": 800,
            "title": {"text": "Receiver Operating Characteristic"},
            "xaxis": {"title": "False Positive Rate"},
            "yaxis": {"title": "True Positive Rate"},
        },
        "data": [chance_trace, *model_traces],
    }
)

In [None]:
# Fractions of samples whose target equals 0 and 1.
sample_count = series_target.size
fraction_0 = (series_target == 0).sum() / sample_count
fraction_1 = (series_target == 1).sum() / sample_count

# For every packaged entry, the second item of its third and fourth elements,
# in the same order as the labels.
probabilities_0 = tuple(p_s0__[1] for _s, _p, p_s0__, _p1 in math.values())
probabilities_1 = tuple(p_s1__[1] for _s, _p, _p0, p_s1__ in math.values())

kraft.plot_nomogram(
    fraction_0,
    fraction_1,
    tuple(math),
    probabilities_0,
    probabilities_1,
)