In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before code is executed and edits to them are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
import sklearn as skl

# Load the metrics submodule explicitly so that `skl.metrics` is always available.
import sklearn.metrics

import kraft

In [None]:
# Read only the four columns used in this analysis from the tab-separated file.
dataframe = pd.read_csv(
    "titanic.tsv",
    sep="\t",
    usecols=("sex", "age", "fare", "survived"),
)

# Keep only the rows in which none of the selected columns is missing.
dataframe = dataframe.dropna(axis="index", how="any")

dataframe

In [None]:
# Binary indicator named "Gender": 1 where the "sex" column equals "male",
# 0 otherwise.
g = dataframe["sex"].eq("male").astype(int).rename("Gender")

g

In [None]:
# Age column under the display name "Age".
# `rename` returns a new Series, so the label changes without assigning `.name`
# on the object handed out by `dataframe["age"]` (on pandas versions without
# Copy-on-Write that object may be the frame's cached column, and relabelling
# it in place would leak into later `dataframe["age"]` lookups).
a = dataframe["age"].rename("Age")

a

In [None]:
# Show the fares in ascending order.
# NOTE(review): presumably a check for zero or tiny fares before the log
# transform in the next cell; confirm.
dataframe.loc[:, "fare"].sort_values()

In [None]:
# Log-transformed fare, aligned to the frame's index and named "Fare".
# NOTE(review): `min_before_logging="0<"` presumably makes kraft.log lift the
# values above zero before taking the logarithm; confirm against kraft.
f = pd.Series(
    kraft.log(dataframe["fare"].values, min_before_logging="0<"),
    index=dataframe.index,
    name="Fare",
)

f

In [None]:
# Survival column under the display name "Survival".
# `rename` returns a new Series, so the label changes without assigning `.name`
# on the object handed out by `dataframe["survived"]` (on pandas versions
# without Copy-on-Write that object may be the frame's cached column, and
# relabelling it in place would leak into later `dataframe["survived"]` lookups).
s = dataframe["survived"].rename("Survival")

s

In [None]:
# Value of the last series that every inference below is run for.
target_value = 1

# Forwarded to kraft.infer as its `plot` argument.
plot = False


def infer_(*seriess):
    """Run kraft.infer on the given named series.

    The series are stacked column-wise into one array with a row per
    observation and a column per series, in the order given. Each series
    contributes its `.name` and a grid built by kraft.make_grid from its
    minimum and maximum. The module-level `target_value` and `plot` are
    passed through unchanged.

    Args:
        *seriess: pandas Series of equal length, each with a `.name`.

    Returns:
        Whatever kraft.infer returns for these inputs.
    """

    names = []

    grids = []

    for series in seriess:

        names.append(series.name)

        # NOTE(review): 1 / 8 and 32 are presumably the grid extension
        # fraction and the number of grid points; confirm against kraft.make_grid.
        grids.append(kraft.make_grid(series.min(), series.max(), 1 / 8, 32))

    return kraft.infer(
        np.transpose(np.asarray(seriess)),
        target_value,
        plot=plot,
        names=tuple(names),
        grids=tuple(grids),
    )

In [None]:
# Inference over Gender and Survival; labelled "P(S = 1 | G)" in the ROC comparison.
p_s1__g = infer_(g, s)

In [None]:
# Inference over Age and Survival; labelled "P(S = 1 | A)" in the ROC comparison.
p_s1__a = infer_(a, s)

In [None]:
# Inference over Fare and Survival; labelled "P(S = 1 | F)" in the ROC comparison.
p_s1__f = infer_(f, s)

In [None]:
# Inference over Gender, Age and Survival; labelled "P(S = 1 | G, A)" in the ROC comparison.
p_s1__g_a = infer_(g, a, s)

In [None]:
# Inference over Gender, Fare and Survival; labelled "P(S = 1 | G, F)" in the ROC comparison.
p_s1__g_f = infer_(g, f, s)

In [None]:
# Inference over Age, Fare and Survival; labelled "P(S = 1 | A, F)" in the ROC comparison.
p_s1__a_f = infer_(a, f, s)

In [None]:
# Inference over Gender, Age, Fare and Survival; labelled "P(S = 1 | G, A, F)" in the ROC comparison.
p_s1__g_a_f = infer_(g, a, f, s)

In [None]:
# Labels of the models being compared. Their order must match the inference
# results and the feature tuples zipped together below.
maths = (
    "P(S = 1 | G)",
    "P(S = 1 | A)",
    "P(S = 1 | F)",
    "P(S = 1 | G, A)",
    "P(S = 1 | G, F)",
    "P(S = 1 | A, F)",
    "P(S = 1 | G, A, F)",
)

# Label -> {"fpr", "tpr", "auc"} of that model's ROC curve.
math_roc = {}

for math, p_s1__v, vs in zip(
    maths,
    (p_s1__g, p_s1__a, p_s1__f, p_s1__g_a, p_s1__g_f, p_s1__a_f, p_s1__g_a_f),
    ((g,), (a,), (f,), (g, a), (g, f), (a, f), (g, a, f)),
):

    # NOTE(review): kraft.unmesh presumably returns one coordinate grid per
    # feature dimension plus the inferred values as an array over those grids;
    # confirm against kraft.
    grids, value = kraft.unmesh(*p_s1__v)

    # For every observation at once, find along each feature dimension the index
    # of the grid coordinate closest to the observed value (the first one on
    # ties, as with a per-observation np.argmin). This gives one index array per
    # dimension.
    nearest = tuple(
        np.absolute(
            np.ravel(grid)[np.newaxis, :] - v.values[:, np.newaxis]
        ).argmin(axis=1)
        for grid, v in zip(grids, vs)
    )

    # Inferred value at each observation's nearest grid point.
    s_ = np.asarray(value[nearest], dtype=float)

    # The thresholds returned by roc_curve are not needed.
    fpr, tpr = skl.metrics.roc_curve(s, kraft.normalize(s_, "0-1"))[:2]

    math_roc[math] = {"fpr": fpr, "tpr": tpr, "auc": skl.metrics.auc(fpr, tpr)}

In [None]:
# Figure layout: square canvas, a title listing the three feature names, and a
# horizontal legend.
roc_layout = {
    "height": 640,
    "width": 640,
    "title": {
        "text": "Receiver Operating Characteristic<br>{}".format(
            ", ".join(series.name for series in (g, a, f))
        )
    },
    "xaxis": {"title": "False Positive Rate"},
    "yaxis": {"title": "True Positive Rate"},
    "legend": {"orientation": "h"},
}

# The diagonal reference line comes first, followed by one ROC trace per model
# in the order of `maths`, with the AUC shown in the legend entry.
roc_traces = [
    {
        "name": "Random",
        "x": (0, 1),
        "y": (0, 1),
        "mode": "lines",
        "marker": {"color": "#d8d8d8"},
    }
] + [
    {
        "name": "{} | {:0.3f}".format(math, math_roc[math]["auc"]),
        "x": math_roc[math]["fpr"],
        "y": math_roc[math]["tpr"],
        "mode": "lines",
    }
    for math in maths
]

kraft.plot_plotly({"layout": roc_layout, "data": roc_traces})

In [None]:
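# NOTE: disabled. As written it refers to names that none of the cells above
# define (n_grid, p_s__g, p_s__a, p_s__f), so it cannot simply be uncommented.
# kraft.plot_bayesian_nomogram(
#     s.values, 1, 0, n_grid, (p_s__g, p_s__a, p_s__f), (g.name, a.name, f.name)
# )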