In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell executes so edits to imported source files are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [None]:
import kraft
import numpy as np
import pandas as pd
import sklearn as skl
import sklearn.metrics

In [None]:
# Load only the columns this analysis uses from the tab-separated file and
# discard every row that has a missing value in any of them.
columns = ["sex", "age", "fare", "survived"]

df = pd.read_csv("data/titanic.tsv", sep="\t", usecols=columns).dropna()

df

In [None]:
# Binary feature: 1 where the "sex" column equals "male", 0 otherwise.
s0 = df["sex"].eq("male").astype(int).rename("Sex")

In [None]:
# Age feature. `rename` returns a new Series carrying the display name, so the
# column object held by `df` is not renamed as a side effect (assigning `.name`
# on `df["age"]` directly would mutate the frame's own column Series).
s1 = df["age"].rename("Age")

In [None]:
# Fare feature: kraft.array.log applied to (fare + 1), re-wrapped as a Series
# aligned with df's index. The +1 keeps a fare of 0 inside the log's domain.
# NOTE(review): the base of kraft.array.log is not visible here — confirm.
s2 = pd.Series(
    kraft.array.log(df["fare"].to_numpy() + 1),
    index=df.index,
    name="Fare",
)

In [None]:
# Target variable. `rename` returns a new Series carrying the display name, so
# the column object held by `df` is not renamed as a side effect (assigning
# `.name` on `df["survived"]` directly would mutate the frame's own column Series).
st = df["survived"].rename("Survival")

In [None]:
def package(s_):
    """Run kraft's posterior-probability computation for target 0 and target 1.

    Parameters
    ----------
    s_ : list of pandas.Series
        Named variables. In this notebook the caller passes the feature Series
        followed by the target Series as the last element.

    Returns
    -------
    tuple
        ``(s_, gn0, v0, gn, v)``: the input list unchanged, the pair returned by
        ``kraft.probability.get_posterior_probability`` for ``target=0`` (not
        plotted) and the pair returned for ``target=1`` (plotted).
        NOTE(review): the exact meaning/shape of each pair is defined by kraft
        and is not visible here — confirm against kraft's documentation.
    """

    names = [s.name for s in s_]

    def posterior(target, plot):
        # The data matrix (one column per Series) and the per-Series 1D grids
        # spanning [min, max] are rebuilt on every call, so the two calls never
        # share mutable arguments.
        return kraft.probability.get_posterior_probability(
            np.asarray(s_).T,
            target=target,
            plot=plot,
            dimension_name_=names,
            g1_=[kraft.grid.make_g1(s.min(), s.max(), 1 / 8, 8) for s in s_],
        )

    gn0, v0 = posterior(0, False)

    gn, v = posterior(1, True)

    return s_, gn0, v0, gn, v

In [None]:
# Every single feature and every pair of features to evaluate.
feature_set_ = [[s0], [s1], [s2], [s0, s1], [s0, s2], [s1, s2]]

# Results keyed by the comma-separated feature names, e.g. "Sex, Age".
# NOTE: the name `math` shadows the standard-library module of the same name;
# it is kept because later cells read this dict.
math = {}

for s_ in feature_set_:

    label = ", ".join(s.name for s in s_)

    # The target Series always goes last.
    math[label] = package([*s_, st])

In [None]:
# ROC curve per feature set: score every sample with the target=1 values looked
# up at the grid point nearest to the sample, then compare the scores against
# the observed outcome. The same rows are used for fitting and scoring, so the
# resulting AUCs are in-sample.
roc = {}

# The ground truth is identical for every feature set, so convert it once.
y_true = st.to_numpy()

for n, (s_, _, _, gn, v) in math.items():

    g1_ = kraft.grid.get_g1_(gn)

    # One axis per 1D grid.
    # NOTE(review): assumes v is flattened in the same order as g1_ — confirm.
    v = v.reshape([g1.size for g1 in g1_])

    # Nearest grid index along each dimension, for all samples at once: row i of
    # the (sample, grid point) distance matrix holds |g1 - x_i|, and argmin picks
    # the first closest grid point, exactly as a per-sample np.argmin would.
    # zip() pairs grids with Series positionally and stops at the shorter list.
    # NOTE(review): assumes every g1 is one-dimensional — confirm.
    i_ = tuple(
        np.absolute(
            np.asarray(g1)[np.newaxis, :] - s.to_numpy()[:, np.newaxis]
        ).argmin(axis=1)
        for g1, s in zip(g1_, s_)
    )

    # Integer-array indexing gathers one value per sample.
    vt = v[i_].astype(float)

    fpr, tpr, _ = skl.metrics.roc_curve(y_true, kraft.array.normalize(vt, "0-1"))

    roc[n] = {
        "fpr": fpr,
        "tpr": tpr,
        "auc": skl.metrics.auc(fpr, tpr),
    }

In [None]:
# Draw all ROC curves in one figure: a grey diagonal as the chance-level
# reference, followed by one line per feature set labelled with its AUC.
kraft.plot.plot_plotly(
    {
        "layout": {
            "height": 640,
            "width": 800,
            "title": {"text": "Receiver Operating Characteristic"},
            "xaxis": {"title": "False Positive Rate"},
            "yaxis": {"title": "True Positive Rate"},
        },
        "data": [
            {
                "name": "Random = 0.50",
                "x": (0, 1),
                "y": (0, 1),
                "mode": "lines",
                "marker": {"color": "#d8d8d8"},
            }
        ]
        + [
            {
                "name": "{} = {:.2f}".format(name, curve["auc"]),
                "x": curve["fpr"],
                "y": curve["tpr"],
                "mode": "lines",
            }
            for name, curve in roc.items()
        ],
    }
)

In [None]:
# Nomogram inputs: the observed fraction of each target class, the feature-set
# labels, and, for every feature set, the values computed for target=0 and
# target=1 (positions 2 and 4 of the tuples returned by `package`).
n_sample = st.size

kraft.probability.plot_nomogram(
    (st == 0).sum() / n_sample,
    (st == 1).sum() / n_sample,
    math.keys(),
    [v0 for (_, _, v0, _, _) in math.values()],
    [v1 for (_, _, _, _, v1) in math.values()],
)