In [None]:
# Load IPython's autoreload extension; mode 2 reloads imported modules before
# each cell executes, so edits to local packages are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
import sklearn as skl
import sklearn.metrics

import kraft

In [None]:
# Read only the four columns used in this notebook, then discard every row
# that has a missing value in any of them.
columns = ("sex", "age", "fare", "survived")

dataframe = pd.read_csv("data/titanic.tsv", sep="\t", usecols=columns).dropna()

dataframe

In [None]:
# Binary gender indicator: 1 where sex is "male", 0 otherwise.
g = dataframe["sex"].eq("male").astype(int).rename("Gender")

In [None]:
# Age feature. `rename` returns a new, renamed Series, so the object handed
# back by `dataframe["age"]` is never modified in place (depending on the pandas
# version that object can be shared with the frame's internal column cache).
a = dataframe["age"].rename("Age")

In [None]:
# Fare feature: kraft.log applied to (fare + 1), re-attached to the frame's index.
f = pd.Series(
    kraft.log(1 + dataframe["fare"].values),
    index=dataframe.index,
    name="Fare",
)

In [None]:
# Target variable. `rename` yields a new Series instead of setting `.name` on
# the object returned by `dataframe["survived"]`, so the frame is left untouched.
s = dataframe["survived"].rename("Survival")

# The two values of the target that the posteriors are conditioned on.
target_1 = 1

target_0 = 0

In [None]:
# Notebook-wide switch forwarded as `plot=` to every kraft call made in `package`.
plot = True


def package(serieses):
    """Compute the joint posterior for `serieses` and condition it on both targets.

    Parameters
    ----------
    serieses : sequence of pandas.Series
        Each element must expose `.name`, `.min()` and `.max()`. Every caller
        in this notebook passes the target series as the last element.

    Returns
    -------
    tuple
        `(serieses, p_t__, p_t1__, p_t0__)`: the input itself, the result of
        `kraft.get_posterior_probability`, and the results of
        `kraft.target_posterior_probability` for `target_1` and `target_0`.

    Notes
    -----
    Reads the module-level names `plot`, `target_1` and `target_0` at call time.
    """

    names = tuple(one.name for one in serieses)

    # One 1-D grid per series, spanning its observed range.
    # NOTE(review): 1 / 8 and 8 are presumably the range-extension fraction and
    # the number of grid points -- confirm against kraft.make_grid_1d.
    grids = tuple(
        kraft.make_grid_1d(one.min(), one.max(), 1 / 8, 8) for one in serieses
    )

    p_t__ = kraft.get_posterior_probability(
        np.asarray(serieses).T,
        plot=plot,
        dimension_names=names,
        grid_1ds=grids,
    )

    def condition_on(target):
        """Apply kraft.target_posterior_probability to `p_t__` for one target."""

        return kraft.target_posterior_probability(
            *p_t__, target, plot=plot, dimension_names=names
        )

    # Evaluated left to right: target_1 first, then target_0.
    p_t1__, p_t0__ = condition_on(target_1), condition_on(target_0)

    return serieses, p_t__, p_t1__, p_t0__

In [None]:
# Feature combinations to evaluate; the target `s` is appended to each one.
feature_sets = (
    (g,),
    (a,),
    (f,),
    (g, a),
    (g, f),
    (a, f),
)

# Maps "Feature, Feature" -> the tuple returned by `package`.
math = {}

for features in feature_sets:

    serieses = (*features, s)

    label = ", ".join(feature.name for feature in features)

    math[label] = package(serieses)

In [None]:
# For every feature set, score each sample with the target_1 posterior value at
# its nearest grid point, then summarise the scores with an ROC curve and AUC.
roc = {}

for name, (vs, p_s__, p_s1__, p_s0__) in math.items():

    grid_1ds = kraft.get_grid_1ds(p_s1__[0])

    values = kraft.shape(p_s1__[1], grid_1ds)

    # Nearest-grid index per dimension, for all samples at once. Each
    # (grid, sample) distance matrix is reduced over the grid axis; argmin keeps
    # the first minimum, exactly like the per-sample lookup it replaces.
    # zip stops at the shorter input, so any trailing series without a matching
    # grid is ignored.
    nearest = tuple(
        np.absolute(np.asarray(grid_1d)[:, np.newaxis] - v.values).argmin(axis=0)
        for grid_1d, v in zip(grid_1ds, vs)
    )

    # Integer-array indexing picks one value per sample.
    s_ = np.asarray(values, dtype=float)[nearest]

    # Thresholds (third return value) are not used.
    fpr, tpr = skl.metrics.roc_curve(s, kraft.normalize(s_, "0-1"))[:2]

    roc[name] = {
        "fpr": fpr,
        "tpr": tpr,
        "auc": skl.metrics.auc(fpr, tpr),
    }

In [None]:
# Figure layout for the ROC comparison.
layout = {
    "height": 640,
    "width": 800,
    "title": {"text": "Receiver Operating Characteristic"},
    "xaxis": {"title": "False Positive Rate"},
    "yaxis": {"title": "True Positive Rate"},
}

# Grey diagonal from (0, 0) to (1, 1) as the chance-level reference.
chance_trace = {
    "name": "Random = 0.50",
    "x": (0, 1),
    "y": (0, 1),
    "mode": "lines",
    "marker": {"color": "#d8d8d8"},
}

# One line per feature set, labelled with its AUC to two decimals.
roc_traces = [
    {
        "name": "{} = {:.2f}".format(label, curve["auc"]),
        "x": curve["fpr"],
        "y": curve["tpr"],
        "mode": "lines",
    }
    for label, curve in roc.items()
]

kraft.plot_plotly({"layout": layout, "data": [chance_trace, *roc_traces]})

In [None]:
# Fraction of samples whose target equals each target value.
n_sample = s.size

fraction_1 = (s == target_1).sum() / n_sample

fraction_0 = (s == target_0).sum() / n_sample

# For each feature set, pair the second element of its target_1 and target_0
# posteriors (positions 2 and 3 of the tuple stored in `math`).
kraft.plot_nomogram(
    fraction_1,
    fraction_0,
    tuple(math),
    tuple((p_t1__[1], p_t0__[1]) for _, _, p_t1__, p_t0__ in math.values()),
)