In [None]:
# Enable IPython's autoreload extension. Mode 2 asks it to reload all imported
# modules before running each cell, so edits to imported code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from numpy import arange, nan, prod
from numpy.random import choice, normal, randint, sample
from pandas import DataFrame, Index, Series


def simulate_array(shape, how):
    """Return a simulated numpy array of the requested shape.

    Parameters
    ----------
    shape
        Size passed to the numpy generator (an int or a tuple of ints).
    how
        Selects how the values are generated:

        * ``"uniform"``: floats drawn with ``numpy.random.sample``.
        * ``"normal"``: floats drawn with ``numpy.random.normal``.
        * ``"range"``: the integers ``0 .. prod(shape) - 1`` laid out in
          ``shape``.
        * anything else: used as the exclusive upper bound of
          ``numpy.random.randint(0, how)``.

    Returns
    -------
    numpy.ndarray
        The simulated array.
    """
    if how == "uniform":
        return sample(size=shape)

    if how == "normal":
        return normal(size=shape)

    if how == "range":
        n_value = prod(shape)
        return arange(n_value).reshape(shape)

    # Any other value is treated as the exclusive upper bound for random integers.
    return randint(low=0, high=how, size=shape)


def simulate_series_or_dataframe(
    name_0, name_1, *simulate_array_arguments, break_dataframe=None
):
    """Simulate a labeled pandas Series (1D) or DataFrame (2D).

    The values come from ``simulate_array(*simulate_array_arguments)``.

    Parameters
    ----------
    name_0
        Prefix of the axis-0 labels, which are ``"{name_0}{i}"``. The index
        itself is named ``"Index"``.
    name_1
        Name of the Series (1D array), or prefix of the column labels
        ``"{name_1}{i}"`` (2D array). The columns are named ``"Column"``.
    *simulate_array_arguments
        Positional arguments forwarded unchanged to ``simulate_array``.
    break_dataframe : int or None
        Ignored for a Series. For a DataFrame, when not ``None``:

        * if it is smaller than the number of rows, each of the first
          ``break_dataframe`` rows ``i`` is overwritten with the constant ``i``;
        * ``n_row // break_dataframe`` random rows crossed with
          ``n_column // break_dataframe`` random columns are set to NaN.

    Returns
    -------
    pandas.Series or pandas.DataFrame

    Raises
    ------
    ValueError
        If the simulated array is neither 1D nor 2D, or if ``break_dataframe``
        is smaller than 1 for a DataFrame.
    """
    array = simulate_array(*simulate_array_arguments)

    n_dimension = array.ndim

    if n_dimension == 1:
        series_or_dataframe = Series(array, name=name_1)

    elif n_dimension == 2:
        column_labels = [f"{name_1}{i}" for i in range(array.shape[1])]

        series_or_dataframe = DataFrame(
            array, columns=Index(column_labels, name="Column")
        )

    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError; fail early with a clear message instead.
        raise ValueError(
            f"Can only simulate a 1D or 2D array; got {n_dimension} dimension(s)."
        )

    series_or_dataframe.index = Index(
        [f"{name_0}{i}" for i in range(array.shape[0])], name="Index"
    )

    if n_dimension == 2 and break_dataframe is not None:
        # 0 used to raise ZeroDivisionError and negatives failed inside choice.
        if break_dataframe < 1:
            raise ValueError(
                f"break_dataframe must be at least 1; got {break_dataframe}."
            )

        n_row, n_column = series_or_dataframe.shape

        if break_dataframe < n_row:
            # Make the leading rows constant (row i holds the value i).
            for i in range(break_dataframe):
                series_or_dataframe.iloc[i] = i

        # Draw rows first, then columns, to keep the random-draw order stable.
        nan_rows = choice(
            series_or_dataframe.index, size=n_row // break_dataframe, replace=False
        )

        nan_columns = choice(
            series_or_dataframe.columns,
            size=n_column // break_dataframe,
            replace=False,
        )

        series_or_dataframe.loc[nan_rows, nan_columns] = nan

    return series_or_dataframe

In [None]:
import pandas as pd

import kraft

In [None]:
# Scoring function handed to kraft.function_heat_map as its third positional
# argument in the cells below ("ic" presumably means information coefficient;
# confirm against kraft).
function = kraft.compute_ic

In [None]:
# Exercise the heat map over a range of matrix shapes: first sweep the number
# of rows with 8 columns, then sweep the number of columns with 8 rows
# ((8, 8) is only visited once, in the first sweep).
for size in (
    *((n_row, 8) for n_row in (1, 2, 4, 8, 16, 32, 64, 128, 256)),
    *((8, n_column) for n_column in (2, 4, 16, 32, 64, 128, 256)),
):
    # The target is labeled "Column{i}", matching the data's column labels.
    target = simulate_series_or_dataframe("Column", "Target", size[1], "uniform")
    data = simulate_series_or_dataframe("Index", "Column", size, "uniform")

    kraft.function_heat_map(target, data, function, n_extreme=None)

In [None]:
size = (1, 10)

target = simulate_series_or_dataframe("Column", "Target", size[1], "uniform")
data = simulate_series_or_dataframe("Index", "Column", size, "uniform")

# Run every combination of 0 or 10 permutations with 0 or 10 samplings, in the
# order (0, 0), (0, 10), (10, 0), (10, 10).
for n_permutation in (0, 10):
    for n_sampling in (0, 10):
        kraft.function_heat_map(
            target,
            data,
            function,
            n_permutation=n_permutation,
            n_sampling=n_sampling,
        )

In [None]:
# Heat map on a deliberately "broken" 10 x 20 matrix.
size = (10, 20)

# Target labeled "Column0" .. "Column19", matching the data's column labels.
target = simulate_series_or_dataframe("Column", "Target", size[1], "uniform")

# With break_dataframe=2, the first 2 rows are overwritten with the constants
# 0 and 1, and a random 5-row x 10-column selection of cells is set to NaN.
data = simulate_series_or_dataframe(
    "Index", "Column", size, "uniform", break_dataframe=2
)

kraft.function_heat_map(target, data, function)

In [None]:
# 20 x 10 matrix with 2 constant leading rows and a random block of NaNs.
data = simulate_series_or_dataframe(
    "Index", "Column", (20, 10), "uniform", break_dataframe=2
)

# Two categorical targets, one value per data column (10 each).
for target_vector in (
    (1, 2, 2, 0, 0, 0, 0, 0, 0, 0),  # one 1, two 2s, seven 0s
    (2, 2, 3, 3, 3, 0, 0, 0, 0, 0),  # two 2s, three 3s, five 0s
):
    target = pd.Series(data=target_vector, index=data.columns)

    # Try each vector_ascending setting for the target.
    for target_ascending in (None, True, False):
        kraft.function_heat_map(
            target,
            data,
            function,
            vector_ascending=target_ascending,
            vector_data_type="categorical",
        )

In [None]:
# Load the target vector. read_csv's squeeze=True was deprecated in pandas 1.4
# and removed in 2.0; squeezing the column axis of the loaded frame gives the
# same single-column -> Series conversion on old and new pandas alike.
target = pd.read_csv(
    "function_heat_map_vector.tsv", sep="\t", index_col=0
).squeeze("columns")

data = pd.read_csv("function_heat_map_matrix.tsv", sep="\t", index_col=0)

# Binary target against the full matrix, without sampling or permutation.
kraft.function_heat_map(
    target, data, function, n_sampling=0, n_permutation=0, vector_data_type="binary"
)

# Use the matrix's first row as the target, against the first row only ...
kraft.function_heat_map(
    data.iloc[0], data.iloc[:1], function, n_sampling=0, n_permutation=0, plot_std=3
)

# ... and against the first two rows.
kraft.function_heat_map(
    data.iloc[0], data.iloc[:2], function, n_sampling=0, n_permutation=0, plot_std=3
)

In [None]:
target = simulate_series_or_dataframe("Column", "Target", 10, "uniform")

# One matrix per data type: uniform floats, integers in {0, 1, 2}, and
# integers in {0, 1}.
data_continuous = simulate_series_or_dataframe("Index", "Column", (2, 10), "uniform")
data_categorical = simulate_series_or_dataframe("Index", "Column", (4, 10), 3)
data_binary = simulate_series_or_dataframe("Index", "Column", (8, 10), 2)

# Remove a different column from each matrix so that their column sets only
# partially overlap (presumably to exercise
# plot_only_shared_by_target_and_all_data below).
data_continuous = data_continuous.drop(data_continuous.columns[-1], axis=1)
data_categorical = data_categorical.drop(data_categorical.columns[-2], axis=1)
data_binary = data_binary.drop(data_binary.columns[-3], axis=1)

data_dicts = {
    "Continuous Data": {
        "dataframe": data_continuous,
        "data_type": "continuous",
    },
    "Categorical Data": {
        "dataframe": data_categorical,
        "data_type": "categorical",
    },
    "Binary Data": {
        "dataframe": data_binary,
        "data_type": "binary",
    },
}

for plot_only_shared_by_target_and_all_data in (False, True):
    # NOTE(review): every other kraft.function_heat_map call in this notebook
    # passes `function` as the third positional argument; it is omitted here.
    # Confirm against kraft's signature whether that is intended.
    kraft.function_heat_map_summary(
        target,
        data_dicts,
        {
            data_name: kraft.function_heat_map(
                target, data_dict["dataframe"], plot=False
            )
            for data_name, data_dict in data_dicts.items()
        },
        plot_only_shared_by_target_and_all_data=plot_only_shared_by_target_and_all_data,
    )