In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd

import kraft

In [None]:
def make_target_and_data(
    size, target_how="uniform", data_how="uniform", break_data=False
):
    """
    Make a target (series) and data (dataframe) that share column labels.

    Arguments:
        size: indexable as (number of rows, number of columns); size[1] sets the
            target length and size itself is passed on to make the data.
        target_how: passed to kraft.make_array to make the target values and
            used in the target's name.
        data_how: passed to kraft.make_array to make the data values.
        break_data: if True, (1) when there are more than 3 rows, overwrite
            rows 0, 1, and 2 with the constants 0, 1, and 2, and (2) set the
            cells shared by a random third (rounded down) of the rows and a
            random third (rounded down) of the columns to NaN.

    Returns:
        (target, data)
    """

    n_row, n_column = size[0], size[1]

    target = pd.Series(
        kraft.make_array(n_column, target_how),
        index=list(map("Column {}".format, range(n_column))),
        name="Target ({})".format(target_how),
    )

    data = pd.DataFrame(
        kraft.make_array(size, data_how),
        index=list(map("Row {}".format, range(n_row))),
        columns=target.index,
    )

    if not break_data:

        return target, data

    if n_row > 3:

        # Make the first 3 rows constant (each row holds its own position).
        for constant in range(3):

            data.iloc[constant] = constant

    # Pick rows first and columns second to keep the order of random draws.
    rows_to_break = np.random.choice(
        data.index, size=data.index.size // 3, replace=False
    )

    columns_to_break = np.random.choice(
        data.columns, size=data.columns.size // 3, replace=False
    )

    data.loc[rows_to_break, columns_to_break] = np.nan

    return target, data

In [None]:
# Make a single-row dataset and run the match panel with every combination of
# n_permutation and n_sampling being 0 or 10.
target, data = make_target_and_data((1, 10))

for n_permutation in (0, 10):

    for n_sampling in (0, 10):

        kraft.make_match_panel(
            target, data, n_permutation=n_permutation, n_sampling=n_sampling
        )

In [None]:
# Sweep the data shape: first 8 rows with a varying number of columns, then a
# varying number of rows with 8 columns ((8, 8) is run once, in the second part).
for size in (
    *((8, n_column_) for n_column_ in (2, 4, 16, 32, 64, 128, 256)),
    *((n_row_, 8) for n_row_ in (1, 2, 4, 8, 16, 32, 64, 128, 256)),
):

    kraft.make_match_panel(*make_target_and_data(size=size), n_extreme=None)

In [None]:
# Run the match panel on data with constant rows and missing values.
kraft.make_match_panel(*make_target_and_data(size=(10, 20), break_data=True))

In [None]:
# size is a required argument of make_target_and_data; calling without it raises
# a TypeError. Each target vector below has 10 values and is indexed by
# data.columns, so the data must have 10 columns.
data = make_target_and_data((10, 10), break_data=True)[1]

# Run the match panel with categorical targets, with and without sorting them.
for target_vector in (
    ((1,) * 1 + (2,) * 2 + (0,) * 7),
    ((2,) * 2 + (3,) * 3 + (0,) * 5),
):

    target = pd.Series(target_vector, index=data.columns)

    for target_ascending in (None, True, False):

        kraft.make_match_panel(
            target,
            data,
            target_ascending=target_ascending,
            target_data_type="categorical",
        )

In [None]:
# read_csv's squeeze=True was deprecated in pandas 1.4 and removed in 2.0;
# squeezing the single-column dataframe afterwards gives the same series.
target = pd.read_csv("target.tsv", sep="\t", index_col=0).squeeze("columns")

data = pd.read_csv("data.tsv", sep="\t", index_col=0)

# Binary target read from file versus all data rows.
kraft.make_match_panel(
    target, data, n_sampling=0, n_permutation=0, target_data_type="binary"
)

# Use the first data row as the target against only itself (1 row of data).
kraft.make_match_panel(
    data.iloc[0], data.iloc[:1], n_sampling=0, n_permutation=0, plot_std=3
)

# Use the first data row as the target against the first 2 data rows.
kraft.make_match_panel(
    data.iloc[0], data.iloc[:2], n_sampling=0, n_permutation=0, plot_std=3
)

In [None]:
n_row = 8

n_column = 8

target = pd.Series(
    np.random.normal(size=n_column),
    index=("Column {}".format(i) for i in range(n_column)),
)

# The integer-valued dataframes are made as float because a column of each is
# set to NaN below; assigning NaN into an int64 column relies on silent
# upcasting, which pandas deprecated in 2.1 (PDEP-6).
data_binary = pd.DataFrame(
    np.random.randint(0, 2, size=(n_row, n_column)),
    index=("Binary {}".format(i) for i in range(n_row)),
    columns=target.index,
    dtype=float,
)

data_categorical = pd.DataFrame(
    np.random.randint(0, 3, size=(n_row, n_column)),
    index=("Categorical {}".format(i) for i in range(n_row)),
    columns=target.index,
    dtype=float,
)

data_continuous = pd.DataFrame(
    np.random.normal(size=(n_row, n_column)),
    index=("Continuous {}".format(i) for i in range(n_row)),
    columns=target.index,
)

# Give each dataframe a different all-missing column.
data_binary.iloc[:, 0] = np.nan

data_categorical.iloc[:, 1] = np.nan

data_continuous.iloc[:, 2] = np.nan

# Remove a different column from each dataframe so that the dataframes do not
# share all columns with the target or with each other.
data_binary = data_binary.drop(columns=target.index[-1])

data_categorical = data_categorical.drop(columns=target.index[-2])

data_continuous = data_continuous.drop(columns=target.index[-3])

data_dicts = {
    "Binary": {"dataframe": data_binary, "type": "binary"},
    "Categorical": {"dataframe": data_categorical, "type": "categorical"},
    "Continuous": {"dataframe": data_continuous, "type": "continuous"},
}

# Compute the statistics once on all rows stacked together (without plotting).
score_moe_p_value_fdr = kraft.make_match_panel(
    target,
    pd.concat([data_dict["dataframe"] for data_dict in data_dicts.values()]),
    plot=False,
)

# Every data name maps to the same combined statistics.
score_moe_p_value_fdr_dicts = {
    data_name: score_moe_p_value_fdr for data_name in data_dicts
}

for plot_only_shared_by_target_and_all_data in (False, True):

    kraft.make_match_panel_summary(
        target,
        data_dicts,
        score_moe_p_value_fdr_dicts,
        plot_only_shared_by_target_and_all_data=plot_only_shared_by_target_and_all_data,
    )