In [None]:
import plotly

In [None]:
# Put plotly into offline notebook mode before any panel is drawn;
# `connected=True` is handed straight to plotly.
plotly.offline.init_notebook_mode(connected=True)

In [None]:
import numpy as np

In [None]:
import ccal

In [None]:
# Seed NumPy's global random state with ccal's RANDOM_SEED so that the random
# targets and data generated in the cells below are the same on every
# Restart & Run All.
np.random.seed(seed=ccal.RANDOM_SEED)

In [None]:
import pandas as pd

In [None]:
# Smoke test on a 64-x-64 random problem: draw the panel once on clean data,
# then again after injecting constant rows and a block of missing values.
n_row = 64
n_column = 64

column_labels = ["Column {}".format(i) for i in range(n_column)]
row_labels = ["ABCDEFGHIJKLMNOPQRSTUVWXYZ {}".format(i) for i in range(n_row)]

# The target is drawn before the data so the seeded random stream is consumed
# in the same order as before.
target_values = np.random.normal(size=n_column)
target = pd.Series(target_values, index=column_labels, name="Target Name")

data_values = np.random.normal(size=(n_row, n_column))
data = pd.DataFrame(data_values, index=row_labels, columns=target.index)

ccal.make_match_panel(target, data)

# Overwrite the first three rows with constants: row k is filled with k.
for constant in range(3):
    data.iloc[constant] = constant

# Pick half of the row labels and half of the column labels at random (rows
# first, then columns) and blank out the cells where they intersect.
half_of_rows = np.random.choice(
    data.index, size=data.index.size // 2, replace=False
)
half_of_columns = np.random.choice(
    data.columns, size=data.columns.size // 2, replace=False
)
data.loc[half_of_rows, half_of_columns] = np.nan

ccal.make_match_panel(target, data)

In [None]:
# Edge case: a 2-x-2 frame whose first column is all zeros and whose second
# column is entirely missing.
_2d_array = np.array([[0, np.nan], [0, np.nan]])

n_index, n_feature = _2d_array.shape

target = pd.Series(
    np.random.normal(size=n_feature),
    index=["Column {}".format(i) for i in range(n_feature)],
)

data = pd.DataFrame(
    _2d_array,
    index=["Index {}".format(i) for i in range(n_index)],
    columns=target.index,
)

ccal.make_match_panel(target, data)

In [None]:
# Sweep the panel over a range of shapes: 8 rows against 1..256 columns, then
# 1..256 rows against 8 columns. The 8-x-8 shape is left out of the first sweep
# so that it is drawn only once (as part of the second sweep).
powers_of_two = [2 ** exponent for exponent in range(9)]

shapes = [(8, width) for width in powers_of_two if width != 8]
shapes += [(height, 8) for height in powers_of_two]

for n_row, n_column in shapes:

    column_labels = ["Column {}".format(i) for i in range(n_column)]
    row_labels = ["Index {}".format(i) for i in range(n_row)]

    # Target first, then data, to keep the random draw order unchanged.
    target = pd.Series(np.random.normal(size=n_column), index=column_labels)

    data = pd.DataFrame(
        np.random.normal(size=(n_row, n_column)),
        index=row_labels,
        columns=target.index,
    )

    panel_title = "{}-x-{} Match Panel".format(n_row, n_column)

    ccal.make_match_panel(target, data, n_extreme=None, title=panel_title)

In [None]:
# Exercise discrete targets: one two-class target and two three-class targets,
# each drawn with every `target_ascending` setting.
n_row = 24
n_column = 16

data = pd.DataFrame(
    np.random.normal(size=(n_row, n_column)),
    index=["Index {}".format(i) for i in range(n_row)],
    columns=["Column {}".format(i) for i in range(n_column)],
)

# Leave a 2-x-2 patch of missing values in the top-right corner of the data.
data.iloc[:2, -2:] = np.nan

# Every tuple has 16 labels, one per data column.
target_value_sets = [
    (0,) * 8 + (1,) * 8,
    (0,) * 1 + (2,) * 2 + (1,) * 13,
    (0,) * 2 + (2,) * 2 + (1,) * 12,
]

for target_values in target_value_sets:

    target = pd.Series(target_values, index=data.columns)

    # At most two distinct non-missing labels means binary; otherwise categorical.
    n_distinct_label = target.dropna().unique().size

    target_type = "binary" if n_distinct_label <= 2 else "categorical"

    for target_ascending in (None, True, False):

        ccal.make_match_panel(
            target,
            data,
            target_type=target_type,
            target_ascending=target_ascending,
        )

In [None]:
# Draw panels from the saved fixtures "target_0.tsv" and "data_0.tsv".
#
# `read_csv(..., squeeze=True)` was deprecated in pandas 1.4 and removed in
# pandas 2.0, where it raises a TypeError. Reading the file as a frame and
# squeezing along the columns is the documented replacement: a one-column
# frame becomes a Series, which is what the panel call receives as its target.
target = pd.read_csv("target_0.tsv", sep="\t", index_col=0).squeeze("columns")

data = pd.read_csv("data_0.tsv", sep="\t", index_col=0)

ccal.make_match_panel(target, data, target_type="binary")

# Use the first data row as the target and match it against the first one,
# then the first two, rows of the same frame.
ccal.make_match_panel(data.iloc[0], data.iloc[:1], plot_std=3)

ccal.make_match_panel(data.iloc[0], data.iloc[:2], plot_std=3)

In [None]:
# Build one continuous, one categorical and one binary 8-x-8 data frame that
# share the target's columns, draw a panel for each with its matching
# `data_type`, then damage the frames for the summary-panel cell that follows.
n_row = 8

n_column = 8

target = pd.Series(
    np.random.normal(size=n_column),
    index=("Column {}".format(i) for i in range(n_column)),
)

data_continuous = pd.DataFrame(
    np.random.random_sample(size=(n_row, n_column)),
    index=("Continuous {}".format(i) for i in range(n_row)),
    columns=target.index,
)

data_categorical = pd.DataFrame(
    np.random.randint(0, 10, size=(n_row, n_column)),
    index=("Categorical {}".format(i) for i in range(n_row)),
    columns=target.index,
)

data_binary = pd.DataFrame(
    np.random.randint(0, 2, size=(n_row, n_column)),
    index=("Binary {}".format(i) for i in range(n_row)),
    columns=target.index,
)

for data, data_type in (
    (data_continuous, "continuous"),
    (data_categorical, "categorical"),
    (data_binary, "binary"),
):

    # Pass the frame paired with this data_type. The previous version always
    # passed `data_binary`, so the continuous and categorical frames were never
    # plotted and the binary frame was scored under the wrong data_type.
    ccal.make_match_panel(target, data, data_type=data_type)

# Blank out a different column in each frame ...
data_continuous.iloc[:, 0] = np.nan

data_categorical.iloc[:, 1] = np.nan

data_binary.iloc[:, 2] = np.nan

# ... and drop a different trailing column from each, so the three frames no
# longer share exactly the same set of columns with the target.
data_continuous.drop(target.index[-1], axis=1, inplace=True)

data_categorical.drop(target.index[-2], axis=1, inplace=True)

data_binary.drop(target.index[-3], axis=1, inplace=True)

In [None]:
# Summary panel over the three frames prepared in the previous cell, drawn once
# with and once without restricting to columns shared by the target and all data.
data_dicts = {
    label: {"df": frame, "type": label.lower()}
    for label, frame in (
        ("Continuous", data_continuous),
        ("Categorical", data_categorical),
        ("Binary", data_binary),
    )
}

# Stack the three frames row-wise and compute their match results in a single
# call, with plotting switched off.
stacked_data = pd.concat([entry["df"] for entry in data_dicts.values()])

score_moe_p_value_fdr = ccal.make_match_panel(target, stacked_data, plot=False)

# Every data name maps to the same combined result object.
score_moe_p_value_fdr_dicts = dict.fromkeys(data_dicts, score_moe_p_value_fdr)

for plot_only_shared_by_target_and_all_data in (False, True):

    ccal.make_summary_match_panel(
        target,
        data_dicts,
        score_moe_p_value_fdr_dicts,
        plot_only_shared_by_target_and_all_data=plot_only_shared_by_target_and_all_data,
    )