In [None]:
# Enable IPython's autoreload extension so edits to imported modules are
# picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all imported modules before executing each cell.
%autoreload 2

In [None]:
from environment import *

# Load the notebook settings from the YAML file next to this notebook.
# safe_load is used instead of yaml.load: calling yaml.load without an explicit
# Loader is deprecated (and rejected by PyYAML >= 6), and the default loader
# can construct arbitrary Python objects from tagged input.
with open("setting.yaml") as yaml_file:
    SETTING = yaml.safe_load(yaml_file)

# PATH is indexed by file/directory name in the cells below to obtain the
# corresponding path.
PATH = make_path_dict(SETTING)

In [None]:
# Both tables are tab-separated, with the first column used as the row index.
gene_x_sample, target_x_sample = (
    pd.read_csv(PATH[table_name], sep="\t", index_col=0)
    for table_name in ("gene_x_sample.processed.tsv", "target_x_sample.tsv")
)

In [None]:
def compute_log_ratio(logged_values, target):
    """Return the mean of the target == 1 group minus the mean of the target == 0 group.

    Parameters
    ----------
    logged_values : array-like supporting boolean-mask indexing and ``.mean()``
        Values to compare; presumably already log-transformed, so that the
        difference of group means corresponds to a log ratio.
    target : array-like aligned with ``logged_values``
        Group labels; only entries equal to 1 or 0 are used.

    Returns
    -------
    The difference ``mean(values where target == 1) - mean(values where target == 0)``.
    """

    group_1_mean = logged_values[target == 1].mean()

    group_0_mean = logged_values[target == 0].mean()

    return group_1_mean - group_0_mean

In [None]:
# Keyword arguments shared by every ccal.make_match_panel call below.
# The numeric options are taken verbatim from the settings file.
make_match_panel_keyword_arguments = {
    setting_key: SETTING[setting_key]
    for setting_key in ("n_job", "n_sampling", "n_permutation")
}

make_match_panel_keyword_arguments["target_type"] = "binary"

make_match_panel_keyword_arguments["plot_std"] = SETTING["plot_std"]

# For every target (one row of target_x_sample), match the genes against the
# target with each match function, then plot the resulting scores.
for target_name, target_values in target_x_sample.iterrows():

    # Drop samples labelled -1 (presumably "not applicable" for this target).
    target_values = target_values[target_values != -1]

    output_directory_path = os.path.join(
        PATH["find_differentially_expressed_gene/"], target_name
    )

    ccal.establish_path(output_directory_path, "directory")

    # Size of the smallest class. value_counts() omits classes with no samples
    # and its min() is NaN for an empty target; NaN < min_n is False, so such
    # targets would slip past the guard below. Treat "fewer than two classes
    # present" as a smallest class of size 0 so every match function is skipped.
    target_value_counts = target_values.value_counts()

    if target_value_counts.size < 2:

        target_value_min_n = 0

    else:

        target_value_min_n = target_value_counts.min()

    # Each match function is paired with the minimum number of samples it
    # needs in the smallest class.
    for match_function, min_n in (
        (ccal.compute_information_coefficient, 3),
        (compute_log_ratio, 1),
    ):

        if target_value_min_n < min_n:

            continue

        file_path_prefix = os.path.join(
            output_directory_path, "all.{}".format(match_function.__name__)
        )

        # Match panel over all genes.
        ccal.make_match_panel(
            target_values,
            gene_x_sample,
            match_function=match_function,
            n_extreme=SETTING["n_extreme"],
            title="All ({})".format(match_function.__name__),
            file_path_prefix=file_path_prefix,
            **make_match_panel_keyword_arguments,
        )

        # Read the statistics back from "<file_path_prefix>.tsv" (presumably
        # written by the make_match_panel call above).
        score_moe_p_value_fdr = pd.read_csv(
            "{}.tsv".format(file_path_prefix), sep="\t", index_col=0
        )

        # Match panel restricted to the genes of interest, passing in the
        # statistics loaded above.
        ccal.make_match_panel(
            target_values,
            gene_x_sample.reindex(SETTING["genes_to_peek"]),
            score_moe_p_value_fdr=score_moe_p_value_fdr,
            n_extreme=None,
            title="Peek ({})".format(match_function.__name__),
            **make_match_panel_keyword_arguments,
        )

        # Scores in ascending order, with the genes of interest annotated.
        y = score_moe_p_value_fdr["Score"].sort_values()

        y.name = "Score ({})".format(match_function.__name__)

        ccal.plot_point_and_annotate(
            None,
            y,
            "y",
            annotation=(("Peek", SETTING["genes_to_peek"], 8, "#20d9ba"),),
            title={"text": target_name},
            html_file_path=os.path.join(
                output_directory_path,
                "{}.plot_point_and_annotate.html".format(match_function.__name__),
            ),
        )