In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from environment import *

# Load the notebook-wide settings from "setting.yaml".
#
# ``yaml.safe_load`` is used instead of ``yaml.load(yaml_file)``: calling
# ``yaml.load`` without an explicit ``Loader`` emits a warning since PyYAML 5.1
# and raises ``TypeError`` in PyYAML 6. ``safe_load`` also refuses to construct
# arbitrary Python objects from the file.
# NOTE(review): assumes setting.yaml only contains plain YAML types (mappings,
# lists, strings, numbers) — confirm no custom ``!!python/...`` tags are used.
with open("setting.yaml") as yaml_file:

    SETTING = yaml.safe_load(yaml_file)

# Path lookup built from the settings by ``make_path_dict``; later cells index
# it by file or directory name.
PATH = make_path_dict(SETTING)

In [None]:
# Read the tab-separated "target_x_sample.tsv" table, using its first column as
# the row index. The row labels are the target names iterated over later on.
target_x_sample = pd.read_csv(
    PATH["target_x_sample.tsv"],
    sep="\t",
    index_col=0,
)

In [None]:
# Groups of elements to emphasize in every scatter plot. Each group is drawn by
# ``plot`` as its own trace with the given marker size and color, and each of
# its elements gets a text annotation.
highlights = dict(
    Peek=dict(
        elements=SETTING["gene_sets_to_peek"],
        size=12,
        color="#20d9ba",
    )
)

In [None]:
from numpy import nonzero

from ccal import plot_and_save


def _scatter_trace(points, name, marker):
    """Build one scatter-trace dict: column 0 -> x, column 1 -> y, index -> text."""

    return dict(
        type="scatter",
        name=name,
        x=points.iloc[:, 0],
        y=points.iloc[:, 1],
        text=points.index,
        mode="markers",
        marker=marker,
    )


def plot(element_x_y, highlights, title=None, html_file_path=None):
    """Scatter-plot elements by their first two columns and emphasize some.

    Elements are split into two traces by the sign of the first column ("-" for
    values below zero, "+" for everything else). Every group in ``highlights``
    adds a further trace, plus one text annotation per highlighted element.
    The assembled figure dict is handed to ``plot_and_save``.

    Args:
        element_x_y: DataFrame indexed by element name; column 0 is plotted on
            the x axis and column 1 on the y axis. The column names become the
            axis titles.
        highlights: dict mapping a group name to a dict with keys
            ``"elements"`` (labels to emphasize), ``"size"`` (marker size) and
            ``"color"`` (marker color). Labels missing from ``element_x_y`` are
            ignored.
        title: figure title.
        html_file_path: forwarded to ``plot_and_save`` as its second argument.
    """

    opacity = 0.24

    is_negative = element_x_y.iloc[:, 0] < 0

    data = [
        _scatter_trace(
            element_x_y.loc[is_negative],
            "-",
            dict(color="#0088ff", opacity=opacity),
        ),
        _scatter_trace(
            element_x_y.loc[~is_negative],
            "+",
            dict(color="#ff1968", opacity=opacity),
        ),
    ]

    annotations = []

    for group_name, group in highlights.items():

        # Use an explicit set intersection. ``index & list`` only meant
        # "intersection" in old pandas; that behavior was deprecated and then
        # removed in pandas 2.0, where ``&`` is an element-wise logical AND.
        group_elements = element_x_y.index.intersection(group["elements"])

        element_x_y__group = element_x_y.loc[group_elements]

        data.append(
            _scatter_trace(
                element_x_y__group,
                group_name,
                dict(
                    size=group["size"],
                    color=group["color"],
                    line=dict(width=1, color="#ebf6f7"),
                ),
            )
        )

        # Pair labels with the first two columns explicitly, so frames with
        # extra columns do not break the (x, y) unpacking.
        annotations.extend(
            dict(
                x=x,
                y=y,
                text=index,
                font=dict(size=10),
                arrowhead=2,
                arrowsize=0.8,
                clicktoshow="onoff",
            )
            for index, x, y in zip(
                element_x_y__group.index,
                element_x_y__group.iloc[:, 0],
                element_x_y__group.iloc[:, 1],
            )
        )

    figure = dict(
        layout=dict(
            title=title,
            xaxis=dict(title=element_x_y.columns[0]),
            yaxis=dict(title=element_x_y.columns[1]),
            annotations=annotations,
        ),
        data=data,
    )

    # The third positional argument is passed as None, exactly as before.
    plot_and_save(figure, html_file_path, None)

In [None]:
# For every target: read its gene-set match results, then write one scatter
# plot of Score vs. (1 - P-Value) for all rows, and one plot per configured
# gene-set file restricted to the rows named in that file.

# Loop-invariant: name of the match function embedded in the results file name.
match_function_name = "compute_information_coefficient"

# Lazily filled cache {gene_set_file_path: index returned by ccal.read_gmt}, so
# each GMT file is parsed at most once instead of once per target.
# NOTE(review): presumably that index holds gene-set names — confirm in ccal.
gene_set_index_by_file_path = {}

for target_name in target_x_sample.index:

    output_directory_path = "{}/{}".format(
        PATH["compare_differentially_expressed_gene_set/"], target_name
    )

    ccal.establish_path(output_directory_path, "directory")

    score_moe_p_value_fdr = pd.read_csv(
        "{}/{}/all.{}.tsv".format(
            PATH["find_differentially_expressed_gene_set/"],
            target_name,
            match_function_name,
        ),
        sep="\t",
        index_col=0,
    )

    score_moe_p_value_fdr["1 - P-Value"] = 1 - score_moe_p_value_fdr["P-Value"]

    # Targets with any missing P-Value are not plotted at all.
    if score_moe_p_value_fdr["1 - P-Value"].isna().any():

        continue

    score_x_confidence = score_moe_p_value_fdr[["Score", "1 - P-Value"]]

    plot(
        score_x_confidence,
        highlights,
        title="All",
        html_file_path="{}/all.html".format(output_directory_path),
    )

    for gene_set_file_path in SETTING["gene_set_file_paths"]:

        gene_sets_name = gene_set_file_path.split("/")[-1]

        if gene_set_file_path not in gene_set_index_by_file_path:

            gene_set_index_by_file_path[gene_set_file_path] = ccal.read_gmt(
                gene_set_file_path
            ).index

        # Explicit set intersection: ``Index & Index`` stopped meaning
        # "intersection" in pandas 2.0 (deprecated before that).
        shared_gene_sets = score_x_confidence.index.intersection(
            gene_set_index_by_file_path[gene_set_file_path]
        )

        plot(
            score_x_confidence.loc[shared_gene_sets],
            highlights,
            title=gene_sets_name,
            html_file_path="{}/{}.html".format(
                output_directory_path, gene_sets_name
            ),
        )