In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before code is executed and edits to them are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from environment import *

# Read the notebook's settings from setting.yaml (relative to the working
# directory). safe_load is used instead of yaml.load: calling yaml.load without
# an explicit Loader is deprecated since PyYAML 5.1 and is an error in
# PyYAML 6, and the full loader can construct arbitrary Python objects.
with open("setting.yaml") as yaml_file:

    SETTING = yaml.safe_load(yaml_file)

# Build the mapping of path keys (e.g. "w.tsv", "h.tsv") used by later cells.
PATH = make_path_dict(SETTING)

CCAL version 0.9.1 @ /home/kwatme/github/ccal/ccal/__init__.py


In [3]:
# Read the w and h tables from the paths registered in PATH, using the first
# column of each file as the index.
w, h = (
    pd.read_table(PATH[file_name], index_col=0) for file_name in ("w.tsv", "h.tsv")
)

# Name the column axes: the columns of w are labelled "Factor", the columns of
# h with the configured sample alias.
w.columns.name = "Factor"

h.columns.name = SETTING["sample_alias"]

In [4]:
# Optionally drop elements from w (rows) and h (columns) based on their entropy.
# A matrix is left untouched when its "fraction to drop" setting is None.
# NOTE: the drop happens in place on w / h, so re-running this cell drops again
# from the already-filtered matrices.
for feature_type, w_or_h, df, apply_axis, drop_axis in (
    ("feature", "w", w, 1, 0),
    ("sample", "h", h, 0, 1),
):

    fraction_to_drop = SETTING[
        "gps_map_caotic_{}_element_fraction_to_drop".format(w_or_h)
    ]

    if fraction_to_drop is None:

        continue

    matrix_name = w_or_h.title()

    print("{} shape before: {}".format(matrix_name, df.shape))

    # One entropy value per row of w / per column of h.
    element_entropy = df.apply(ccal.compute_entropy, axis=apply_axis)

    ranking_title = "{} Ranking by Entropy in {}".format(
        SETTING["{}_alias".format(feature_type)], matrix_name
    )

    # Presumably selects the requested fraction of elements with the highest
    # entropy (given the ">" argument) and plots the ranking -- confirm against
    # ccal.select_series_indices.
    elements_to_drop = ccal.select_series_indices(
        element_entropy,
        ">",
        fraction=fraction_to_drop,
        title={"text": ranking_title},
        xaxis={"title": "Rank"},
        yaxis={"title": "Entropy"},
    )

    df.drop(elements_to_drop, axis=drop_axis, inplace=True)

    print("{} shape after: {}".format(matrix_name, df.shape))

In [5]:
# Collect the pull-related constructor arguments from the settings; each
# GPSMap keyword "<name>" is read from the setting "gps_map_<name>".
gps_map_pull_settings = {
    name: SETTING["gps_map_{}".format(name)]
    for name in ("w_n_pull", "w_pull_power", "h_n_pull", "h_pull_power")
}

# Build the GPS map from the transposed w and from h, without plotting at
# construction time (plots are produced explicitly in later cells).
gps_map = ccal.GPSMap(w=w.T, h=h, plot=False, **gps_map_pull_settings)

In [6]:
# Plot one GPS map for the w elements and one for the h elements. When a
# cluster-by-column table exists on disk for that matrix, label the elements
# with their cluster and plot the map a second time with those labels.
for element_type, w_or_h, elements in (
    ("feature", "w", gps_map.w_elements),
    ("sample", "h", gps_map.h_elements),
):

    title = "{}<br>{} GPS Map".format(
        SETTING["feature_x_sample_alias"], SETTING["{}_alias".format(element_type)]
    )

    element_marker_size = SETTING["gps_map_{}_element_marker_size".format(w_or_h)]

    gps_map.plot_gps_map(
        w_or_h,
        element_marker_size=element_marker_size,
        title=title,
        html_file_path="{}/gps_map.html".format(PATH["{}|gps_map/".format(w_or_h)]),
    )

    cluster_x_column_file_path = PATH["{}|cluster_x_column.tsv".format(w_or_h)]

    # Cluster labels are optional; skip the labelled plot when the file is absent.
    if not os.path.isfile(cluster_x_column_file_path):

        continue

    cluster_x_column = pd.read_table(cluster_x_column_file_path, index_col=0)

    # For every column, take the row label holding the maximum value, strip the
    # leading "C" characters and convert the rest to an integer cluster label
    # (presumably the rows are named like "C0", "C1", ... -- confirm against
    # the file). idxmax is used because Series.argmax returns an integer
    # position, not a label, since pandas 1.0, which would break the .str
    # accessor below.
    element_labels = (
        cluster_x_column.idxmax().str.lstrip("C").astype(int)[elements]
    )

    gps_map.set_element_labels(
        w_or_h,
        element_labels,
        bandwidth_factor=SETTING["gps_map_{}_bandwidth_factor".format(w_or_h)],
    )

    # Only pass a plotly HTML file path when a plotly directory is configured.
    if SETTING["plotly_directory_path"] is None:

        plotly_html_file_path = None

    else:

        plotly_html_file_path = "{}/{} GPS Map.html".format(
            SETTING["plotly_directory_path"], element_type.title()
        )

    gps_map.plot_gps_map(
        w_or_h,
        element_marker_size=element_marker_size,
        title=title,
        html_file_path="{}/gps_map.html".format(
            PATH["{}|hcc|gps_map/".format(w_or_h)]
        ),
        plotly_html_file_path=plotly_html_file_path,
    )

file:///media/kwatme/CarrotCake/project/model_and_infer.gtex_v7_ribosomal_rna_in_blood_and_brain/output/signal/raw/nmf/3/w/gps_map/gps_map.html


file:///media/kwatme/CarrotCake/project/model_and_infer.gtex_v7_ribosomal_rna_in_blood_and_brain/output/signal/raw/nmf/3/h/gps_map/gps_map.html


In [7]:
# Save the GPS map object to the path registered under "gps_map.pickle.gz"
# (presumably a gzip-compressed pickle, judging by the key name -- confirm
# against ccal.dump_gps_map).
ccal.dump_gps_map(gps_map, PATH["gps_map.pickle.gz"])

In [8]:
# Load the feature-by-sample signal table (first column as index) and replace
# missing values with 0.
signal = pd.read_table(
    PATH["feature_x_sample.signal.tsv"], index_col=0
).fillna(value=0)

# Label the column axis with the configured sample alias.
signal.columns.name = SETTING["sample_alias"]

In [9]:
# For each matrix (w with per-row statistics, h with per-column statistics of
# the signal table), draw one GPS map per statistic, passing that statistic as
# the element annotation.
for element_type, w_or_h, axis in (("feature", "w", 1), ("sample", "h", 0)):

    # One row per statistic, one column per element along the chosen axis.
    signal_statistics = pd.DataFrame(
        [signal.mean(axis=axis), signal.std(axis=axis), (signal == 0).sum(axis=axis)],
        index=("Mean", "Standard Deviation", "Number of 0"),
    )

    element_marker_size = SETTING["gps_map_{}_element_marker_size".format(w_or_h)]

    element_alias = SETTING["{}_alias".format(element_type)]

    for index, element_value in signal_statistics.iterrows():

        # plot_gps_map receives the statistic as a single-row DataFrame.
        annotation_x_element = element_value.to_frame().T

        gps_map.plot_gps_map(
            w_or_h,
            annotation_x_element=annotation_x_element,
            element_marker_size=element_marker_size,
            title="{} GPS Map<br>{}".format(element_alias, index),
        )