In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# NOTE(review): wildcard imports. Names used throughout this notebook but never
# assigned in it (e.g. pd, np, ccal, PATH_DICT, FEATURE_ALIAS, SAMPLE_ALIAS and
# the SELECT_* / *_NORMALIZATION_* settings) presumably come from these two
# modules; which module provides which name is not visible here -- confirm.
from environment import *
from setting import *

In [None]:
# Load the table registered under "feature_x_sample.processed.tsv"; its first
# column is used as the row index.
processed_table_path = PATH_DICT["feature_x_sample.processed.tsv"]

feature_x_sample_processed = pd.read_table(processed_table_path, index_col=0)

In [None]:
# Normalize the processed table with the axis and method configured for the
# raw signal. raise_for_bad=False is forwarded to ccal.normalize_df unchanged
# (presumably it tolerates bad values instead of raising -- TODO confirm).
raw_signal_normalization_arguments = (
    RAW_SIGNAL_NORMALIZATION_AXIS,
    RAW_SIGNAL_NORMALIZATION_METHOD,
)

feature_x_sample_processed_normalized = ccal.normalize_df(
    feature_x_sample_processed,
    *raw_signal_normalization_arguments,
    raise_for_bad=False,
)

In [None]:
# Choose the rows (features) that make up the raw signal.
if not SELECT_FEATURE_AUTOMATICALLY:

    # Manual mode: keep every row of the normalized table.
    features = feature_x_sample_processed_normalized.index

else:

    # Automatic mode: rank rows by the standard deviation of their normalized
    # values and keep element [1] of what the selector returns
    # (presumably the "high" index -- TODO confirm against ccal).
    feature_standard_deviation = feature_x_sample_processed_normalized.std(axis=1)

    feature_selection = ccal.select_series_low_and_high_index(
        feature_standard_deviation,
        title="{} Ranking by Raw Signal".format(FEATURE_ALIAS),
        value_name="Raw Signal Standard Deviation",
    )

    features = feature_selection[1]

feature_selection_message = "Selected {} {}.".format(features.size, FEATURE_ALIAS)

print(feature_selection_message)

In [None]:
# Choose the columns (samples) that make up the raw signal.
if not SELECT_SAMPLE_AUTOMATICALLY:

    # Manual mode: keep every column of the normalized table.
    samples = feature_x_sample_processed_normalized.columns

else:

    # Automatic mode: rank columns by the standard deviation of their
    # normalized values and keep element [1] of what the selector returns
    # (presumably the "high" index -- TODO confirm against ccal).
    sample_standard_deviation = feature_x_sample_processed_normalized.std(axis=0)

    sample_selection = ccal.select_series_low_and_high_index(
        sample_standard_deviation,
        title="{} Ranking by Raw Signal".format(SAMPLE_ALIAS),
        value_name="Raw Signal Standard Deviation",
    )

    samples = sample_selection[1]

sample_selection_message = "Selected {} {}.".format(samples.size, SAMPLE_ALIAS)

print(sample_selection_message)

In [None]:
# Restrict the normalized table to the selected rows and columns; .loc raises
# if a selected label is missing rather than silently inserting NaN.
raw_signal = feature_x_sample_processed_normalized.loc[features, samples]

# Options forwarded verbatim to the summary helper.
raw_signal_summary_options = dict(
    feature_x_sample_alias="Raw Signal",
    feature_alias=FEATURE_ALIAS,
    sample_alias=SAMPLE_ALIAS,
    feature_x_sample_value_name="Signal",
    plot_heat_map_max_size=PLOT_HEAT_MAP_MAX_SIZE,
    plot_distributions_max_size=PLOT_DISTRIBUTIONS_MAX_SIZE,
    plot_rug_max_size=PLOT_RUG_MAX_SIZE,
)

ccal.summarize_feature_x_sample(raw_signal, **raw_signal_summary_options)

In [None]:
# Write the raw signal as a tab-separated file to the path registered under
# "feature_x_sample.raw_signal.tsv".
raw_signal_table_path = PATH_DICT["feature_x_sample.raw_signal.tsv"]

raw_signal.to_csv(raw_signal_table_path, sep="\t")

In [None]:
# Load the feature context table (first column becomes the row index) and
# split it by sign into two non-negative tables.
feature_context_table_path = PATH_DICT["feature_x_sample.feature_context.tsv"]

feature_context = pd.read_table(feature_context_table_path, index_col=0)

# Positive part: values below 0 are raised to 0.
feature_context_positive = feature_context.clip(lower=0)

# Negative part: values above 0 are lowered to 0, then the sign is flipped so
# the remaining entries are stored as magnitudes.
feature_context_negative = -(feature_context.clip(upper=0))

In [None]:
# Load the sample context table (first column becomes the row index) and
# transpose it, then split it by sign into two non-negative tables.
sample_context_table_path = PATH_DICT["feature_x_sample.sample_context.tsv"]

sample_context = pd.read_table(sample_context_table_path, index_col=0).transpose()

# Positive part: values below 0 are raised to 0.
sample_context_positive = sample_context.clip(lower=0)

# Negative part: values above 0 are lowered to 0, then the sign is flipped so
# the remaining entries are stored as magnitudes.
sample_context_negative = -(sample_context.clip(upper=0))

In [None]:
# Build context_negative and context_positive from the feature and/or sample
# context tables, depending on which element types are configured.
if "feature" in ELEMENT_TYPES and "sample" in ELEMENT_TYPES:

    # Both element types: normalize each of the four tables separately, then
    # add feature and sample contexts (pandas aligns the addition on row and
    # column labels).
    # None is passed as the axis argument of ccal.normalize_df (presumably
    # meaning "normalize over the whole table" -- TODO confirm).
    # NOTE(review): CONTEXT_SIGNAL_NORMALIZATION_METHOD is allowed to be None
    # by a later cell; whether normalize_df accepts None here is not visible.
    # NOTE(review): the four *_context_* names are rebound to their normalized
    # versions, so re-running this cell normalizes already-normalized tables.
    normalization_axis = None

    (
        feature_context_negative,
        feature_context_positive,
        sample_context_negative,
        sample_context_positive,
    ) = (
        ccal.normalize_df(
            context,
            normalization_axis,
            CONTEXT_SIGNAL_NORMALIZATION_METHOD,
            raise_for_bad=False,
        )
        for context in (
            feature_context_negative,
            feature_context_positive,
            sample_context_negative,
            sample_context_positive,
        )
    )

    context_negative = feature_context_negative + sample_context_negative

    context_positive = feature_context_positive + sample_context_positive

elif "feature" in ELEMENT_TYPES:

    context_negative = feature_context_negative

    context_positive = feature_context_positive

elif "sample" in ELEMENT_TYPES:

    context_negative = sample_context_negative

    context_positive = sample_context_positive

else:

    # Fail here, with a clear message, instead of leaving context_negative /
    # context_positive undefined (or stale from an earlier run) for later cells.
    raise ValueError(
        "ELEMENT_TYPES must contain 'feature' and/or 'sample'; got {!r}.".format(
            ELEMENT_TYPES
        )
    )

In [None]:
# Choose the rows (features) kept for the negative and the positive context.
if not SELECT_FEATURE_AUTOMATICALLY:

    # Manual mode: keep every row that has at least one value that is neither
    # 0 nor missing (zeros are turned into NaN, then all-NaN rows are dropped).
    context_negative_features, context_positive_features = (
        context.replace(0, np.nan).dropna(how="all").index
        for context in (context_negative, context_positive)
    )

else:

    # Automatic mode: rank rows by their summed context signal and keep
    # element [1] of what the selector returns (presumably the "high" index
    # -- TODO confirm against ccal). Negative is processed before positive.
    context_negative_features, context_positive_features = (
        ccal.select_series_low_and_high_index(
            context.sum(axis=1),
            title=title_template.format(FEATURE_ALIAS),
            value_name=value_name,
        )[1]
        for context, title_template, value_name in (
            (
                context_negative,
                "{} Ranking by Context Negative Signal",
                "Negative Signal Sum",
            ),
            (
                context_positive,
                "{} Ranking by Context Positive Signal",
                "Positive Signal Sum",
            ),
        )
    )

context_feature_selection_message = (
    "Selected {} context negative and {} context positive {}.".format(
        context_negative_features.size, context_positive_features.size, FEATURE_ALIAS
    )
)

print(context_feature_selection_message)

In [None]:
# Choose the columns (samples) kept for the negative and the positive context.
if not SELECT_SAMPLE_AUTOMATICALLY:

    # Manual mode: keep every column that has at least one value that is
    # neither 0 nor missing (zeros are turned into NaN, then all-NaN columns
    # are dropped).
    context_negative_samples, context_positive_samples = (
        context.replace(0, np.nan).dropna(how="all", axis=1).columns
        for context in (context_negative, context_positive)
    )

else:

    # Automatic mode: rank columns by their summed context signal and keep
    # element [1] of what the selector returns (presumably the "high" index
    # -- TODO confirm against ccal). Negative is processed before positive.
    context_negative_samples, context_positive_samples = (
        ccal.select_series_low_and_high_index(
            context.sum(axis=0),
            title=title_template.format(SAMPLE_ALIAS),
            value_name=value_name,
        )[1]
        for context, title_template, value_name in (
            (
                context_negative,
                "{} Ranking by Context Negative Signal",
                "Negative Signal Sum",
            ),
            (
                context_positive,
                "{} Ranking by Context Positive Signal",
                "Positive Signal Sum",
            ),
        )
    )

context_sample_selection_message = (
    "Selected {} context negative and {} context positive {}.".format(
        context_negative_samples.size, context_positive_samples.size, SAMPLE_ALIAS
    )
)

print(context_sample_selection_message)

In [None]:
# Restrict each context table to its own selected rows and columns
# (negative first, then positive).
context_negative_signal, context_positive_signal = (
    context.loc[selected_features, selected_samples]
    for context, selected_features, selected_samples in (
        (context_negative, context_negative_features, context_negative_samples),
        (context_positive, context_positive_features, context_positive_samples),
    )
)

In [None]:
# Assemble the final context signal from the negative and/or positive parts,
# depending on which contexts are configured, then summarize it.
if "negative" in CONTEXTS and "positive" in CONTEXTS:

    # Prefix row labels with "(-) " / "(+) " so the two parts stay
    # distinguishable once stacked. The prefixes are applied to copies, not to
    # context_negative_signal / context_positive_signal themselves, so
    # re-running this cell does not stack prefixes ("(-) (-) name").
    context_negative_signal_labeled = context_negative_signal.copy()

    context_negative_signal_labeled.index = [
        "(-) {}".format(feature_name) for feature_name in context_negative_signal.index
    ]

    context_positive_signal_labeled = context_positive_signal.copy()

    context_positive_signal_labeled.index = [
        "(+) {}".format(feature_name) for feature_name in context_positive_signal.index
    ]

    # Stack rows: negative block on top, positive block below.
    context_signal = pd.concat(
        (context_negative_signal_labeled, context_positive_signal_labeled)
    )

elif "negative" in CONTEXTS:

    context_signal = context_negative_signal

elif "positive" in CONTEXTS:

    context_signal = context_positive_signal

else:

    # Fail here, with a clear message, instead of leaving context_signal
    # undefined (or stale from an earlier run) for the cells below.
    raise ValueError(
        "CONTEXTS must contain 'negative' and/or 'positive'; got {!r}.".format(
            CONTEXTS
        )
    )

ccal.summarize_feature_x_sample(
    context_signal,
    feature_x_sample_alias="Context Signal",
    feature_alias=FEATURE_ALIAS,
    sample_alias=SAMPLE_ALIAS,
    feature_x_sample_value_name="Signal",
    plot_heat_map_max_size=PLOT_HEAT_MAP_MAX_SIZE,
    plot_distributions_max_size=PLOT_DISTRIBUTIONS_MAX_SIZE,
    plot_rug_max_size=PLOT_RUG_MAX_SIZE,
)

In [None]:
# Optionally normalize the assembled context signal and summarize it again.
# Nothing happens when no normalization method is configured.
# NOTE(review): context_signal is rebound from itself, so re-running this cell
# normalizes the already-normalized table.
if CONTEXT_SIGNAL_NORMALIZATION_METHOD is not None:

    context_signal_normalization_arguments = (
        CONTEXT_SIGNAL_NORMALIZATION_AXIS,
        CONTEXT_SIGNAL_NORMALIZATION_METHOD,
    )

    context_signal = ccal.normalize_df(
        context_signal,
        *context_signal_normalization_arguments,
        raise_for_bad=False,
    )

    # Options forwarded verbatim to the summary helper.
    context_signal_summary_options = dict(
        feature_x_sample_alias="Context Signal",
        feature_alias=FEATURE_ALIAS,
        sample_alias=SAMPLE_ALIAS,
        feature_x_sample_value_name="Signal",
        plot_heat_map_max_size=PLOT_HEAT_MAP_MAX_SIZE,
        plot_distributions_max_size=PLOT_DISTRIBUTIONS_MAX_SIZE,
        plot_rug_max_size=PLOT_RUG_MAX_SIZE,
    )

    ccal.summarize_feature_x_sample(context_signal, **context_signal_summary_options)

In [None]:
# Write the context signal as a tab-separated file to the path registered
# under "feature_x_sample.context_signal.tsv".
context_signal_table_path = PATH_DICT["feature_x_sample.context_signal.tsv"]

context_signal.to_csv(context_signal_table_path, sep="\t")