In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# NOTE(review): this star import presumably supplies `pd`, `ccal` and
# `make_paths`, which the cells below use without importing them — confirm
# against environment.py.
from environment import *

# Analysis settings; the cells below read its upper-case attributes (a.FEATURES,
# a.SELECT_CONTEXT, ...).
import analysis as a

# Path lookup built from the analysis settings; subscripted below with path
# names such as 'signal_matrix_file_path'.
p = make_paths(a)

In [None]:
# Load the feature and the sample 1D context matrices, using the first column
# of each file as the index. Both files are read the same way, so the two
# reads are driven by their path names.
feature_1d_context_matrix, sample_1d_context_matrix = (
    pd.read_table(p[file_path_name], index_col=0)
    for file_path_name in (
        'feature_1d_context_matrix_file_path',
        'sample_1d_context_matrix_file_path',
    )
)

In [None]:
# Keyword arguments for the signal-matrix construction, all taken from the
# analysis settings.
make_2d_signal_matrix_options = dict(
    features=a.FEATURES,
    samples=a.SAMPLES,
    n_top_feature=a.N_TOP_FEATURE,
    n_top_sample=a.N_TOP_SAMPLE,
    select_feature_automatically=a.SELECT_FEATURE_AUTOMATICALLY,
    select_sample_automatically=a.SELECT_SAMPLE_AUTOMATICALLY,
    feature_normalization_method=a.FEATURE_CONTEXT_NORMALIZATION_METHOD,
    sample_normalization_method=a.SAMPLE_CONTEXT_NORMALIZATION_METHOD,
)

# Build the 2D signal matrix for the 'negative' context from the two 1D
# context matrices.
negative_signal_matrix = ccal.make_2d_signal_matrix(
    feature_1d_context_matrix,
    sample_1d_context_matrix,
    'negative',
    **make_2d_signal_matrix_options
)

# Display the result as the cell output.
negative_signal_matrix

In [None]:
# Keyword arguments for the signal-matrix construction, all taken from the
# analysis settings.
make_2d_signal_matrix_options = dict(
    features=a.FEATURES,
    samples=a.SAMPLES,
    n_top_feature=a.N_TOP_FEATURE,
    n_top_sample=a.N_TOP_SAMPLE,
    select_feature_automatically=a.SELECT_FEATURE_AUTOMATICALLY,
    select_sample_automatically=a.SELECT_SAMPLE_AUTOMATICALLY,
    feature_normalization_method=a.FEATURE_CONTEXT_NORMALIZATION_METHOD,
    sample_normalization_method=a.SAMPLE_CONTEXT_NORMALIZATION_METHOD,
)

# Build the 2D signal matrix for the 'positive' context from the two 1D
# context matrices.
positive_signal_matrix = ccal.make_2d_signal_matrix(
    feature_1d_context_matrix,
    sample_1d_context_matrix,
    'positive',
    **make_2d_signal_matrix_options
)

# Display the result as the cell output.
positive_signal_matrix

In [None]:
# Pick the signal matrix for the configured context, save it as a
# tab-separated file and display it.
#
# a.SELECT_CONTEXT must be 'negative', 'positive' or 'both'. For 'both', the
# negative and positive matrices are stacked row-wise, with every row label
# prefixed by '(-) ' or '(+) ' so the two halves remain distinguishable.
if a.SELECT_CONTEXT == 'negative':

    signal_matrix = negative_signal_matrix

elif a.SELECT_CONTEXT == 'positive':

    signal_matrix = positive_signal_matrix

elif a.SELECT_CONTEXT == 'both':

    # Relabel copies, not the matrices themselves: relabeling in place would
    # add one more prefix every time this cell is re-run.
    prefixed_negative_signal_matrix = negative_signal_matrix.copy()

    prefixed_negative_signal_matrix.index = [
        '(-) {}'.format(feature) for feature in negative_signal_matrix.index
    ]

    prefixed_positive_signal_matrix = positive_signal_matrix.copy()

    prefixed_positive_signal_matrix.index = [
        '(+) {}'.format(feature) for feature in positive_signal_matrix.index
    ]

    signal_matrix = pd.concat((
        prefixed_negative_signal_matrix,
        prefixed_positive_signal_matrix,
    ))

else:

    # Fail loudly instead of leaving signal_matrix unbound (NameError) or,
    # worse, silently reusing a stale value from an earlier run of this cell.
    raise ValueError(
        "a.SELECT_CONTEXT must be 'negative', 'positive' or 'both'; got {!r}.".format(
            a.SELECT_CONTEXT))

signal_matrix.to_csv(
    p['signal_matrix_file_path'],
    sep='\t',
)

signal_matrix

In [None]:
# Draw the heat map only when the matrix holds fewer than 1e6 cells.
if signal_matrix.size < 1e6:

    heat_map_title = '({}) Signal'.format(a.SELECT_CONTEXT)

    ccal.plot_heat_map(signal_matrix, title=heat_map_title)

In [None]:
# Flatten the signal matrix into a single Series, then narrow it twice: first
# drop the NA entries, then also drop the entries equal to 0.
values = signal_matrix.unstack()

not_na_values = values.dropna()

not_na_or_0_values = not_na_values[not_na_values.ne(0)]

# Report how many values remain at each step.
print('{:,} values'.format(values.size))

print('{:,} not-NA values'.format(not_na_values.size))

print('{:,} not-NA-or-0 values'.format(not_na_or_0_values.size))

# Plot the two filtered distributions together.
ccal.plot_distributions(
    ('Not-NA Value', 'Not-NA-or-0 Value'),
    (not_na_values, not_na_or_0_values),
    plot_rug=False,
    title='Value Distribution',
)

In [None]:
# For each axis of the signal matrix (features = rows, samples = columns):
# sum the signal per element, rank the elements by that sum, and plot the
# ranking with the "to peek" elements labeled.
for element, df, to_peek in (
    ('feature', signal_matrix, a.FEATURES_TO_PEEK),
    ('sample', signal_matrix.T, a.SAMPLES_TO_PEEK),
):

    # NOTE(review): not used in the visible part of this loop; kept in case
    # later code reads it — confirm before removing.
    skew_t_pdf_fit_parameter = pd.read_table(
        p['{}_skew_t_pdf_fit_parameter_file_path'.format(element)],
        index_col=0,
    )

    # Sorted ascending, so position 0 is the element with the smallest sum.
    signal_summary = df.sum(axis=1).sort_values()

    if to_peek is None:

        ranks = []

    else:

        # Match to_peek against the rows of df (features or samples). Matching
        # against signal_matrix.index would always compare with the features,
        # so samples to peek would never be found.
        ranked_labels = signal_summary.index.tolist()

        ranks = [ranked_labels.index(label) for label in df.index.intersection(to_peek)]

    title = '({}) Signal Summary'.format(element)

    ccal.plot_points(
        (
            'All',
            'To Peek',
        ),
        (
            tuple(range(signal_summary.size)),
            ranks,
        ),
        (
            signal_summary,
            # ranks are positions, so select by position explicitly.
            signal_summary.iloc[ranks],
        ),
        texts=(
            signal_summary.index,
            signal_summary.index[ranks],
        ),
        modes=(
            'markers',
            'markers+text',
        ),
        title=title,
        xaxis_title='Rank',
        yaxis_title=title,
    )