In [None]:
# Enable IPython's autoreload extension in mode 2 so that edited modules are
# reloaded automatically before each cell executes.
%load_ext autoreload

%autoreload 2

In [None]:
# Wildcard import of the project environment. Names used below without a visible
# import (e.g. pd, ccal, path) presumably come from here -- TODO confirm.
from environment import *

import setting

# Mapping of file-path keys used by the cells below, built from the setting
# module by path() (exact contents depend on path(); confirm against its definition).
path_dict = path(setting)

In [None]:
# Load the feature and the sample 1D context matrices (feature first, then
# sample), using the first column of each file as the row index.
feature_1d_context_matrix, sample_1d_context_matrix = (
    pd.read_table(path_dict[file_path_key], index_col=0)
    for file_path_key in (
        'feature_context_matrix_file_path',
        'sample_context_matrix_file_path',
    )
)

In [None]:
def normalize_context_matrix(df):
    """Return a normalized copy of ``df`` with all-NaN rows replaced by 0.

    The values are passed to ``ccal.normalize_nd_array`` with arguments
    ``(1, '0-1', raise_for_bad_value=False)`` -- presumably a 0-1 scaling along
    axis 1 (i.e. per row); confirm against the ccal documentation.

    Rows that are entirely NaN after normalization are set to 0, and the
    number of such rows is printed.

    Args:
        df: DataFrame to normalize. It is not modified.

    Returns:
        A new DataFrame with the same index and columns as ``df``.
    """

    normalized_values = ccal.normalize_nd_array(
        df.values,
        1,
        '0-1',
        raise_for_bad_value=False,
    )

    normalized = pd.DataFrame(
        normalized_values,
        index=df.index,
        columns=df.columns,
    )

    # Rows in which every normalized value is NaN carry no signal.
    all_na_rows = normalized.index[normalized.isna().all(axis=1)]

    if not all_na_rows.size:

        return normalized

    print('Setting 0 for {} element(s) ...'.format(all_na_rows.size))

    normalized.loc[all_na_rows] = 0

    return normalized

In [None]:
# Keep only the negative part of the feature context matrix (positive values
# become 0, NaN stays NaN), flip its sign so magnitudes are non-negative,
# then normalize.
feature_1d_context_matrix_negative = normalize_context_matrix(
    -feature_1d_context_matrix.clip(upper=0)
)

In [None]:
# Keep only the negative part of the sample context matrix (positive values
# become 0, NaN stays NaN), flip its sign so magnitudes are non-negative,
# then normalize.
sample_1d_context_matrix_negative = normalize_context_matrix(
    -sample_1d_context_matrix.clip(upper=0)
)

In [None]:
# Add the normalized feature matrix to the transposed normalized sample matrix,
# then prefix every row label with '(-) ' to mark the negative context.
negative_signal_matrix = feature_1d_context_matrix_negative.add(
    sample_1d_context_matrix_negative.T
)

negative_signal_matrix.index = [
    '(-) {}'.format(feature) for feature in negative_signal_matrix.index
]

In [None]:
# Keep only the positive part of the feature context matrix (negative values
# become 0, NaN stays NaN), then normalize.
feature_1d_context_matrix_positive = normalize_context_matrix(
    feature_1d_context_matrix.clip(lower=0)
)

In [None]:
# Keep only the positive part of the sample context matrix (negative values
# become 0, NaN stays NaN), then normalize.
sample_1d_context_matrix_positive = normalize_context_matrix(
    sample_1d_context_matrix.clip(lower=0)
)

In [None]:
# Add the normalized feature matrix to the transposed normalized sample matrix,
# then prefix every row label with '(+) ' to mark the positive context.
positive_signal_matrix = feature_1d_context_matrix_positive.add(
    sample_1d_context_matrix_positive.T
)

positive_signal_matrix.index = [
    '(+) {}'.format(feature) for feature in positive_signal_matrix.index
]

In [None]:
# Stack the negative-context rows on top of the positive-context rows.
signal_matrix = pd.concat(
    [negative_signal_matrix, positive_signal_matrix],
    axis=0,
)

signal_matrix

In [None]:
# Save the full signal matrix as a tab-separated file.
signal_matrix.to_csv(path_dict['signal_matrix_file_path'], sep='\t')

In [None]:
# Select features by their negative context and label them with the same
# '(-) ' prefix used for the rows of the signal matrix.
selected_negative_features = [
    '(-) {}'.format(feature)
    for feature in ccal.select_elements_by_context(
        feature_1d_context_matrix,
        'negative',
        n_top=setting.N_TOP_FEATURE,
        select_automatically=setting.SELECT_FEATURE_AUTOMATICALLY,
    )
]

print(len(selected_negative_features))

In [None]:
# Select features by their positive context and label them with the same
# '(+) ' prefix used for the rows of the signal matrix.
selected_positive_features = [
    '(+) {}'.format(feature)
    for feature in ccal.select_elements_by_context(
        feature_1d_context_matrix,
        'positive',
        n_top=setting.N_TOP_FEATURE,
        select_automatically=setting.SELECT_FEATURE_AUTOMATICALLY,
    )
]

print(len(selected_positive_features))

In [None]:
# Choose which selected feature labels to carry forward, based on
# setting.SELECT_CONTEXT ('negative', 'positive', or 'both').
if setting.SELECT_CONTEXT == 'negative':

    selected_features = selected_negative_features

elif setting.SELECT_CONTEXT == 'positive':

    selected_features = selected_positive_features

elif setting.SELECT_CONTEXT == 'both':

    selected_features = selected_negative_features + selected_positive_features

else:

    # Fail loudly: otherwise selected_features would be undefined (or, worse,
    # silently left over from a previous run of this cell).
    raise ValueError(
        'setting.SELECT_CONTEXT must be \'negative\', \'positive\', or \'both\'; got {!r}.'.format(
            setting.SELECT_CONTEXT,
        )
    )

In [None]:
# Keep only the rows of the selected features, in the order they were selected.
selected_signal_matrix = signal_matrix.loc[selected_features, :]

selected_signal_matrix

In [None]:
# Save the selected signal matrix as a tab-separated file.
selected_signal_matrix.to_csv(path_dict['selected_signal_matrix_file_path'], sep='\t')

In [None]:
# Plot the heat map only when the matrix has fewer than one million cells.
if selected_signal_matrix.size < 1e6:

    ccal.plot_heat_map(selected_signal_matrix, title='Selected Signal')

In [None]:
# Flatten the selected signal matrix into one long Series, then derive the
# not-NA subset and the not-NA-and-nonzero subset.
values = selected_signal_matrix.unstack()

not_na_values = values.dropna()

not_na_or_0_values = not_na_values.loc[not_na_values.ne(0)]

# Report how many values survive each filter.
print('{:,} values'.format(len(values)))

print('{:,} not-NA values'.format(len(not_na_values)))

print('{:,} not-NA-or-0 values'.format(len(not_na_or_0_values)))

# Compare the two distributions in one figure.
distribution_names = (
    'Not-NA Value',
    'Not-NA-or-0 Value',
)

distribution_values = (
    not_na_values,
    not_na_or_0_values,
)

ccal.plot_distributions(
    distribution_names,
    distribution_values,
    plot_rug=False,
    title='Value Distribution',
)

In [None]:
# For features (rows of the selected signal matrix) and then samples (its
# columns): sum the selected signal per element, rank the elements by that sum,
# and plot the ranking with the configured elements-to-peek labeled.
for element in (
    'feature',
    'sample',
):

    if element == 'feature':

        df = selected_signal_matrix

        to_peek = setting.FEATURES_TO_PEEK

    elif element == 'sample':

        df = selected_signal_matrix.T

        to_peek = setting.SAMPLES_TO_PEEK

    # Not used by the plot below; the read is kept so that later code relying
    # on this name keeps working and a missing file still fails here.
    skew_t_pdf_fit_parameter = pd.read_table(
        path_dict['{}_skew_t_pdf_fit_parameter_file_path'.format(element)],
        index_col=0,
    )

    # Ascending sort: position in this Series is the element's rank.
    signal_summary = df.sum(axis=1).sort_values()

    if to_peek is None:

        ranks = []

    else:

        # Look the peeked labels up in the ranked elements themselves (features
        # or samples, depending on `element`) rather than in signal_matrix.index,
        # which holds feature labels only and may include unselected features.
        # Labels absent from the ranking are skipped.
        to_peek_set = set(to_peek)

        ranks = [
            rank
            for rank, label in enumerate(signal_summary.index)
            if label in to_peek_set
        ]

    title = '({}) Selected Signal Sum'.format(element)

    ccal.plot_points(
        (
            'All',
            'To Peek',
        ),
        (
            tuple(range(signal_summary.size)),
            ranks,
        ),
        (
            signal_summary,
            # ranks are positions, so index positionally regardless of label dtype.
            signal_summary.iloc[ranks],
        ),
        texts=(
            signal_summary.index,
            signal_summary.index[ranks],
        ),
        modes=(
            'markers',
            'markers+text',
        ),
        title=title,
        xaxis_title='Rank',
        yaxis_title=title,
    )