In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from environment import *
from setting import *

In [None]:
# Load the context matrix of every element type enabled in ELEMENTS.
# Each file is tab-separated and its first column becomes the row index.
if 'feature' in ELEMENTS:
    feature_1d_context_matrix = pd.read_table(PATH_DICT['feature_context_matrix_file_path'], index_col=0)

if 'sample' in ELEMENTS:
    sample_1d_context_matrix = pd.read_table(PATH_DICT['sample_context_matrix_file_path'], index_col=0)

In [None]:
# Split the signed context matrices into two non-negative signal matrices:
#   - negative_signal_matrix: magnitude of the entries that are <= 0
#     (clip(upper=0) zeroes the positive entries, the unary minus flips the sign);
#   - positive_signal_matrix: the entries that are >= 0
#     (clip(lower=0) zeroes the negative entries).
# The sample matrix is transposed before being used/combined.
# NOTE(review): presumably the transpose makes it feature-by-sample like the
# feature matrix; the `+` aligns on labels, so mismatched labels yield NaN.
if 'feature' in ELEMENTS and 'sample' in ELEMENTS:

    negative_signal_matrix = -feature_1d_context_matrix.clip(upper=0) + -sample_1d_context_matrix.clip(upper=0).T

    positive_signal_matrix = feature_1d_context_matrix.clip(lower=0) + sample_1d_context_matrix.clip(lower=0).T

elif 'feature' in ELEMENTS:

    negative_signal_matrix = -feature_1d_context_matrix.clip(upper=0)

    positive_signal_matrix = feature_1d_context_matrix.clip(lower=0)

elif 'sample' in ELEMENTS:

    negative_signal_matrix = -sample_1d_context_matrix.clip(upper=0).T

    positive_signal_matrix = sample_1d_context_matrix.clip(lower=0).T

else:

    # Without this branch the cell would fail below with an opaque NameError.
    raise ValueError(
        "ELEMENTS must contain 'feature' and/or 'sample'; got: {}.".format(ELEMENTS)
    )

# Prefix the row labels so that negative and positive rows stay distinguishable
# (and unique) once both matrices are stacked into a single matrix.
negative_signal_matrix.index = ['(-) {}'.format(feature) for feature in negative_signal_matrix.index]

positive_signal_matrix.index = ['(+) {}'.format(feature) for feature in positive_signal_matrix.index]

In [None]:
# Stack the negative-signal rows on top of the positive-signal rows.
signal_matrix = pd.concat([negative_signal_matrix, positive_signal_matrix])

# Persist the combined matrix as a tab-separated file.
signal_matrix.to_csv(PATH_DICT['signal_matrix_file_path'], sep='\t')

signal_matrix

In [None]:
from ccal import plot_points
from pandas import Index


def select_df_index(
    df,
    select_automatically,
    n_top,
    name,
):
    """
    Select a subset of the index (row labels) of a DataFrame.

    One of three strategies is used, checked in this order:
        1. select_automatically is truthy: keep the rows whose row sum is
           strictly greater than mean(row sums) + std(row sums) / 2, and plot
           the sorted row sums against that threshold;
        2. n_top is not None: keep the union, over all columns, of the labels
           of the n_top largest non-missing values of each column;
        3. otherwise: keep every row.

    Arguments:
        df (DataFrame): matrix whose index is selected from
        select_automatically (bool): whether to use the row-sum threshold
        n_top (int | None): number of top rows to keep per column; ignored
            when select_automatically is truthy; 0 selects nothing
        name (str): label used in the plot titles and the printed summary
    Returns:
        Index: selected row labels
    """

    if select_automatically:

        index_sum = df.sum(axis=1)

        threshold = index_sum.mean() + index_sum.std() / 2

        df_index = index_sum.index[threshold < index_sum]

        # Ranked row sums (markers) against the constant threshold (line).
        plot_points(
            (tuple(range(index_sum.size)), ) * 2,
            (
                index_sum.sort_values(),
                (threshold, ) * index_sum.size,
            ),
            names=(
                'Index Sum',
                'Threshold',
            ),
            modes=(
                'markers',
                'lines',
            ),
            title='Select DataFrame Index<br>{}'.format(name),
            xaxis_title='Rank',
            yaxis_title='{} Value'.format(name),
        )

    elif n_top is not None:

        df_index = Index(())

        for _, column_values in df.items():

            # tail() is positional and, unlike [-n_top:], returns nothing
            # (rather than everything) when n_top is 0.
            top_labels = column_values.dropna().sort_values().tail(n_top).index

            # Explicit set union: the `|` operator on Index is no longer a set
            # operation in recent pandas (it is an element-wise logical OR).
            df_index = df_index.union(top_labels)

    else:

        df_index = df.index

    print('Selected {} DataFrame index of: {}.'.format(
        df_index.size,
        name,
    ))

    return df_index

In [None]:
# Select features (rows) separately from the negative and positive signal matrices.
selected_negative_features = select_df_index(
    df=negative_signal_matrix,
    select_automatically=SELECT_FEATURE_AUTOMATICALLY,
    n_top=N_TOP_FEATURE,
    name='(feature) Negative Signal Matrix',
)

selected_positive_features = select_df_index(
    df=positive_signal_matrix,
    select_automatically=SELECT_FEATURE_AUTOMATICALLY,
    n_top=N_TOP_FEATURE,
    name='(feature) Positive Signal Matrix',
)

# Select samples (columns) by running the same selection on the transposed matrices.
selected_negative_samples = select_df_index(
    df=negative_signal_matrix.T,
    select_automatically=SELECT_SAMPLE_AUTOMATICALLY,
    n_top=N_TOP_SAMPLE,
    name='(sample) Negative Signal Matrix',
)

selected_positive_samples = select_df_index(
    df=positive_signal_matrix.T,
    select_automatically=SELECT_SAMPLE_AUTOMATICALLY,
    n_top=N_TOP_SAMPLE,
    name='(sample) Positive Signal Matrix',
)

In [None]:
# Combine the per-context selections according to the contexts enabled in CONTEXTS.
if 'negative' in CONTEXTS and 'positive' in CONTEXTS:

    # Explicit set union: the `|` operator on Index is no longer a set
    # operation in recent pandas (it is an element-wise logical OR).
    selected_features = selected_negative_features.union(selected_positive_features)

    selected_samples = selected_negative_samples.union(selected_positive_samples)

elif 'negative' in CONTEXTS:

    selected_features = selected_negative_features

    selected_samples = selected_negative_samples

elif 'positive' in CONTEXTS:

    selected_features = selected_positive_features

    selected_samples = selected_positive_samples

else:

    # Without this branch the cell would fail below with an opaque NameError.
    raise ValueError(
        "CONTEXTS must contain 'negative' and/or 'positive'; got: {}.".format(CONTEXTS)
    )

# Keep only the selected rows (features) and columns (samples).
selected_signal_matrix = signal_matrix.loc[selected_features, selected_samples]

In [None]:
# Normalize the selected values and wrap them back into a DataFrame that keeps
# the original row and column labels.
# NOTE(review): presumably this is a '0-1' (min-max) scaling along axis 1 -
# confirm against ccal.normalize_nd_array.
selected_signal_matrix = pd.DataFrame(
    data=ccal.normalize_nd_array(
        selected_signal_matrix.values,
        1,
        '0-1',
        raise_for_bad=False,
    ),
    index=selected_signal_matrix.index,
    columns=selected_signal_matrix.columns,
)

# Persist the normalized matrix as a tab-separated file.
selected_signal_matrix.to_csv(PATH_DICT['selected_signal_matrix_file_path'], sep='\t')

ccal.summarize_feature_x_sample(selected_signal_matrix, title='Selected Signal Matrix')

selected_signal_matrix