# Clustergrammer Appyter

In [None]:
from appyter import magic
# Hand appyter a callable that returns this notebook's globals() when called
# (globals is bound as the lambda's default argument and invoked lazily).
# Presumably this is what registers the %%appyter cell magic used below.
magic.init(lambda _=globals: _())

In [None]:
import pandas as pd

# Clustergrammer-Web
import IPython
from IPython.display import display

# Display clustergrammer
from maayanlab_bioinformatics.plotting import display_clustergrammer


In [None]:
%%appyter hide_code_exec

# Declare the four sections of the input form. Each field defined in the later
# appyter cells is placed in one of them via its `section` argument.
{% do SectionField(
    name = 'Data_Section',
    title = 'Load your data',
    subtitle = 'Please load your data as a tab-separated matrix. \
    Further explanation of input file format can be found \
    <a href="https://clustergrammer.readthedocs.io/matrix_format_io.html"> here </a>.',
    img = 'clustergrammer_screenshot.png') %}

{% do SectionField(
    name = 'Normalization_Section',
    title = 'Select normalization preferences',
    subtitle = 'Choose a normalization method to apply, if desired.',
    img = 'clustergrammer_screenshot.png') %}

{% do SectionField(
    name = 'Filter_Section',
    title = 'Select filtering preferences',
    subtitle = "Choose filtering methods, if desired.",
    img = 'clustergrammer_screenshot.png') %}

{% do SectionField(
    name = 'Cluster_Section',
    title = 'Select clustering preferences',
    subtitle = 'Please select your clustering preferences. If none are selected, default parameters will be used.',
    img = 'clustergrammer_screenshot.png') %}







In [None]:
%%appyter code_exec

# Input matrix selected in the form (defaults to the bundled rc_two_cats.txt
# example). The resulting value is later passed to net.load_file(data_filename).
data_filename = {{ FileField(
    name = 'data_filename',
    label = 'Data file (tab-separated .txt or .tsv)',
    default = 'rc_two_cats.txt',
    examples = {'rc_two_cats.txt': url_for('static', filename = 'rc_two_cats.txt')},
    description = 'Upload data as tab-separated .txt or .tsv file.',
    section = 'Data_Section') }}



In [None]:
%%appyter code_exec

# Normalization method passed to net.normalize(norm_type=...). The
# 'No normalization' choice renders as None, which makes the normalization
# cell skip the call entirely.
normalization_method = {{ ChoiceField(
  name='normalization_method',
  label='Normalization method',
  description='Please select a normalization method',
  default='Z-score',
  choices={
    'Z-score': "'zscore'",
    'Quantile normalization': "'qn'",
    'No normalization': "None",
  },
  section='Normalization_Section') }}

# Axis passed to net.normalize(axis=...); only read when a method is selected.
normalization_axis = {{ ChoiceField(
  name='normalization_axis',
  label='Normalization axis',
  description='Please select axis to normalize if a normalization method was chosen',
  default='Row',
  choices={
    'Row': "'row'",
    'Column': "'col'",
  },
  section='Normalization_Section') }}

In [None]:
%%appyter code_exec

{% do DescriptionField(name = 'filter_div0', 
                       text = '<hr> <a style="color:tomato;">Filtering methods will be applied in the order selected below: </a> <hr>', 
                       section = 'Filter_Section') %}

filter_method_temp = {{ ChoiceField(
    name='filter_method_temp',
    label='First filtering method',
    description="Please select a filtering method. \
    To choose multiple, hold down the \'Ctrl\' (PC) or \'Cmd\' (Mac) key while selecting your choices",
    choices={
        'Top n (sum or variance)': "'filter_topn'",
        'By category': "'filter_cat'",
        'By row or column name': "'filter_name'",
        'By sum over rows or columns': "'filter_sum'",
        'Values above threshold': "'filter_thresh'",
        'No filtering': "None",
    },
    default='Top n (sum or variance)',
    section='Filter_Section') }}

filter_method_temp2 = {{ ChoiceField(
    name='filter_method_temp2',
    label='Second filtering method',
    description="Please select a filtering method. \
    To choose multiple, hold down the \'Ctrl\' (PC) or \'Cmd\' (Mac) key while selecting your choices",
    choices={
        'Top n (sum or variance)': "'filter_topn'",
        'By category': "'filter_cat'",
        'By row or column name': "'filter_name'",
        'By sum over rows or columns': "'filter_sum'",
        'Values above threshold': "'filter_thresh'",
        'No filtering': "None",
    },
    default='Values above threshold',
    section='Filter_Section') }}

filter_method_temp3 = {{ ChoiceField(
    name='filter_method_temp3',
    label='Third filtering method',
    description="Please select a filtering method. \
    To choose multiple, hold down the \'Ctrl\' (PC) or \'Cmd\' (Mac) key while selecting your choices",
    choices={
        'Top n (sum or variance)': "'filter_topn'",
        'By category': "'filter_cat'",
        'By row or column name': "'filter_name'",
        'By sum over rows or columns': "'filter_sum'",
        'Values above threshold': "'filter_thresh'",
        'No filtering': "None",
    },
    default='No filtering',
    section='Filter_Section') }}


{% do DescriptionField(name = 'filter_div1', 
                       text = '<hr> <a style="color:tomato;">Please select parameters for *applicable* filtering methods below</a> <hr>', 
                       section = 'Filter_Section') %}

#filter_N_top(inst_rc, N_top, rank_type='sum')

topn_type = {{ ChoiceField(
    name = 'topn_type',
    label = 'Top N - Ranking method',
    description = 'Please select a ranking method',
    default = 'Sum',
    choices={
        'Sum' : "'sum'",
        'Variance' : "'variance'"
    },
    section = 'Filter_Section') }}

topn_val = {{ IntField(
    name='topn_val', 
    label='Top N - Select value for <strong>n</strong>', 
    min=0, 
    max=200, 
    default=100, 
    description='Top <strong>N</strong> samples will be used for filtering', 
    section='Filter_Section') }}

topn_axis = {{ ChoiceField(
    name = 'topn_axis',
    label = 'Top N - Axis',
    description = 'Please select if you would like to filter by row or by column',
    default = 'Row',
    choices={
        'Row' : "'row'",
        'Column' : "'col'"
    },
    section = 'Filter_Section') }}

{% do DescriptionField(name = 'filter_div2', 
                       text = '<hr>', 
                       section = 'Filter_Section') %}

# Inputs for net.filter_sum(inst_rc, threshold, take_abs=True); they are only
# used when 'By sum over rows or columns' is one of the selected filters.
# NOTE(review): the take_abs help text below says the absolute value is applied
# to the threshold; confirm against clustergrammer's filter_sum, which may apply
# it to the data values instead.

# Axis handed to filter_sum as inst_rc.
sum_axis = {{ ChoiceField(
    name = 'sum_axis',
    label = 'Sum - Axis',
    description = 'Please select if you would like to filter by row or by column',
    default = 'Row',
    choices={
        'Row' : "'row'",
        'Column' : "'col'"
    },
    section = 'Filter_Section') }}

# Value handed to filter_sum as threshold.
sum_threshold = {{ IntField(
    name='sum_threshold', 
    label='Sum - Threshold', 
    min=0, 
    max=200, 
    default=1, 
    description='Select threshold for sum across rows / columns', 
    section='Filter_Section') }}

# Flag handed to filter_sum as take_abs.
sum_take_abs = {{ BoolField(
    name = 'sum_take_abs', 
    label = 'Sum - Absolute value?', 
    default = 'true', 
    description = 'Select \'Yes\' if you would like to take the absolute value of the threshold. Otherwise, select \'No\'', 
    section = 'Filter_Section') }}

# Horizontal rule separating the sum inputs from the next group.
{% do DescriptionField(name = 'filter_div5', 
                       text = '<hr>', 
                       section = 'Filter_Section') %}

# Inputs for net.filter_threshold(inst_rc, threshold, num_occur=1); they are
# only used when 'Values above threshold' is one of the selected filters.

# Axis handed to filter_threshold as inst_rc.
threshold_axis = {{ ChoiceField(
    name = 'threshold_axis',
    label = 'Threshold - Axis',
    description = 'Please select if you would like to filter by row or by column',
    default = 'Row',
    choices={
        'Row' : "'row'",
        'Column' : "'col'"
    },
    section = 'Filter_Section') }}

# Value handed to filter_threshold as threshold.
threshold_threshold = {{ IntField(
    name='threshold_threshold', 
    label='Threshold - Threshold', 
    min=0, 
    max=200, 
    default=1, 
    description='Select threshold value', 
    section='Filter_Section') }}

# Value handed to filter_threshold as num_occur.
threshold_num_occur = {{ IntField(
    name='threshold_num_occur', 
    label='Threshold - Number of occurrences', 
    min=0, 
    max=200, 
    default=1, 
    description='Number of occurrences above threshold', 
    section='Filter_Section') }}


In [None]:
%%appyter code_exec

cluster_dist_type = {{ ChoiceField(
    name = 'cluster_dist_type',
    label = 'Distance metric',
    description = 'Please select a distance metric for clustering',
    default = 'Cosine',
    choices = {
        'Cosine' : "'cosine'",
        'Euclidean' : "'euclidean'",
        'Minkowski' : "'minkowski'",
        'Jaccard' : "'jaccard'"},
    section = 'Cluster_Section') }}


cluster_dendro = {{ BoolField(
    name = 'cluster_dendro', 
    label = 'Generate <a href="https://clustergrammer.readthedocs.io/interacting_with_viz.html#interactive-dendrogram">\
    dendrogram</a>?',
    default = 'true', 
    description = 'Select \'Yes\' if you would like generate an interactive dendrogram. Otherwise, select \'No\'', 
    section = 'Cluster_Section') }}



# Clustergrammer

Analysis using Clustergrammer

Fernandez, N. F. et al. Clustergrammer, a web-based heatmap visualization and analysis tool for high-dimensional biological data. Sci. Data 4:170151 doi: 10.1038/sdata.2017.151 (2017).

## Load net and data

In [None]:
import clustergrammer
from clustergrammer import Network
# Single Network instance shared by the load, normalize, filter, cluster and
# display cells that follow.
net = Network()

In [None]:
# Read the user-supplied matrix into the shared network object.
# A failure is reported instead of raised so the notebook keeps rendering, but
# the underlying error is included so the cause is visible. Catching Exception
# (rather than a bare except) lets KeyboardInterrupt/SystemExit propagate.
try:
    net.load_file(data_filename)
except Exception as err:
    print(f'Could not load input file: {data_filename} ({type(err).__name__}: {err})')
else:
    print(f'{data_filename} loaded successfully for Clustergrammer')


## Normalization

In [None]:
# Normalize only when a method was chosen ('No normalization' renders as None).
# Failures are reported with their cause rather than raised, so later cells
# still run on the un-normalized data.
if normalization_method is not None:
    try:
        net.normalize(axis=normalization_axis, norm_type=normalization_method)
    except Exception as err:
        print(f'Could not normalize using {normalization_method} ({type(err).__name__}: {err})')
    else:
        print(f'Normalized {normalization_axis}s using {normalization_method}')

## Filtering

In [None]:
# Gather the three filter slots in selection order, dropping any slot that was
# left on 'No filtering' (None).
filter_method_ordered = [
    choice
    for choice in (filter_method_temp, filter_method_temp2, filter_method_temp3)
    if choice is not None
]

# Echo the resulting order as the cell output.
filter_method_ordered

In [None]:
# Apply the selected filters one after another, in the order the user chose.
# Each filter is best-effort: a failure is reported together with its cause and
# the remaining filters still run.

for method in filter_method_ordered:
    if method == 'filter_topn':
        # Keep only the top N rows/columns, ranked by sum or variance.
        try:
            net.filter_N_top(inst_rc=topn_axis, N_top=topn_val, rank_type=topn_type)
        except Exception as err:
            print(f"Error: Could not filter top N ({type(err).__name__}: {err})")
        else:
            print(f'Filtered top {topn_val} samples by {topn_axis}')
    elif method in ('filter_cat', 'filter_name'):
        # TODO: enable net.filter_cat(axis, cat_index, cat_name) and
        # net.filter_names(axis, names) once the form defines their inputs
        # (cat_axis, cat_index, cat_name, names_axis, names_names).
        # Until then, tell the user instead of silently ignoring the selection.
        print(f"Warning: '{method}' filtering is not available yet; skipping")
    elif method == 'filter_sum':
        try:
            net.filter_sum(inst_rc=sum_axis, threshold=sum_threshold, take_abs=sum_take_abs)
        except Exception as err:
            print(f"Error: Could not filter by sum ({type(err).__name__}: {err})")
        else:
            print(f'Filtered {sum_axis}s under {sum_threshold} threshold, {"taking" if sum_take_abs else "not taking"} absolute value')
    elif method == 'filter_thresh':
        try:
            net.filter_threshold(inst_rc=threshold_axis, threshold=threshold_threshold, num_occur=threshold_num_occur)
        except Exception as err:
            print(f"Error: Could not filter by threshold ({type(err).__name__}: {err})")
        else:
            # Pluralize the noun, not the number ("1 occurrence", "2 occurrences").
            print(f'Filtered for values with {threshold_num_occur} occurrence{"" if threshold_num_occur == 1 else "s"} above {threshold_threshold} threshold')
    

## Clustering

In [None]:
# Cluster the (optionally normalized and filtered) matrix with the chosen
# distance metric. Only dist_type and dendro come from the form; the remaining
# arguments are fixed. A failure is reported with its cause instead of raised.
try:
    net.cluster(dist_type=cluster_dist_type, 
            run_clustering=True, 
            dendro=cluster_dendro, 
            views=['N_row_sum', 'N_row_var'], 
            linkage_type='average', 
            sim_mat=False, 
            filter_sim=0.1, 
            calc_cat_pval=False, 
            run_enrichr=None, 
            enrichrgram=None)
except Exception as err:
    print(f'ERROR: net.cluster(...) method did not run successfully ({type(err).__name__}: {err})')
else:
    print(f'Clustering preferences selected: {cluster_dist_type} distance, {"loading" if cluster_dendro else "not loading"} dendrogram')

## Generate and display Clustergram

In [None]:
# Hand the clustered network to the maayanlab_bioinformatics plotting helper
# imported at the top of the notebook; presumably renders the interactive view.
display_clustergrammer(net)