[notebook_Readme.md on github](https://github.com/KnowEnG/Spreadsheets_Transformation/blob/master/docs/notebook_Readme.md)

In [1]:
%%html
<style>div.input {display:none;} div.output_stderr{display:none}</style>

In [2]:
"""    To Start This Notebook Click On:    Cell > Run All    (in the jupyter menu above)    """
import warnings
warnings.filterwarnings('ignore')

import os
import io
import sys

import pandas as pd
from pandas.io.common import EmptyDataError
import numpy as np

from IPython.display import display, HTML
import ipywidgets as widgets
import traitlets

from knpackage import toolbox as kn

sys.path.insert(1, '../Spreadsheets_Transformation/src')
import spreadsheets_transformation_toolbox as stt
from layout_notebooks import *

# Example file that each cell tries to pre-select in its file dropdown,
# keyed by the role the file plays in that cell.
DEFAULT_INPUT_FILES = dict(
    transpose='transpose_spreadsheet.tsv',
    common_samples_1='intersect_spreadsheet_A.tsv',
    common_samples_2='intersect_spreadsheet_B.tsv',
    merge_1='merge_spreadsheet_A.txt',
    merge_2='merge_spreadsheet_B.txt',
    select_rows_spreadsheet='select_rows_spreadsheet.tsv',
    select_rows_list='select_rows_list.txt',
    select_averages_spreadsheet='average_spreadsheet.tsv',
    select_averages_dict='average_labels.tsv',
    select_categorical_spreadsheet='select_phenotype_spreadsheet.df',
    select_categorical_phenotype='select_phenotype_phenotype.txt',
    numerical_spreadsheet='other_transforms_spreadsheet.tsv',
    stats_spreadsheet='descriptive_statistic_spreadsheet.tsv',
)

# Directories shared by every cell below: where results are written and where input files are read.
# USER_RESULTS_DIRECTORY / USER_DATA_DIRECTORY are not defined in the visible cells -
# presumably they come from `from layout_notebooks import *`; verify there.
results_dir = USER_RESULTS_DIRECTORY
input_data_dir = USER_DATA_DIRECTORY

In [None]:
# Heading for this cell; show_cell_title is not defined in the visible cells - presumably from layout_notebooks.
show_cell_title('Category to Binary')

def category_column_to_binary_matrix(button):
    """ callback for cat_col_execute_button - read the selected phenotype file and column, 
        write the binary spreadsheet returned by stt.select_categorical_binary
    
    Args: 
        button:         the ipywidgets.Button that was clicked; .description and .results_directory are read, 
                        .fname_list is set to the written result file. The file name and the column are 
                        taken from the notebook-level widgets get_category_file_button.file_selector and 
                        select_column_dropdown.
    """
    # in the 'Clear' state nothing is computed - the button is only handed to the viewer
    if button.description == 'Clear':
        visualize_selected_file(button)
        return
    
    phenotype_file_name = os.path.join(input_data_dir, get_category_file_button.file_selector.value)
    column_id = select_column_dropdown.value
        
    cat_bin_df = stt.select_categorical_binary(phenotype_file_name, column_id)
    
    # fixed output name: every run overwrites the previous categorical_binary.tsv
    cat_bin_result_file_name = os.path.join(button.results_directory, 'categorical_binary.tsv')
    cat_bin_df.to_csv(cat_bin_result_file_name, sep='\t', float_format='%g')
    
    button.fname_list = [cat_bin_result_file_name]
    visualize_selected_file(button)
    
# placeholder entries shown until set_columns_dropdown_options has read a phenotype file
no_file_selected_columns_list = ['no file', 'no file']
select_column_dropdown = widgets.Dropdown(
    options=no_file_selected_columns_list,
    value=no_file_selected_columns_list[0],
    description='phenotype:',
)

def set_columns_dropdown_options(change):
    """ observer for the phenotype file selector - refill select_column_dropdown with the file's column names

    Args:
        change:         the change notification (unused; the current selection is read from the widget itself)
    """
    selected_path = os.path.join(input_data_dir, get_category_file_button.file_selector.value)
    if not os.path.isfile(selected_path):
        # nothing readable selected - leave the dropdown as it is
        return

    phenotype_df = pd.read_csv(selected_path, sep='\t', index_col=0, header=0)
    # np.unique returns the names sorted and de-duplicated
    column_names = np.unique(phenotype_df.columns.tolist())
    select_column_dropdown.options = column_names
    select_column_dropdown.value = column_names[0]

# phenotype file chooser; a change of the selected file refreshes the column dropdown
get_category_file_button = get_select_view_file_button_set(input_data_dir)
get_category_file_button.file_selector.observe(set_columns_dropdown_options, names='value')

# pre-select the example phenotype file when it exists in the data directory
if os.path.isfile(os.path.join(input_data_dir, DEFAULT_INPUT_FILES['select_categorical_phenotype'])):
    get_category_file_button.file_selector.value = DEFAULT_INPUT_FILES['select_categorical_phenotype']

# execute button bound to the same file selector
cat_col_execute_button = get_single_file_execute_button(
    input_data_dir,
    results_dir,
    file_selector=get_category_file_button.file_selector,
    button_name='Select',
)
cat_col_execute_button.on_click(category_column_to_binary_matrix)

# display control widgets:
show_select_view_button(get_category_file_button)
show_widget_left(select_column_dropdown)
show_execute_button(cat_col_execute_button)

In [3]:
# Heading for this cell; show_cell_title is not defined in the visible cells - presumably from layout_notebooks.
show_cell_title('Transpose')

def transpose_selected_file(button):
    """ callback for the transpose_execute_button - transpose the selected spreadsheet and write it tab-separated
    
    Args: 
        button:         the ipywidgets.Button that was clicked; .input_data_directory, .results_directory and
                        .file_selector.value (the selected file name) are read, .fname_list is set to the 
                        written result file.
    """
    # in the 'Clear' state nothing is computed - the button is only handed to the viewer
    if button.description == 'Clear':
        visualize_selected_file(button)
        return

    source_directory = button.input_data_directory
    selected_name = button.file_selector.value
    source_df = kn.get_spreadsheet_df(os.path.join(source_directory, selected_name))
    transposed_df = stt.transpose_df(source_df)

    # the output name is derived from the selected file name and the transform name
    out_file_name = stt.get_outfile_name(button.results_directory, selected_name, "transpose", timestamp=False)
    transposed_df.to_csv(out_file_name, sep='\t', float_format='%g')

    button.fname_list = [out_file_name]
    visualize_selected_file(button)
    
# file chooser, pre-set to the example spreadsheet when it exists in the data directory
get_transpose_file_button = get_select_view_file_button_set(input_data_dir)
if os.path.isfile(os.path.join(input_data_dir, DEFAULT_INPUT_FILES['transpose'])):
    get_transpose_file_button.file_selector.value = DEFAULT_INPUT_FILES['transpose']

# execute button bound to the same file selector
transpose_execute_button = get_single_file_execute_button(
    input_data_dir,
    results_dir,
    file_selector=get_transpose_file_button.file_selector,
    button_name='Transpose',
)
transpose_execute_button.on_click(transpose_selected_file)

# display control widgets
show_select_view_button(get_transpose_file_button)
show_execute_button(transpose_execute_button)

In [4]:
# Heading for this cell; show_cell_title is not defined in the visible cells - presumably from layout_notebooks.
show_cell_title('Intersect')

def get_common_samples(button):
    """ callback for common_samples_execute_button - pass the two selected spreadsheets through 
        stt.common_samples_df and write both results tab-separated
    
    Args: 
        button:         the ipywidgets.Button that was clicked; .input_data_directory, .results_directory,
                        .file_1_selector.value and .file_2_selector.value (the selected file names) are read, 
                        .fname_list is set to the two written result files.

    """
    # in the 'Clear' state nothing is computed - the button is only handed to the viewer
    if button.description == 'Clear':
        visualize_selected_file(button)
        return

    input_data_directory                = button.input_data_directory    
    spreadsheet_1_file_name             = os.path.join(input_data_directory, button.file_1_selector.value)
    spreadsheet_2_file_name             = os.path.join(input_data_directory, button.file_2_selector.value)
    transform_name                      = "common_samples"
    
    spreadsheet_1_df                    = kn.get_spreadsheet_df(spreadsheet_1_file_name)
    spreadsheet_2_df                    = kn.get_spreadsheet_df(spreadsheet_2_file_name)
    
    spreadsheet_1_df, spreadsheet_2_df  = stt.common_samples_df(spreadsheet_1_df, spreadsheet_2_df)
    
    results_directory                   = button.results_directory
    result_1_file_name                  = stt.get_outfile_name(
                                            results_directory, 
                                            spreadsheet_1_file_name, 
                                            transform_name, 
                                            timestamp=False,
                                            )
    spreadsheet_1_df.to_csv(result_1_file_name, sep='\t', float_format='%g')
    
    result_2_file_name                  = stt.get_outfile_name(
                                            results_directory, 
                                            spreadsheet_2_file_name, 
                                            transform_name, 
                                            timestamp=False,
                                            )
    spreadsheet_2_df.to_csv(result_2_file_name, sep='\t', float_format='%g')
    
    # list the files that were just written (not the inputs), as the other callbacks in this notebook do
    button.fname_list                   = [result_1_file_name, result_2_file_name]
    visualize_selected_file(button)

# spreadsheet 1: file chooser, pre-set to the example file when it exists
common_samples_flistbx_1_view_button = get_select_view_file_button_set(input_data_dir)
if os.path.isfile(os.path.join(input_data_dir, DEFAULT_INPUT_FILES['common_samples_1'])):
    common_samples_flistbx_1_view_button.file_selector.value = DEFAULT_INPUT_FILES['common_samples_1']

# spreadsheet 2: file chooser, pre-set to the example file when it exists
common_samples_flistbx_2_view_button = get_select_view_file_button_set(input_data_dir)
if os.path.isfile(os.path.join(input_data_dir, DEFAULT_INPUT_FILES['common_samples_2'])):
    common_samples_flistbx_2_view_button.file_selector.value = DEFAULT_INPUT_FILES['common_samples_2']

# execute button that is handed both file selectors
file_select_dict = {
    'file_1_selector': common_samples_flistbx_1_view_button.file_selector,
    'file_2_selector': common_samples_flistbx_2_view_button.file_selector,
}
common_samples_execute_button = get_two_files_execute_button(
    input_data_dir,
    results_dir,
    file_select_dict=file_select_dict,
    button_name='Intersect',
)
common_samples_execute_button.on_click(get_common_samples)

# show the controls
show_select_view_button(common_samples_flistbx_1_view_button)
show_select_view_button(common_samples_flistbx_2_view_button)
show_execute_button(common_samples_execute_button)

In [5]:
# Heading for this cell; show_cell_title is not defined in the visible cells - presumably from layout_notebooks.
show_cell_title('Merge')

def merge_spreadsheets(button):
    """ callback for merge_execute_button - merge the two selected spreadsheets with stt.merge_df 
        and write the result tab-separated
        
    Args: 
        button:         the ipywidgets.Button that was clicked; .input_data_directory, .results_directory,
                        .file_1_selector.value and .file_2_selector.value (the selected file names) are read, 
                        .fname_list is set to the written result file.
    """
    # in the 'Clear' state nothing is computed - the button is only handed to the viewer
    if button.description == 'Clear':
        visualize_selected_file(button)
        return

    source_directory = button.input_data_directory
    first_path = os.path.join(source_directory, button.file_1_selector.value)
    second_path = os.path.join(source_directory, button.file_2_selector.value)

    first_df = kn.get_spreadsheet_df(first_path)
    second_df = kn.get_spreadsheet_df(second_path)
    merged_df = stt.merge_df(first_df, second_df)

    # the output name is derived from the first spreadsheet's path
    merged_path = stt.get_outfile_name(button.results_directory, first_path, "merge", timestamp=False)
    merged_df.to_csv(merged_path, sep='\t', float_format='%g')

    button.fname_list = [merged_path]
    visualize_selected_file(button)

# spreadsheet 1: file chooser, pre-set to the example file when it exists
merge_flistbx_1_view_button = get_select_view_file_button_set(input_data_dir)
if os.path.isfile(os.path.join(input_data_dir, DEFAULT_INPUT_FILES['merge_1'])):
    merge_flistbx_1_view_button.file_selector.value = DEFAULT_INPUT_FILES['merge_1']

# spreadsheet 2: file chooser, pre-set to the example file when it exists
merge_flistbx_2_view_button = get_select_view_file_button_set(input_data_dir)
if os.path.isfile(os.path.join(input_data_dir, DEFAULT_INPUT_FILES['merge_2'])):
    merge_flistbx_2_view_button.file_selector.value = DEFAULT_INPUT_FILES['merge_2']

# execute button that is handed both file selectors
merge_select_dict = {
    'file_1_selector': merge_flistbx_1_view_button.file_selector,
    'file_2_selector': merge_flistbx_2_view_button.file_selector,
}
merge_execute_button = get_two_files_execute_button(
    input_data_dir,
    results_dir,
    file_select_dict=merge_select_dict,
    button_name='Merge',
)
merge_execute_button.on_click(merge_spreadsheets)

# show the controls
show_select_view_button(merge_flistbx_1_view_button)
show_select_view_button(merge_flistbx_2_view_button)
show_execute_button(merge_execute_button)

In [6]:
# Heading for this cell; show_cell_title is not defined in the visible cells - presumably from layout_notebooks.
show_cell_title('Select Rows')

def read_a_list_file(input_file_name):
    """ local function to read a whitespace-separated list from a text file
    
    Args:
        input_file_name:    full path name of a file containing a list
    
    Returns:
        the tokens found in the file, in file order, as a list of strings 
        (any run of spaces, tabs or newlines separates two tokens; an empty file gives [])
    """
    with open(input_file_name, 'r') as list_file:
        return list_file.read().split()

def select_genes(button):
    """ callback for select_rows_execute_button - read the spreadsheet and the list file, pass both to 
        stt.select_genes_df and write the result tab-separated
    
    Args: 
        button:         the ipywidgets.Button that was clicked; .input_data_directory, .results_directory,
                        .file_1_selector.value (spreadsheet file name) and .file_2_selector.value (list file 
                        name) are read, .fname_list is set to the written result file.
    """
    # in the 'Clear' state nothing is computed - the button is only handed to the viewer
    if button.description == 'Clear':
        visualize_selected_file(button)
        return

    source_directory = button.input_data_directory
    spreadsheet_path = os.path.join(source_directory, button.file_1_selector.value)
    list_path = os.path.join(source_directory, button.file_2_selector.value)

    spreadsheet_df = kn.get_spreadsheet_df(spreadsheet_path)
    # a plain python list of the whitespace-separated entries in the list file
    wanted_rows = read_a_list_file(list_path)
    selected_df = stt.select_genes_df(spreadsheet_df, wanted_rows)

    # the output name is derived from the spreadsheet's path
    selected_path = stt.get_outfile_name(button.results_directory, spreadsheet_path, "select_rows", timestamp=False)
    selected_df.to_csv(selected_path, sep='\t', float_format='%g')

    button.fname_list = [selected_path]
    visualize_selected_file(button)

# spreadsheet: file chooser, pre-set to the example file when it exists
select_rows_flistbx_1_view_button = get_select_view_file_button_set(input_data_dir)
if os.path.isfile(os.path.join(input_data_dir, DEFAULT_INPUT_FILES['select_rows_spreadsheet'])):
    select_rows_flistbx_1_view_button.file_selector.value = DEFAULT_INPUT_FILES['select_rows_spreadsheet']

# row list: file chooser, pre-set to the example file when it exists
select_rows_flistbx_2_view_button = get_select_view_file_button_set(input_data_dir)
if os.path.isfile(os.path.join(input_data_dir, DEFAULT_INPUT_FILES['select_rows_list'])):
    select_rows_flistbx_2_view_button.file_selector.value = DEFAULT_INPUT_FILES['select_rows_list']

# execute button that is handed both file selectors
row_file_select_dict = {
    'file_1_selector': select_rows_flistbx_1_view_button.file_selector,
    'file_2_selector': select_rows_flistbx_2_view_button.file_selector,
}
select_rows_execute_button = get_two_files_execute_button(
    input_data_dir,
    results_dir,
    file_select_dict=row_file_select_dict,
    button_name='Select',
)
select_rows_execute_button.on_click(select_genes)

# show the controls
show_select_view_button(select_rows_flistbx_1_view_button)
show_select_view_button(select_rows_flistbx_2_view_button)
show_execute_button(select_rows_execute_button)

In [7]:
# Heading for this cell; show_cell_title is not defined in the visible cells - presumably from layout_notebooks.
show_cell_title('Average on Labels')

def get_cluster_averages(button):
    """ callback for averages_execute_button - read the spreadsheet and the sample labels, pass them with 
        the chosen statistic to stt.cluster_statistics_df and write the result tab-separated
    
    Args: 
        button:         the ipywidgets.Button that was clicked; .input_data_directory, .results_directory,
                        .file_1_selector.value (spreadsheet file name), .file_2_selector.value (labels file 
                        name) and .centroid_selector.value (statistic name) are read, .fname_list is set to 
                        the written result file.
    """
    # in the 'Clear' state nothing is computed - the button is only handed to the viewer
    if button.description == 'Clear':
        visualize_selected_file(button)
        return

    source_directory = button.input_data_directory
    spreadsheet_path = os.path.join(source_directory, button.file_1_selector.value)
    labels_path = os.path.join(source_directory, button.file_2_selector.value)

    spreadsheet_df = kn.get_spreadsheet_df(spreadsheet_path)
    # the labels file is read as two tab-separated columns, named here; the first becomes the index
    labels_df = pd.read_csv(labels_path, sep='\t', index_col=0, names=['sample', 'cluster_number'])
    statistic_name = button.centroid_selector.value

    statistics_df = stt.cluster_statistics_df(spreadsheet_df, labels_df, statistic_name)

    # the output name is derived from the spreadsheet's path
    statistics_path = stt.get_outfile_name(
        button.results_directory, spreadsheet_path, "selected_averages", timestamp=False)
    statistics_df.to_csv(statistics_path, sep='\t', float_format='%g')

    button.fname_list = [statistics_path]
    visualize_selected_file(button)


# spreadsheet: file chooser, pre-set to the example file when it exists
averages_flistbx_1_view_button = get_select_view_file_button_set(input_data_dir)
if os.path.isfile(os.path.join(input_data_dir, DEFAULT_INPUT_FILES['select_averages_spreadsheet'])):
    averages_flistbx_1_view_button.file_selector.value = DEFAULT_INPUT_FILES['select_averages_spreadsheet']

# sample labels: file chooser, pre-set to the example file when it exists
averages_flistbx_2_view_button = get_select_view_file_button_set(input_data_dir)
if os.path.isfile(os.path.join(input_data_dir, DEFAULT_INPUT_FILES['select_averages_dict'])):
    averages_flistbx_2_view_button.file_selector.value = DEFAULT_INPUT_FILES['select_averages_dict']

# execute button that is handed both file selectors; 
# labelled for this cell ('Intersect' was a left-over from the Intersect cell)
ave_file_select_dict = {
    'file_1_selector': averages_flistbx_1_view_button.file_selector,
    'file_2_selector': averages_flistbx_2_view_button.file_selector,
}
averages_execute_button = get_two_files_execute_button(
    input_data_dir,
    results_dir,
    file_select_dict=ave_file_select_dict,
    button_name='Average',
)
averages_execute_button.on_click(get_cluster_averages)

# statistic chooser; get_cluster_averages reads it back as button.centroid_selector
AVAILABLE_STATISTICAL_CENTROIDS = ['std', 'median', 'mean']
centroid_selector_dropdown = widgets.Dropdown(
    options=AVAILABLE_STATISTICAL_CENTROIDS,
    value=AVAILABLE_STATISTICAL_CENTROIDS[0],
    description='Statistic:',
)
averages_execute_button.centroid_selector = centroid_selector_dropdown

# show the controls
show_select_view_button(averages_flistbx_1_view_button)
show_select_view_button(averages_flistbx_2_view_button)
show_widget_left(centroid_selector_dropdown)
show_execute_button(averages_execute_button)

In [8]:
# Heading for this cell; show_cell_title is not defined in the visible cells - presumably from layout_notebooks.
show_cell_title('Select Phenotype')

def select_categorical(button):
    """ callback for categorical_execute_button - read the spreadsheet and phenotype files, pass them with 
        the chosen phenotype column and category to stt.select_subtype_df and write both results tab-separated
    
    Args: 
        button:         the ipywidgets.Button that was clicked; .input_data_directory, .results_directory,
                        .file_1_selector.value (spreadsheet file name), .file_2_selector.value (phenotype 
                        file name), .phenotype_id_selector.value and .category_selectory.value are read, 
                        .fname_list is set to the two written result files.
    """
    # in the 'Clear' state nothing is computed - the button is only handed to the viewer
    if button.description == 'Clear':
        visualize_selected_file(button)
        return

    source_directory = button.input_data_directory
    spreadsheet_path = os.path.join(source_directory, button.file_1_selector.value)
    phenotype_path = os.path.join(source_directory, button.file_2_selector.value)
    chosen_column = button.phenotype_id_selector.value
    # 'category_selectory' (sic) is the attribute name used where the widget is attached to the button
    chosen_category = button.category_selectory.value

    spreadsheet_df = kn.get_spreadsheet_df(spreadsheet_path)
    phenotype_df = kn.get_spreadsheet_df(phenotype_path)

    selected_spreadsheet_df, selected_phenotype_df = stt.select_subtype_df(
        spreadsheet_df, phenotype_df, chosen_column, chosen_category)

    # one output per input, each named after its own input path
    output_directory = button.results_directory
    selected_spreadsheet_path = stt.get_outfile_name(
        output_directory, spreadsheet_path, "categorical", timestamp=False)
    selected_spreadsheet_df.to_csv(selected_spreadsheet_path, sep='\t', float_format='%g')

    selected_phenotype_path = stt.get_outfile_name(
        output_directory, phenotype_path, "categorical", timestamp=False)
    selected_phenotype_df.to_csv(selected_phenotype_path, sep='\t', float_format='%g')

    button.fname_list = [selected_spreadsheet_path, selected_phenotype_path]
    visualize_selected_file(button)

# spreadsheet: file chooser, pre-set to the example file when it exists
categorical_flistbx_1_view_button = get_select_view_file_button_set(input_data_dir)
if os.path.isfile(os.path.join(input_data_dir, DEFAULT_INPUT_FILES['select_categorical_spreadsheet'])):
    categorical_flistbx_1_view_button.file_selector.value = DEFAULT_INPUT_FILES['select_categorical_spreadsheet']

# phenotype: file chooser, pre-set to the example file when it exists
categorical_flistbx_2_view_button = get_select_view_file_button_set(input_data_dir)
if os.path.isfile(os.path.join(input_data_dir, DEFAULT_INPUT_FILES['select_categorical_phenotype'])):
    categorical_flistbx_2_view_button.file_selector.value = DEFAULT_INPUT_FILES['select_categorical_phenotype']

# execute button that is handed both file selectors
cat_file_select_dict = {
    'file_1_selector': categorical_flistbx_1_view_button.file_selector,
    'file_2_selector': categorical_flistbx_2_view_button.file_selector,
}
categorical_execute_button = get_two_files_execute_button(
    input_data_dir,
    results_dir,
    file_select_dict=cat_file_select_dict,
    button_name='Select',
)
categorical_execute_button.on_click(select_categorical)
        
def reset_phenotype_id_list(change):
    """ callback for phenotype file selector (categorical_flistbx_2_view_button.file_selector) change 
    
    Reads the newly selected file (change.new) and refills both dropdowns: phenotype_id_listbox with the 
    file's column names, category_select_listbox with the sorted distinct non-missing values of the column 
    that is then selected in phenotype_id_listbox.
    """
    phenotype_df = pd.read_csv(os.path.join(input_data_dir, change.new), sep='\t', index_col=0, header=0)

    # column dropdown: all columns of the file, first one selected
    column_names = phenotype_df.columns.tolist()
    phenotype_id_listbox.options = column_names
    phenotype_id_listbox.value = column_names[0]

    # category dropdown: values of the selected column, first one selected
    column_values = phenotype_df[phenotype_id_listbox.value].dropna()
    category_select_listbox.options = sorted(set(column_values))
    category_select_listbox.value = category_select_listbox.options[0]
    
# dropdown for the phenotype column; starts with a single empty entry and is refilled by reset_phenotype_id_list
phenotype_id_listbox = widgets.Dropdown(options=[''], value='', description='column:')

categorical_flistbx_2_view_button.target_dir = input_data_dir
# a change of the selected phenotype file refills the column and category dropdowns
categorical_flistbx_2_view_button.file_selector.observe(reset_phenotype_id_list, names='value')


def reset_category_list(change):
    """ callback for phenotype selection (phenotype_id_listbox) change 
    
    Re-reads the currently selected phenotype file and refills category_select_listbox with the sorted 
    distinct non-missing values of the column selected in phenotype_id_listbox (the change argument is unused).
    """
    phenotype_path = os.path.join(input_data_dir, categorical_flistbx_2_view_button.file_selector.value)
    phenotype_df = pd.read_csv(phenotype_path, sep='\t', index_col=0, header=0)

    column_values = phenotype_df[phenotype_id_listbox.value].dropna()

    category_select_listbox.options = sorted(set(column_values))
    category_select_listbox.value = category_select_listbox.options[0]

# Dropdown for the category value; starts with a single empty entry and is refilled by
# reset_phenotype_id_list and reset_category_list.
category_select_listbox = widgets.Dropdown(options=[''],
                                           value='',
                                           description='phenotype:')
# a change of the selected phenotype column refills the category dropdown
phenotype_id_listbox.observe(reset_category_list, names='value')

# Initialize the phenotype column and category dropdowns by re-assigning the selector values so the
# 'value' observers registered above get a chance to run: remember the current file selection, switch
# to the first option, switch back, then do the same kind of toggle on the column dropdown.
# NOTE(review): if the remembered selection already is options[0], neither file assignment changes the
#               value, so presumably no observer fires and the dropdowns keep their placeholder - confirm.
# NOTE(review): options[1] raises IndexError when the column dropdown holds fewer than two options.
categorical_flistbx_2_view_button.file_selector.current = categorical_flistbx_2_view_button.file_selector.value
categorical_flistbx_2_view_button.file_selector.value   = categorical_flistbx_2_view_button.file_selector.options[0]
categorical_flistbx_2_view_button.file_selector.value   = categorical_flistbx_2_view_button.file_selector.current
phenotype_id_listbox.value                              = phenotype_id_listbox.options[1]
phenotype_id_listbox.value                              = phenotype_id_listbox.options[0]

# attach the phenotype selectors to the execute button; select_categorical reads them back as
# button.phenotype_id_selector and button.category_selectory (sic)
categorical_execute_button.phenotype_id_selector        = phenotype_id_listbox
categorical_execute_button.category_selectory           = category_select_listbox
# NOTE(review): select_categorical is also registered on this button where the button is created;
#               this second registration looks redundant - confirm before removing.
categorical_execute_button.on_click(select_categorical)

# show the controls
show_select_view_button(categorical_flistbx_1_view_button)
show_select_view_button(categorical_flistbx_2_view_button)
show_widget_left(widgets.VBox([phenotype_id_listbox, category_select_listbox]))
show_execute_button(categorical_execute_button)

In [9]:
# Heading for this cell; show_cell_title is not defined in the visible cells - presumably from layout_notebooks.
show_cell_title('Other Transformations')

def select_numerical_transform(button):
    """ callback for get_numerical_function_execute_button - apply the selected numeric transform to the 
        selected spreadsheet and write the result tab-separated
    
    Args: 
        button:         the ipywidgets.Button that was clicked; .input_data_directory, .results_directory,
                        .file_selector.value (the selected file name) and .numerical_function (the transform 
                        dropdown, which also carries the per-transform option widgets as attributes) are 
                        read, .fname_list is set to the written result file.

    Raises:
        ValueError:     if the selected transform name is not one of 
                        'abs', 'z_transform', 'log_transform', 'threshold'.
    """
    # in the 'Clear' state nothing is computed - the button is only handed to the viewer
    if button.description == 'Clear':
        visualize_selected_file(button)
        return
    
    input_data_directory            = button.input_data_directory
    input_file_name                 = button.file_selector.value
    spreadsheet_df                  = kn.get_spreadsheet_df(os.path.join(input_data_directory, input_file_name))

    options_owner                   = button.numerical_function
    results_directory               = button.results_directory
    # the transform name doubles as the tag used to build the output file name
    transform_name                  = options_owner.value
    
    if transform_name == 'abs':
        result_df                   = stt.abs_df(spreadsheet_df)
        
    elif transform_name == 'z_transform':
        result_df                   = stt.z_transform_df(
                                        spreadsheet_df, 
                                        axis=options_owner.z_transform_axis.value, 
                                        ddof=options_owner.z_transform_ddof.value,
                                        )
        
    elif transform_name == 'log_transform':
        log_transform_log_base      = options_owner.log_transform_log_base.value
        # the widget may hold the letter "e"; translate it to the numeric base
        if log_transform_log_base == "e":
            log_transform_log_base  = np.exp(1)
        result_df                   = stt.log_transform_df(
                                        spreadsheet_df,
                                        log_base=log_transform_log_base,
                                        log_offset=options_owner.log_transform_log_offset.value,
                                        )
        
    elif transform_name == 'threshold':
        result_df                   = stt.threshold_df(
                                        spreadsheet_df,
                                        cut_off=options_owner.threshold_cut_off.value,
                                        sub_val=options_owner.threshold_substitution_value.value,
                                        scope=options_owner.threshold_scope.value,
                                        )

    else:
        # fail with a clear message instead of an unbound result_df further down
        raise ValueError('unknown numerical transform: %r' % (transform_name,))

    result_file_name = stt.get_outfile_name(results_directory, input_file_name, transform_name, timestamp=False)
    result_df.to_csv(result_file_name, sep='\t', float_format='%g')
    button.fname_list = [result_file_name]
    visualize_selected_file(button)

# file chooser, pre-set to the example spreadsheet when it exists in the data directory
get_other_transform_file_button = get_select_view_file_button_set(input_data_dir)
if os.path.isfile(os.path.join(input_data_dir, DEFAULT_INPUT_FILES['numerical_spreadsheet'])):
    get_other_transform_file_button.file_selector.value = DEFAULT_INPUT_FILES['numerical_spreadsheet']

# display a numeric function options listbox and if selected is:
#     abs             - grey out all else in all cases
#     z_transform     - show radio button row or columns, and an int text box for ddof
#     log_transform   - show log_transform float base text box, and a float offset box
#     threshold       - show a float threshold cut-off, substitution box, and threshold bound [SUB_BELOW, SUB_ABOVE]
def reset_aux_controls(change):
    """ callback - grey out the sub controls of the previous method, enable those of the new one

    Args:
        change:     change notification of the numerical function dropdown; 'old' and 'new' hold
                    the previous and the current selection, 'owner' is the widget that carries
                    the auxiliary controls as attributes.
    """
    aux_controls_by_method = {
        'z_transform': (
            'z_transform_axis',
            'z_transform_ddof',
        ),
        'log_transform': (
            'log_transform_log_base',
            'log_transform_log_offset',
        ),
        'threshold': (
            'threshold_cut_off',
            'threshold_substitution_value',
            'threshold_scope',
        ),
    }

    # 'abs' has no auxiliary controls, so it (like any unlisted selection) matches nothing.
    # The previous selection is handled first, then the new one.
    for method_name, is_disabled in ((change.old, True), (change.new, False)):
        for control_name in aux_controls_by_method.get(method_name, ()):
            getattr(change['owner'], control_name).disabled = is_disabled

# Selector for the numerical transform to apply; the first option is the initial choice.
numerical_function_options = ['abs', 'z_transform', 'log_transform', 'threshold']
numerical_function_dropdown = widgets.Dropdown(
    options=numerical_function_options,
    value=numerical_function_options[0],
    description='stats function',
)

# Default values used by the auxiliary parameter controls below.
threshold_scope = ['SUB_BELOW', 'SUB_ABOVE']
log_trans_base = np.exp(1)
default_threshold = 0.5

# The auxiliary controls are attached to the dropdown as attributes so that
# reset_aux_controls can reach them through the change's owner. They all start
# disabled; reset_aux_controls enables the ones matching the selected function.
numerical_function_dropdown.z_transform_axis = widgets.Dropdown(
    options={'rows': 0, 'columns': 1},
    value=0,
    description='axis',
    disabled=True,
)
numerical_function_dropdown.z_transform_ddof = widgets.IntText(
    value=0,
    description='ddof',
    disabled=True,
)
numerical_function_dropdown.log_transform_log_base = widgets.FloatText(
    value=log_trans_base,
    description='log_base',
    disabled=True,
)
numerical_function_dropdown.log_transform_log_offset = widgets.FloatText(
    value=0,
    description='offset',
    disabled=True,
)
numerical_function_dropdown.threshold_cut_off = widgets.FloatText(
    value=default_threshold,
    description='cut-off',
    disabled=True,
)
numerical_function_dropdown.threshold_substitution_value = widgets.FloatText(
    value=0,
    description='substitute',
    disabled=True,
)
numerical_function_dropdown.threshold_scope = widgets.Dropdown(
    options=threshold_scope,
    value=threshold_scope[0],
    description='sub',
    disabled=True,
)
numerical_function_dropdown.observe(reset_aux_controls, names='value')

# The execute button shares the file selector of the chooser above and carries
# the function dropdown so the click callback can read the chosen options.
get_numerical_function_execute_button = get_single_file_execute_button(
    input_data_dir,
    results_dir,
    file_selector=get_other_transform_file_button.file_selector,
    button_name='Calculate',
)
get_numerical_function_execute_button.numerical_function = numerical_function_dropdown
get_numerical_function_execute_button.on_click(select_numerical_transform)

# display control widgets
show_select_view_button(get_other_transform_file_button)
show_widget_left(numerical_function_dropdown)
show_widget_left(
    widgets.VBox(
        [
            getattr(numerical_function_dropdown, aux_control_name)
            for aux_control_name in (
                'z_transform_axis',
                'z_transform_ddof',
                'log_transform_log_base',
                'log_transform_log_offset',
                'threshold_cut_off',
                'threshold_substitution_value',
                'threshold_scope',
            )
        ]
    )
)
show_execute_button(get_numerical_function_execute_button)

In [10]:
# Title of this notebook cell (show_cell_title is presumably provided by the
# layout_notebooks star-import -- confirm).
show_cell_title('Descriptive Statistics')

def get_stats_value(button):
    """ callback for get_stats_execute_button - compute a descriptive statistic and show the result

    Reads the selected spreadsheet, applies stt.stats_df with the selected function and direction,
    writes the result as a tab-separated file (name built by stt.get_outfile_name, no timestamp)
    and passes the button on to visualize_selected_file.

    Args:
        button:         an ipywidgets.Button object carrying these attributes:
                        .description            when 'Clear', nothing is computed and the button
                                                is passed straight to visualize_selected_file.
                        .input_data_directory   directory of the input spreadsheet.
                        .results_directory      directory the result file is written to.
                        .file_selector          widget whose .value is the input file name.
                        .stats_function         widget whose .value is the statistic name.
                        .direction_reference    widget whose .value is the direction name.
                        On success .fname_list is set to a one-item list with the result file.
    """
    if button.description == 'Clear':
        visualize_selected_file(button)
        return

    input_file_name = button.file_selector.value
    spreadsheet_df = kn.get_spreadsheet_df(
        os.path.join(button.input_data_directory, input_file_name)
    )

    # Read each selection exactly once; the same values drive the computation
    # and the name of the output file.
    stats_function = button.stats_function.value
    direction_reference = button.direction_reference.value

    result_df = stt.stats_df(spreadsheet_df, stats_function, direction_reference)

    result_file_name = stt.get_outfile_name(
        button.results_directory,
        input_file_name,
        stats_function + '_' + direction_reference,
        timestamp=False,
    )
    result_df.to_csv(result_file_name, sep='\t', float_format='%g')

    button.fname_list = [result_file_name]
    visualize_selected_file(button)

# File chooser for the descriptive-statistics cell. When the default spreadsheet
# named in DEFAULT_INPUT_FILES exists in the input directory, pre-select it.
get_stats_function_file_button = get_select_view_file_button_set(input_data_dir)

if os.path.isfile(
    os.path.join(input_data_dir, DEFAULT_INPUT_FILES['stats_spreadsheet'])
):
    get_stats_function_file_button.file_selector.value = (
        DEFAULT_INPUT_FILES['stats_spreadsheet']
    )

# Dropdown label -> value that the click callback passes on as the function name.
# NOTE(review): the 'variance' label maps to the value 'variation'; presumably
# that is the name stt.stats_df expects -- confirm against the toolbox.
stats_function_options = {
    'min': 'min',
    'max': 'max',
    'mean': 'mean',
    'median': 'median',
    'variance': 'variation',
    'std_deviation': 'std_deviation',
    'sum': 'sum',
}

stats_function_dropdown = widgets.Dropdown(
    options=stats_function_options,
    value=stats_function_options['min'],
    description='function',
)

direction_reference_options = ['columns', 'rows', 'all']
direction_reference_dropdown = widgets.Dropdown(
    options=direction_reference_options,
    value=direction_reference_options[0],
    description='direction',
)

# The execute button shares the chooser's file selector and carries both
# dropdowns so that get_stats_value can read the chosen options.
get_stats_execute_button = get_single_file_execute_button(
    input_data_dir,
    results_dir,
    file_selector=get_stats_function_file_button.file_selector,
    button_name='Calculate',
)
get_stats_execute_button.stats_function = stats_function_dropdown
get_stats_execute_button.direction_reference = direction_reference_dropdown
get_stats_execute_button.on_click(get_stats_value)

# display control widgets
show_select_view_button(get_stats_function_file_button)
show_widget_left(
    widgets.VBox([stats_function_dropdown, direction_reference_dropdown])
)
show_execute_button(get_stats_execute_button)