In [1]:
%%html
<style>
/* Do not render the `div.input` elements (the code-input areas in the classic notebook markup). */
div.input {
    display:none;
}
/* Do not render the `div.output_stderr` elements (output areas that carry stderr text). */
div.output_stderr{
    display:none
}
</style>

In [2]:
#                                        DEVELOPMENT library list:
import warnings
warnings.filterwarnings('ignore')

import os
import sys

import pandas as pd
from pandas.io.common import EmptyDataError
import numpy as np

from IPython.display import display, HTML, clear_output
import ipywidgets as widgets
import traitlets

from knpackage import toolbox as kn

#                                             Define the required directory names structure
USER_BASE_DIRECTORY = os.getcwd()
USER_DATA_DIRECTORY = os.path.join(USER_BASE_DIRECTORY, 'user_data')
USER_RESULTS_DIRECTORY = os.path.join(USER_BASE_DIRECTORY, 'results')
USER_DATAFIE_EXTENSIONS_LIST = ['.tsv', '.txt', '.df', '.gz']

#                                             Full paths of the sub-directories already present in the base directory
list_dir_list = os.listdir(USER_BASE_DIRECTORY)
USER_BASE_DIRECTORIES_LIST = [
    os.path.join(USER_BASE_DIRECTORY, entry)
    for entry in list_dir_list
    if os.path.isdir(os.path.join(USER_BASE_DIRECTORY, entry))
]

#                                             Create user_data and results directories if they do not exist yet
for required_dir in (USER_DATA_DIRECTORY, USER_RESULTS_DIRECTORY):
    if required_dir not in USER_BASE_DIRECTORIES_LIST:
        os.mkdir(required_dir)

<a id='top'></a>

# Data File Transformations
1. [Transpose](#transpose)
2. [Common Samples](#common_samples)
3. [Merge](#merge)
4. [Select Genes](#select_genes)
5. [Cluster Averages](#cluster_averages)
6. [Select Categorical](#select_categorical)

In [3]:
def user_data_list(target_dir, FEXT):
    """ my_file_list = user_data_list(target_dir, FEXT)

    List the data files found directly inside a directory (sub-directories are not searched).

    Args:
        target_dir:     directory to scan.
        FEXT:           collection of accepted file extensions, each including its leading dot (e.g. '.tsv').

    Returns:
        my_file_list:   file names (not full paths), in os.listdir order, of the regular files whose
                        extension is in FEXT; the single placeholder entry 'No Data' when none match.
    """
    my_file_list = [
        entry for entry in os.listdir(target_dir)
        if os.path.isfile(os.path.join(target_dir, entry)) and os.path.splitext(entry)[1] in FEXT
    ]
    return my_file_list if my_file_list else ['No Data']

In [4]:
#                                             define Alias
results_dir = USER_RESULTS_DIRECTORY    # the 'results' directory under the notebook's working directory
output_dir = results_dir                # name the transformation callbacks use when building output file paths
target_dir = USER_DATA_DIRECTORY        # the 'user_data' directory the dropdown listboxes list files from

#                           anonymous, linkable dropdown listbox:
def get_dropdown_files_listbox():
    """ user_data dropdown listbox - for use as master listbox | repeated display listbox

    The options are the data files currently found in target_dir (or the 'No Data' placeholder) and the
    first one is selected.

    Usage - set new listbox linkage:
        flistbx_dlink = traitlets.dlink((files_dropdown_main, 'options'), (flistbx, 'options'))
    Usage - for display in multiple cells:
        files_dropdown_main = get_dropdown_files_listbox()
    Usage - display:
        display(files_dropdown_main)

    Returns:
        files_dropdown_stock_box:   a new ipywidgets.Dropdown object.
    """
    #                       scan the directory once so that 'options' and 'value' come from the same listing
    file_names = user_data_list(target_dir, USER_DATAFIE_EXTENSIONS_LIST)
    files_dropdown_stock_box = widgets.Dropdown(
        options=file_names,
        value=file_names[0],
        layout=widgets.Layout(width='80%', height='80px'),
        description='user_data:'
    )
    return files_dropdown_stock_box
files_dropdown_main = get_dropdown_files_listbox()

#                           repeatable files list update button:
def update_user_data(button):
    """ update files list in master (invisible) files dropdown list -- which is linked to all

    Args:
        button:         the clicked ipywidgets.Button object (not used by the callback itself).

    Usage - display:
        display(update_user_data_button)
    Usage - update all main listbox children:
        push the button or -- update_user_data(update_user_data_button)
    """
    #                       scan the directory once so that 'options' and 'value' come from the same listing
    file_names = user_data_list(target_dir, USER_DATAFIE_EXTENSIONS_LIST)
    files_dropdown_main.options = file_names
    files_dropdown_main.value = file_names[0]

update_user_data_button = widgets.Button(description='Update user_data')
#                           not a Button trait, so it is attached after construction instead of being passed
#                           as a constructor keyword (non-trait keywords are not reliably stored as attributes)
update_user_data_button.user_data_list = 'No Data'
update_user_data_button.on_click(update_user_data)

In [5]:
#                                         local function to open and Visualize -- callback for multipule buttons:
def visualize_selected_file(button):
    """ callback for multiple buttons - display the upper-left corner (at most 10 rows x 10 columns) of spreadsheets.

    The current cell output is cleared first. A file that cannot be read produces a short printed message
    instead of an exception.

    Args:
        button:         an ipywidgets.Button object carrying either
                        .fname_list     - a list of full path names to display (used when present), or
                        .file_selector  - an ipywidgets.Dropdown object whose .value is a file name in target_dir.
    """
    clear_output()
    try:
        if hasattr(button, 'fname_list'):
            full_fname_list = button.fname_list
        else:
            full_fname_list = [os.path.join(target_dir, button.file_selector.value)]

        for full_fname in full_fname_list:
            df = pd.read_csv(full_fname, sep='\t', header=0, index_col=0)
            display(HTML(df.iloc[0:10, 0:10].to_html()))

    except OSError:
        print("No input data! ")

    except EmptyDataError:
        print("Empty input data! ")

    except Exception:
        #                   any other read / parse / display problem; KeyboardInterrupt and SystemExit still propagate
        print("Invalid input data! ")
        
def clear(change):
    """ clear cell output callback for listbox selection change

    Args:
        change:         the change notification passed by the observed widget (not used).
    """
    clear_output()

<a id='transpose'></a>

## Transpose [[back to top]](#top) 
* **Select a spreadsheet file.**
* **"Transpose" will transpose, display and write a new spreadsheet.**

In [6]:
def transpose_selected_file(button):
    """ local function to read, transpose and write the new spreadsheet

    Reads the selected tab-separated file from target_dir, writes its transpose to output_dir as
    <selected name without extension>_T.tsv, refreshes the user_data listboxes, displays the result and
    prints a status message.

    Args:
        button:         an ipywidgets.Button object with an ipywidgets.Dropdown (.file_selector) object containing
                        the selected file name as its .value field.
    """
    message = 'transpose incomplete'
    try:
        selected_name = button.file_selector.value
        spreadsheet_df = pd.read_csv(os.path.join(target_dir, selected_name), sep='\t', index_col=0, header=0)

        name_base, file_extension = os.path.splitext(selected_name)
        outfile_name = os.path.join(output_dir, name_base + '_T.tsv')

        spreadsheet_df.transpose().to_csv(outfile_name, sep='\t')

        #                   reinitialize the files list in all dropdown listboxes dlink with main
        update_user_data(update_user_data_button)

        button.fname_list = [outfile_name]
        visualize_selected_file(button)

        message = 'Output written to\n%s'%(os.path.basename(outfile_name))

    except OSError:
        message = "No input data! "
    except EmptyDataError:
        message = "Empty input data! "
    except Exception:
        #                   any other failure; KeyboardInterrupt and SystemExit are no longer swallowed
        message = "Invalid input data! "

    print(message)

#                                         Create and display the widget controls:

#                           spreadsheet dropdown listbox:
transpose_flistbx = get_dropdown_files_listbox()
#                           keep the options in step with the master listbox, then clear the output on reselection
transpose_flistbx_dlink = traitlets.dlink((files_dropdown_main, 'options'), (transpose_flistbx, 'options'))
transpose_flistbx.observe(clear, names='value')

#                           "View" button -- shows the file currently selected in the dropdown:
visualize_file_button = widgets.Button(description='View', disabled=False, button_style='',
                                       tooltip='visualize selected file')
visualize_file_button.on_click(visualize_selected_file)
visualize_file_button.file_selector = transpose_flistbx

#                           show the controls, top to bottom:
display(update_user_data_button)
display(transpose_flistbx)
display(visualize_file_button)

In [7]:
#                           execute transpose - write button:
transpose_execute_button = widgets.Button(description='Transpose', disabled=False, button_style='',
                                          tooltip='transpose selected file')
#                           the callback reads the selected file name from .file_selector
transpose_execute_button.on_click(transpose_selected_file)
transpose_execute_button.file_selector = transpose_flistbx
display(transpose_execute_button)

<a id='common_samples'></a>

## Common Samples [[back to top]](#top)
* **Select two samples x phenotypes files.**
* **"Get Common Samples" will display and write two files containing only the samples in common.**

In [8]:
def get_common_samples(button):
    """ local function to read 2 spreadsheet files, find and write the common samples to 2 new spreadsheets

    Each output keeps only the rows whose names occur in both inputs and is written to output_dir as
    <input name without extension>_Com.tsv; both outputs are then displayed and a status message is printed.

    Args:
        button:         an ipywidgets.Button object with 2 ipywidgets.Dropdown objects (.file_1_selector and
                        .file_2_selector) containing the selected file names in the .value fields.

    """
    message = 'common samples incomplete'
    try:
        file_name_1 = os.path.join(target_dir, button.file_1_selector.value)
        file_name_2 = os.path.join(target_dir, button.file_2_selector.value)

        sxp_1_df = pd.read_csv(file_name_1, sep='\t', index_col=0, header=0)
        sxp_2_df = pd.read_csv(file_name_2, sep='\t', index_col=0, header=0)

        #                   NOTE(review): the kn helpers are assumed to return / intersect row names -- confirm
        sxp_1_gene_names = kn.extract_spreadsheet_gene_names(sxp_1_df)
        sxp_2_gene_names = kn.extract_spreadsheet_gene_names(sxp_2_df)
        common_samples_list = kn.find_common_node_names(sxp_1_gene_names, sxp_2_gene_names)
        sxp_1_trim_df = sxp_1_df.loc[common_samples_list]
        sxp_2_trim_df = sxp_2_df.loc[common_samples_list]

        #                   build the output names from the bare file names: joining output_dir with the full
        #                   (absolute) input path would discard output_dir and write next to the input file
        name_base_1 = os.path.splitext(os.path.basename(file_name_1))[0]
        outfile_name_1 = os.path.join(output_dir, name_base_1 + '_Com.tsv')

        name_base_2 = os.path.splitext(os.path.basename(file_name_2))[0]
        outfile_name_2 = os.path.join(output_dir, name_base_2 + '_Com.tsv')

        sxp_1_trim_df.to_csv(outfile_name_1, sep='\t', index=True, header=True)
        sxp_2_trim_df.to_csv(outfile_name_2, sep='\t', index=True, header=True)

        button.fname_list = [outfile_name_1, outfile_name_2]
        visualize_selected_file(button)

        message = 'common samples outputs written:\n%s\n%s'%(os.path.basename(outfile_name_1),
                                                              os.path.basename(outfile_name_2))

    except OSError:
        message = "No input data! "
    except EmptyDataError:
        message = "Empty input data! "
    except Exception:
        #                   any other failure; KeyboardInterrupt and SystemExit are no longer swallowed
        message = "Invalid input data! "

    print(message)

#                                         Create and display the widget controls:

#                           first dropdown listbox and visualize button:
common_samples_flistbx_1 = get_dropdown_files_listbox()
common_samples_flistbx_1_dlink = traitlets.dlink((files_dropdown_main, 'options'),
                                                 (common_samples_flistbx_1, 'options'))
common_samples_flistbx_1.observe(clear, names='value')

common_samples_flistbx_1_view_button = widgets.Button(
    description='View',
    disabled=False,
    button_style='',
    tooltip='visualize selected file',
    )
#                           .file_selector is read by the callback; it is not a Button trait, so it is attached
#                           after construction (non-trait constructor keywords are not reliably stored)
common_samples_flistbx_1_view_button.file_selector = common_samples_flistbx_1
common_samples_flistbx_1_view_button.on_click(visualize_selected_file)

display(update_user_data_button)
display(common_samples_flistbx_1)
display(common_samples_flistbx_1_view_button)

In [9]:
#                           second dropdown listbox and visualize button:
common_samples_flistbx_2 = get_dropdown_files_listbox()
common_samples_flistbx_2_dlink = traitlets.dlink((files_dropdown_main, 'options'),
                                                 (common_samples_flistbx_2, 'options'))
common_samples_flistbx_2.observe(clear, names='value')
common_samples_flistbx_2_view_button = widgets.Button(
    description='View',
    disabled=False,
    button_style='',
    tooltip='visualize selected file',
    )
#                           .file_selector is read by the callback; it is not a Button trait, so it is attached
#                           after construction (non-trait constructor keywords are not reliably stored)
common_samples_flistbx_2_view_button.file_selector = common_samples_flistbx_2
common_samples_flistbx_2_view_button.on_click(visualize_selected_file)

display(common_samples_flistbx_2)
display(common_samples_flistbx_2_view_button)

In [10]:
#                           common samples execute button:
common_samples_execute_button = widgets.Button(
    description='Get Common Samples',
    disabled=False,
    button_style='',
    tooltip='get common samples button',
    )
#                           the two selectors are read by get_common_samples; they are not Button traits, so they
#                           are attached after construction (non-trait constructor keywords are not reliably stored)
common_samples_execute_button.file_1_selector = common_samples_flistbx_1
common_samples_execute_button.file_2_selector = common_samples_flistbx_2
common_samples_execute_button.on_click(get_common_samples)
display(common_samples_execute_button)

<a id='merge'></a>

## Merge [[back to top]](#top) 
* **Select two samples x phenotypes spreadsheet files.**
* **"Merge Spreadsheets" will display and write one spreadsheet containing the union of the samples and phenotypes.**

In [11]:
def merge_spreadsheets(button):
    """ Union: local function to read 2 spreadsheet files and write their column-wise merge to 1 new spreadsheet

    The two data frames are concatenated side by side (pd.concat, axis=1) and the result is written to
    output_dir as <name 1>_<name 2>_Merged.tsv, displayed, and a status message is printed.

    Args:
        button:         an ipywidgets.Button object with 2 ipywidgets.Dropdown objects (.file_1_selector and
                        .file_2_selector) containing the selected file names in the .value fields.
    """
    message = 'merge incomplete'
    try:
        file_name_1 = os.path.join(target_dir, button.file_1_selector.value)
        file_name_2 = os.path.join(target_dir, button.file_2_selector.value)

        spreadsheet_1_df = pd.read_csv(file_name_1, sep='\t', index_col=0, header=0)
        spreadsheet_2_df = pd.read_csv(file_name_2, sep='\t', index_col=0, header=0)

        spreadsheet_X_df = pd.concat([spreadsheet_1_df, spreadsheet_2_df], axis=1)

        name_base_1 = os.path.splitext(os.path.basename(file_name_1))[0]
        name_base_2 = os.path.splitext(os.path.basename(file_name_2))[0]
        outfile_name = os.path.join(output_dir, name_base_1 + '_' + name_base_2 + '_Merged.tsv')

        spreadsheet_X_df.to_csv(outfile_name, sep='\t', index=True, header=True)

        button.fname_list = [outfile_name]
        visualize_selected_file(button)

        message = 'merge spreadsheets output written:\n%s'%(os.path.basename(outfile_name))

    except OSError:
        message = "No input data! "
    except EmptyDataError:
        message = "Empty input data! "
    except Exception:
        #                   any other failure; KeyboardInterrupt and SystemExit are no longer swallowed
        message = "Invalid input data! "

    print(message)

#                           first dropdown listbox and visualize button:
merge_flistbx_1 = get_dropdown_files_listbox()
merge_flistbx_1_dlink = traitlets.dlink((files_dropdown_main, 'options'), (merge_flistbx_1, 'options'))
merge_flistbx_1.observe(clear, names='value')

merge_flistbx_1_view_button = widgets.Button(
    description='View',
    disabled=False,
    button_style='',
    tooltip='visualize selected file',
    )
#                           .file_selector is read by the callback; it is not a Button trait, so it is attached
#                           after construction (non-trait constructor keywords are not reliably stored)
merge_flistbx_1_view_button.file_selector = merge_flistbx_1
merge_flistbx_1_view_button.on_click(visualize_selected_file)

display(update_user_data_button)
display(merge_flistbx_1)
display(merge_flistbx_1_view_button)

In [12]:
#                           second dropdown listbox and visualize button:
merge_flistbx_2 = get_dropdown_files_listbox()
merge_flistbx_2_dlink = traitlets.dlink((files_dropdown_main, 'options'), (merge_flistbx_2, 'options'))
merge_flistbx_2.observe(clear, names='value')

merge_flistbx_2_view_button = widgets.Button(
    description='View',
    disabled=False,
    button_style='',
    tooltip='visualize selected file',
    )
#                           .file_selector is read by the callback; it is not a Button trait, so it is attached
#                           after construction (non-trait constructor keywords are not reliably stored)
merge_flistbx_2_view_button.file_selector = merge_flistbx_2
#                           wire and show the SECOND view button (the first one belongs to the cell above)
merge_flistbx_2_view_button.on_click(visualize_selected_file)

display(merge_flistbx_2)
display(merge_flistbx_2_view_button)

In [13]:
#                           merge files execute button:
merge_execute_button = widgets.Button(
    description='Merge Spreadsheets',
    disabled=False,
    button_style='',
    tooltip='merge spreadsheets',
    )
#                           the two selectors are read by merge_spreadsheets; they are not Button traits, so they
#                           are attached after construction (non-trait constructor keywords are not reliably stored)
merge_execute_button.file_1_selector = merge_flistbx_1
merge_execute_button.file_2_selector = merge_flistbx_2
merge_execute_button.on_click(merge_spreadsheets)
display(merge_execute_button)

<a id='select_genes'></a>

## Select Genes [[back to top]](#top)
* **Select genes x samples spreadsheet file.**
* **Select a genes list file.**
* **"Select Genes" displays and writes the spreadsheet with only those genes selected.**

In [14]:
def read_a_list_file(input_file_name):
    """ local function to read a whitespace-separated list of tokens from a text file

    Args:
        input_file_name:    full path name of a file containing a list

    Returns:
        the tokens of the file, split on any run of whitespace (spaces, tabs, newlines), as a list of strings;
        an empty list for an empty file.
    """
    with open(input_file_name, 'r') as list_file:
        return list_file.read().split()

def select_genes(button):
    """ local function to read a spreadsheet and a genes list file, and write the spreadsheet rows named in the list

    The selected rows (sorted by name) are written to output_dir as <spreadsheet name without extension>_Slt_Gn.tsv,
    displayed, and a status message is printed.

    Args:
        button:         an ipywidgets.Button object with 2 ipywidgets.Dropdown objects containing the selected
                        file names in the .value fields: .file_1_selector (spreadsheet) and
                        .file_2_selector (genes list file).
    """
    message = 'select genes incomplete'
    try:
        file_name_1 = os.path.join(target_dir, button.file_1_selector.value)
        #                   the genes list comes from the SECOND selector
        file_name_2 = os.path.join(target_dir, button.file_2_selector.value)

        gene_select_list = read_a_list_file(file_name_2)
        spreadsheet_df = pd.read_csv(file_name_1, sep='\t', index_col=0, header=0)

        #                   NOTE(review): the kn helpers are assumed to return / intersect row names -- confirm
        gene_names = kn.extract_spreadsheet_gene_names(spreadsheet_df)
        intersection_names = sorted(kn.find_common_node_names(gene_names, gene_select_list))
        spreadsheet_intersected_df = spreadsheet_df.loc[intersection_names]

        #                   build the output name from the bare file name: joining output_dir with the full
        #                   (absolute) input path would discard output_dir and write next to the input file
        name_base_1 = os.path.splitext(os.path.basename(file_name_1))[0]
        outfile_name = os.path.join(output_dir, name_base_1 + '_Slt_Gn.tsv')

        spreadsheet_intersected_df.to_csv(outfile_name, sep='\t', index=True, header=True)

        button.fname_list = [outfile_name]
        visualize_selected_file(button)

        message = 'select genes output written:\n%s'%(os.path.basename(outfile_name))

    except OSError:
        message = "No input data! "
    except EmptyDataError:
        message = "Empty input data! "
    except Exception:
        #                   any other failure; KeyboardInterrupt and SystemExit are no longer swallowed
        message = "Invalid input data! "

    print(message)

#                           spreadsheet select dropdown listbox and visualize button:
select_spreadsheet_flistbx = get_dropdown_files_listbox()
select_spreadsheet_flistbx_dlink = traitlets.dlink((files_dropdown_main, 'options'),
                                                   (select_spreadsheet_flistbx, 'options'))
select_spreadsheet_flistbx.observe(clear, names='value')
select_spreadsheet_flistbx.description='Spreadsheet:'

select_spreadsheet_flistbx_1_view_button = widgets.Button(
    description='View',
    disabled=False,
    button_style='',
    tooltip='visualize selected file',
    )
#                           .file_selector is read by the callback; it is not a Button trait, so it is attached
#                           after construction (non-trait constructor keywords are not reliably stored)
select_spreadsheet_flistbx_1_view_button.file_selector = select_spreadsheet_flistbx
select_spreadsheet_flistbx_1_view_button.on_click(visualize_selected_file)

display(update_user_data_button)
display(select_spreadsheet_flistbx)
display(select_spreadsheet_flistbx_1_view_button)

In [15]:
#                           "list of genes" file selector dropdown listbox and visualize button:
select_listfile_flistbx = get_dropdown_files_listbox()
select_listfile_flistbx_dlink = traitlets.dlink((files_dropdown_main, 'options'),
                                                  (select_listfile_flistbx, 'options'))
select_listfile_flistbx.description='list-file:'
select_listfile_flistbx.observe(clear, names='value')

select_listfile_flistbx_view_button = widgets.Button(
    description='View',
    disabled=False,
    button_style='',
    tooltip='visualize selected file',
    )
#                           .file_selector is read by the callback; it is not a Button trait, so it is attached
#                           after construction (non-trait constructor keywords are not reliably stored)
select_listfile_flistbx_view_button.file_selector = select_listfile_flistbx
select_listfile_flistbx_view_button.on_click(visualize_selected_file)

display(select_listfile_flistbx)
display(select_listfile_flistbx_view_button)

In [16]:
#                           select genes execute button:
select_genes_execute_button = widgets.Button(
    description='select genes',
    disabled=False,
    button_style='',
    tooltip='select genes from list',
    )
#                           the two selectors are read by select_genes; they are not Button traits, so they
#                           are attached after construction (non-trait constructor keywords are not reliably stored)
select_genes_execute_button.file_1_selector = select_spreadsheet_flistbx
select_genes_execute_button.file_2_selector = select_listfile_flistbx
select_genes_execute_button.on_click(select_genes)
display(select_genes_execute_button)

<a id='cluster_averages'></a>

##  Cluster Averages [[back to top]](#top)
* **Select genes x samples spreadsheet file.**
* **Select samples classification dictionary file (e.g. labels).**
* **"Calculate Averages" displays and writes the average of each gene over the samples of each cluster.**

In [17]:
def get_cluster_averages(button):
    """ local function to get the averages for the dictionary selected samples in a spreadsheet

    The labels file is read as two tab-separated columns without a header row: sample name, cluster number.
    Labels are matched to the spreadsheet columns BY SAMPLE NAME, so the labels file may list the samples in any
    order and may contain samples that are not in the spreadsheet. For every cluster the row-wise mean over its
    samples is written as one column of <spreadsheet name without extension>_Clst_Avg.tsv in output_dir; the
    result is then displayed and a status message is printed.

    Args:
        button:         an ipywidgets.Button object with 2 ipywidgets.Dropdown objects containing the selected
                        file names in the .value fields: .file_1_selector (spreadsheet) and
                        .file_2_selector (labels file).
    """
    message = 'cluster averages incomplete'
    try:
        file_name_1 = os.path.join(target_dir, button.file_1_selector.value)
        file_name_2 = os.path.join(target_dir, button.file_2_selector.value)

        spreadsheet_df = pd.read_csv(file_name_1, sep='\t', index_col=0, header=0)
        labels_df = pd.read_csv(file_name_2, sep='\t', index_col=0, names=['sample', 'cluster_number'])

        #                   one label per sample name: unlabeled rows dropped, the last entry wins for a repeated
        #                   name, and only samples that are columns of the spreadsheet are kept
        sample_labels = labels_df['cluster_number'].dropna()
        sample_labels.index = sample_labels.index.astype(str)
        sample_labels = sample_labels[~sample_labels.index.duplicated(keep='last')]
        sample_labels = sample_labels[sample_labels.index.isin(spreadsheet_df.columns)]

        if len(sample_labels) < 1:
            print('no labeled samples in spreadsheet')
            return

        cluster_ave_df = pd.DataFrame({
            cluster: spreadsheet_df.loc[:, sample_labels[sample_labels == cluster].index].mean(axis=1)
            for cluster in np.unique(sample_labels.values)
        })

        name_base_1 = os.path.splitext(os.path.basename(file_name_1))[0]
        outfile_name = os.path.join(output_dir, name_base_1 + '_Clst_Avg.tsv')
        cluster_ave_df.to_csv(outfile_name, sep='\t', index=True, header=True)

        button.fname_list = [outfile_name]
        visualize_selected_file(button)

        message = 'cluster averages output written:\n%s'%(os.path.basename(outfile_name))

    except OSError:
        message = "No input data! "
    except EmptyDataError:
        message = "Empty input data! "
    except Exception:
        #                   any other failure; KeyboardInterrupt and SystemExit are no longer swallowed
        message = "Invalid input data! "

    print(message)

#                           spreadsheet select dropdown listbox and visualize button:
cluster_averages_spreadsheet_flistbx = get_dropdown_files_listbox()
cluster_averages_spreadsheet_flistbx_dlink = traitlets.dlink((files_dropdown_main, 'options'),
                                                   (cluster_averages_spreadsheet_flistbx, 'options'))
cluster_averages_spreadsheet_flistbx.observe(clear, names='value')
cluster_averages_spreadsheet_flistbx.description='Spreadsheet:'

cluster_averages_spreadsheet_flistbx_view_button = widgets.Button(
    description='View',
    disabled=False,
    button_style='',
    tooltip='visualize selected file',
    )
#                           .file_selector is read by the callback; it is not a Button trait, so it is attached
#                           after construction (non-trait constructor keywords are not reliably stored)
cluster_averages_spreadsheet_flistbx_view_button.file_selector = cluster_averages_spreadsheet_flistbx
cluster_averages_spreadsheet_flistbx_view_button.on_click(visualize_selected_file)

display(update_user_data_button)
display(cluster_averages_spreadsheet_flistbx)
display(cluster_averages_spreadsheet_flistbx_view_button)

In [18]:
#                           samples classification (dictionary) file dropdown listbox and visualize button:
select_clusters_flistbx = get_dropdown_files_listbox()
select_clusters_flistbx_dlink = traitlets.dlink((files_dropdown_main, 'options'),
                                                  (select_clusters_flistbx, 'options'))
select_clusters_flistbx.description='dictionary file:'
select_clusters_flistbx.observe(clear, names='value')

select_clusters_flistbx_view_button = widgets.Button(
    description='View',
    disabled=False,
    button_style='',
    tooltip='visualize selected file',
    )
#                           .file_selector is read by the callback; it is not a Button trait, so it is attached
#                           after construction (non-trait constructor keywords are not reliably stored)
select_clusters_flistbx_view_button.file_selector = select_clusters_flistbx
select_clusters_flistbx_view_button.on_click(visualize_selected_file)

display(select_clusters_flistbx)
display(select_clusters_flistbx_view_button)

In [19]:
#                           get_cluster_averages files execute button:
cluster_averages_execute_button = widgets.Button(
    description='Calculate Averages',
    disabled=False,
    button_style='',
    tooltip='get cluster averages',
    )
#                           the two selectors are read by get_cluster_averages; they are not Button traits, so they
#                           are attached after construction (non-trait constructor keywords are not reliably stored)
cluster_averages_execute_button.file_1_selector = cluster_averages_spreadsheet_flistbx
cluster_averages_execute_button.file_2_selector = select_clusters_flistbx
cluster_averages_execute_button.on_click(get_cluster_averages)
display(cluster_averages_execute_button)

<a id='select_categorical'></a>

# ^^ Fixed to Here !  ||

## Select Categorical [[back to top]](#top)
* **Select genes x samples spreadsheet.**
* **Select samples x phenotypes spreadsheet.**
* **Select the phenotype id and category from the next 2 dropdown listboxes.**
* **"Select Categorical" displays and writes 2 spreadsheets with only the samples corresponding to the selected category of that phenotype.**

In [20]:
#                                         local function to read files and select categorical and write:
def select_categorical(button):
    """ local function to read files and select categorical and write

    Keeps the samples whose value in the chosen phenotype column equals the chosen category and that are
    also columns of the spreadsheet. Writes the reduced spreadsheet and the reduced phenotype file to
    output_dir as <input name without extension>_Slct_Ctg.tsv, displays both and prints a status message.
    Prints 'no samples in intersection' and writes nothing when no sample qualifies.

    Args:
        button:         an ipywidgets.Button object with 4 ipywidgets.Dropdown objects whose .value fields hold
                        .spreadsheet_file_selector  - the spreadsheet file name,
                        .phenotype_file_selector    - the phenotype file name,
                        .phenotype_id_selector      - the phenotype (column) name,
                        .category_selectory         - the category value to select.
    """
    message = 'select categorical incomplete'
    try:
        select_category = button.category_selectory.value
        phenotype_id = button.phenotype_id_selector.value

        file_name_1 = os.path.join(target_dir, button.spreadsheet_file_selector.value)
        file_name_2 = os.path.join(target_dir, button.phenotype_file_selector.value)

        spreadsheet_df = pd.read_csv(file_name_1, sep='\t', index_col=0, header=0)
        phenotype_df = pd.read_csv(file_name_2, sep='\t', index_col=0, header=0)

        samples_list = phenotype_df.index[phenotype_df[phenotype_id] == select_category]
        spreadsheet_samples_list = list(spreadsheet_df.columns)
        samples_list = sorted(list(set(samples_list) & set(spreadsheet_samples_list)))

        if len(samples_list) < 1:
            print('no samples in intersection')
            return

        phenotype_category_df = phenotype_df.loc[samples_list]
        spreadsheet_category_df = spreadsheet_df[samples_list]

        #                   build the output names from the bare file names: joining output_dir with the full
        #                   (absolute) input path would discard output_dir and write next to the input file
        name_base_1 = os.path.splitext(os.path.basename(file_name_1))[0]
        outfile_name_1 = os.path.join(output_dir, name_base_1 + '_Slct_Ctg.tsv')

        name_base_2 = os.path.splitext(os.path.basename(file_name_2))[0]
        outfile_name_2 = os.path.join(output_dir, name_base_2 + '_Slct_Ctg.tsv')

        spreadsheet_category_df.to_csv(outfile_name_1, sep='\t', index=True, header=True)
        phenotype_category_df.to_csv(outfile_name_2, sep='\t', index=True, header=True)

        button.fname_list = [outfile_name_1, outfile_name_2]
        visualize_selected_file(button)

        message = 'select categorical outputs written:\n%s\n%s'%(os.path.basename(outfile_name_1),
                                                                  os.path.basename(outfile_name_2))

    except OSError:
        message = "No input data! "
    except EmptyDataError:
        message = "Empty input data! "
    except Exception:
        #                   any other failure; KeyboardInterrupt and SystemExit are no longer swallowed
        message = "Invalid input data! "

    print(message)

#                           spreadsheet select dropdown listbox and visualize button:
select_categorical_spreadsheet_flistbx = get_dropdown_files_listbox()
select_categorical_spreadsheet_flistbx_dlink = traitlets.dlink((files_dropdown_main, 'options'),
                                                   (select_categorical_spreadsheet_flistbx, 'options'))
select_categorical_spreadsheet_flistbx.observe(clear, names='value')
select_categorical_spreadsheet_flistbx.description='Spreadsheet:'

select_categorical_spreadsheet_flistbx_view_button = widgets.Button(
    description='View',
    disabled=False,
    button_style='',
    tooltip='file to visualize',
    )
#                           .file_selector is read by the callback; it is not a Button trait, so it is attached
#                           after construction (non-trait constructor keywords are not reliably stored)
select_categorical_spreadsheet_flistbx_view_button.file_selector = select_categorical_spreadsheet_flistbx
select_categorical_spreadsheet_flistbx_view_button.on_click(visualize_selected_file)

display(update_user_data_button)
display(select_categorical_spreadsheet_flistbx)
display(select_categorical_spreadsheet_flistbx_view_button)

In [21]:
#                           phenotype file select dropdown listbox and visualize button:
select_phenotype_flistbx = get_dropdown_files_listbox()
select_phenotype_flistbx_dlink = traitlets.dlink((files_dropdown_main, 'options'),
                                                  (select_phenotype_flistbx, 'options'))
select_phenotype_flistbx.description='phenotype:'
select_phenotype_flistbx.observe(clear, names='value')

select_phenotype_flistbx_view_button = widgets.Button(
    description='View',
    disabled=False,
    button_style='',
    tooltip='file to visualize',
    )
#                           .file_selector is read by the callback; it is not a Button trait, so it is attached
#                           after construction (non-trait constructor keywords are not reliably stored)
select_phenotype_flistbx_view_button.file_selector = select_phenotype_flistbx
select_phenotype_flistbx_view_button.on_click(visualize_selected_file)

display(select_phenotype_flistbx)
display(select_phenotype_flistbx_view_button)

In [22]:
# Create two special dropdown listboxes S.T.
#       - the first shows the list of phenotype ids (column names) in the selected phenotype file
#       - the second shows the unique values in the selected phenotype id
def _read_phenotype_df(file_name):
    """ read a phenotype spreadsheet from target_dir

    Returns:
        the data frame, or None when the file cannot be read / parsed or has no data columns.
    """
    try:
        phenotype_df = pd.read_csv(os.path.join(target_dir, file_name), sep='\t', index_col=0, header=0)
    except (OSError, ValueError):
        #                   missing file (e.g. the 'No Data' placeholder), empty file or unparsable content
        return None
    if len(phenotype_df.columns) < 1:
        return None
    return phenotype_df

def _set_listbox_options(listbox, new_options):
    """ replace the options of a dropdown (the [''] placeholder when there are none) and select the first one """
    new_options = list(new_options)
    if len(new_options) < 1:
        new_options = ['']
    listbox.options = new_options
    listbox.value = new_options[0]

def _refresh_category_list(phenotype_df):
    """ fill category_select_listbox with the sorted unique non-null values of the selected phenotype id """
    phenotype_id = phenotype_id_listbox.value
    if phenotype_df is None or phenotype_id not in phenotype_df.columns:
        categories = []
    else:
        categories = sorted(set(phenotype_df[phenotype_id].dropna()))
    _set_listbox_options(category_select_listbox, categories)

def _refresh_phenotype_listboxes(file_name):
    """ fill phenotype_id_listbox from the columns of file_name, then refresh category_select_listbox """
    phenotype_df = _read_phenotype_df(file_name)
    phenotype_ids = [] if phenotype_df is None else phenotype_df.columns.tolist()
    _set_listbox_options(phenotype_id_listbox, phenotype_ids)
    #                       refresh explicitly: the phenotype id observer only fires when the value actually changes
    _refresh_category_list(phenotype_df)

def reset_phenotype_id_list(change):
    """ update the phenotype id options and select the first one in phenotype_id_listbox, and...
        update the category_select_listbox options and select the first

    Args:
        change:         change notification of select_phenotype_flistbx; .new is the newly selected file name.
    """
    _refresh_phenotype_listboxes(change.new)

phenotype_id_listbox = widgets.Dropdown(
    options=[''],
    value='',
    layout=widgets.Layout(width='80%', height='80px'),
    description='phenotype id:',
)
#       - not a Dropdown trait, so it is attached after construction instead of being passed as a keyword
phenotype_id_listbox.target_dir = target_dir
#       - phenotype file selection -- change this listbox
select_phenotype_flistbx.observe(reset_phenotype_id_list, names='value')

def reset_category_list(change):
    """ update the category_select_listbox options and select the first option

    Args:
        change:         change notification of phenotype_id_listbox (not used; the current widget values are read).
    """
    _refresh_category_list(_read_phenotype_df(select_phenotype_flistbx.value))

category_select_listbox = widgets.Dropdown(
    options=[''],
    value='',
    layout=widgets.Layout(width='80%', height='80px'),
    description='category:'
)
#       - phenotype id change change this listbox
phenotype_id_listbox.observe(reset_category_list, names='value')

#       - fill both listboxes for the file that is already selected: the observers above only fire on a
#         change, which can never happen when user_data holds a single file
_refresh_phenotype_listboxes(select_phenotype_flistbx.value)

display(phenotype_id_listbox)
display(category_select_listbox)

In [23]:
#                           select categorical execute button:
select_categorical_execute_button = widgets.Button(
    description='Select Categorical',
    disabled=False,
    button_style='',
    tooltip='select category in both',
    )
#                           the four selectors are read by select_categorical; they are not Button traits, so they
#                           are attached after construction (non-trait constructor keywords are not reliably stored)
select_categorical_execute_button.spreadsheet_file_selector = select_categorical_spreadsheet_flistbx
select_categorical_execute_button.phenotype_file_selector = select_phenotype_flistbx
select_categorical_execute_button.phenotype_id_selector = phenotype_id_listbox
select_categorical_execute_button.category_selectory = category_select_listbox
select_categorical_execute_button.on_click(select_categorical)
display(select_categorical_execute_button)