In [None]:
# Import all dependencies

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pingouin as pg
import seaborn as sns
import os
import itertools
import statistics as stats
import math
import ipywidgets as widgets
from ipywidgets import HBox, VBox, Layout

from IPython.display import display

###################################################################
# Overview:

    # 1 Compute statistics
    # 2 Annotate stats within the plots
    # 3 Functions that are triggered by clicking the widget buttons
    # 4 Create widget elements (except dependent ones like checkboxes to annotate the stats)
    # 5 Specify widget layout and launch it
    # 6 Process statistical results for download

###################################################################


###################################################################
# 1 Functions to compute the different statistics
# 1.1 Comparison of independent samples

def independent_samples():
    """Run the statistics for a comparison of independent samples.

    Reads the global DataFrame ``df``: its first column is used as the data
    column and its second column as the group-ID column. Results are published
    through the globals ``data_col``, ``group_col``, ``l_groups``, ``d_main``
    and ``performed_test``.

    ``d_main`` maps every group ID to its raw values and its ``pg.normality``
    result. ``d_main['summary']`` holds the normality and homoscedasticity
    flags, the group-level statistic and the Holm-corrected pairwise
    comparisons. If fewer than two groups are present an error is printed and
    no summary is created.
    """
    global data_col, group_col, d_main, l_groups, performed_test
    data_col = df.columns[0]
    group_col = df.columns[1]

    d_main = {}
    l_groups = list(df[group_col].unique())
    for group_id in l_groups:
        # Filter and test each group once and reuse the results.
        group_data = df.loc[df[group_col] == group_id, data_col].values
        normality_full = pg.normality(group_data)
        d_main[group_id] = {'data': group_data,
                            'normality_full': normality_full,
                            'normality_bool': normality_full['normal'].iloc[0]}

    # Check the group count before the variance test: with a single group the
    # variance test fails first and this message would never be shown.
    if len(l_groups) < 2:
        print('Error: The group_id column has to contain at least two different group_ids for this selection. Did you mean to perform a one-sample test?')
        return

    # .iloc[0] instead of [0]: the homoscedasticity result is not indexed by
    # integers, so [0] relies on pandas' deprecated positional fallback.
    d_main['summary'] = {'normality': all([d_main[elem]['normality_bool'] for elem in l_groups]),
                         'homoscedasticity': pg.homoscedasticity([d_main[elem]['data'] for elem in l_groups])['equal_var'].iloc[0]}

    parametric = all([d_main['summary']['normality'], d_main['summary']['homoscedasticity']])

    pairwise_comparisons = pg.pairwise_ttests(data=df, dv=data_col, between=group_col,
                                              parametric=parametric, padjust='holm')

    if len(l_groups) > 2:
        if parametric:
            d_main['summary']['group_level_statistic'] = pg.anova(data=df, dv=data_col, between=group_col)
            performed_test = 'One-way ANOVA'
        else:
            d_main['summary']['group_level_statistic'] = pg.kruskal(data=df, dv=data_col, between=group_col)
            performed_test = 'Kruskal-Wallis-ANOVA'
    else:
        # Exactly two groups: the single pairwise comparison is the group-level
        # test. Store it so that the download functions find both
        # 'group_level_statistic' and 'performed_test'.
        d_main['summary']['group_level_statistic'] = pairwise_comparisons.copy()
        performed_test = 'Independent-samples t-test' if parametric else 'Mann-Whitney U test'

    d_main['summary']['pairwise_comparisons'] = pairwise_comparisons


# 1.2 Mixed-model ANOVA (contributed by Konstantin Kobel):

def mixed_model_ANOVA():
    """Run a mixed-model ANOVA on the global DataFrame ``df``.

    The first four columns of ``df`` are used, in this order, as data column,
    group-ID column, subject-ID column and session-ID column. Results are
    published through the globals ``d_main``, ``data_col``, ``group_col``,
    ``subject_col``, ``session_col``, ``l_groups``, ``l_sessions`` and
    ``performed_test``.

    ``d_main`` maps every ``(group_id, session_id)`` pair to its raw values,
    mean and ``pg.normality`` result. ``d_main['summary']`` holds the
    normality and homoscedasticity flags, the mixed ANOVA table and the
    Holm-corrected pairwise comparisons.
    """
    global d_main, data_col, group_col, subject_col, session_col, l_groups, l_sessions, performed_test
    data_col, group_col, subject_col, session_col = df.columns[:4]

    d_main = {}
    l_groups = list(df[group_col].unique())
    l_sessions = list(df[session_col].unique())

    for group_id in l_groups:
        for session_id in l_sessions:
            # Select each group/session cell once and reuse it.
            cell = df.loc[(df[group_col] == group_id) & (df[session_col] == session_id), data_col]
            normality_full = pg.normality(cell.values)
            d_main[group_id, session_id] = {'data': cell.values,
                                            'mean': cell.mean(),
                                            'normality_full': normality_full,
                                            'normality_bool': normality_full['normal'].iloc[0]}

    # Snapshot the per-cell entries before the 'summary' key is added.
    cells = list(d_main.values())

    # .iloc[0] instead of [0]: the homoscedasticity result is not indexed by
    # integers, so [0] relies on pandas' deprecated positional fallback.
    d_main['summary'] = {'normality': all([cell['normality_bool'] for cell in cells]),
                         'homoscedasticity': pg.homoscedasticity([cell['data'] for cell in cells])['equal_var'].iloc[0]}

    parametric = all([d_main['summary']['normality'], d_main['summary']['homoscedasticity']])

    d_main['summary']['group_level_statistic'] = pg.mixed_anova(data=df, dv=data_col, within=session_col, subject=subject_col, between=group_col)
    performed_test = 'Mixed-model ANOVA'
    # If we found some non-parametric alternative this could be implemented here
    if not parametric:
        print("Please be aware that the data require non-parametric testing.\nHowever, this is not implemented yet and a parametric test is computed instead.")

    d_main['summary']['pairwise_comparisons'] = pg.pairwise_ttests(data=df, dv=data_col,
                                                                   within=session_col, subject=subject_col,
                                                                   between=group_col, padjust='holm')

###################################################################    

    
###################################################################
# 2 Functions to annotate the results of the statistical tests in the respective plots:
# 2.1 Comparison of independent samples:
# 2.1.1 Create all checkboxes of the pairwise comparisons
def create_checkboxes_pairwise_comparisons():
    """Create one unticked checkbox per pair of groups in ``l_groups``.

    The checkboxes are labelled '<group1> vs. <group2>' and arranged in rows
    of three.

    Returns:
        A tuple ``(rows, checkboxes)``: ``rows`` is the tuple of HBox rows to
        add to a container widget, ``checkboxes`` is the flat list of the
        Checkbox widgets in creation order.
    """
    boxes_per_row = 3

    checkboxes = []
    for first_group, second_group in itertools.combinations(l_groups, 2):
        label = '{} vs. {}'.format(first_group, second_group)
        checkboxes.append(widgets.Checkbox(value=False, description=label))

    # Slice the flat list into consecutive rows; the last row may be shorter.
    rows = [HBox(checkboxes[start:start + boxes_per_row])
            for start in range(0, len(checkboxes), boxes_per_row)]

    return VBox(rows).children[:], checkboxes

# 2.1.2 Define l_stats_to_annotate for independent samples plots:
def define_l_stats_to_annotate_independent_samples():
    """Collect the group pairs whose checkbox is currently ticked.

    Reads the global list ``l_checkboxes`` and parses each ticked checkbox
    description of the form '<group1> vs. <group2>'.

    Returns:
        A list of ``(group1, group2)`` tuples of group names (strings).
    """
    l_stats_to_annotate = []
    for checkbox in l_checkboxes:
        if not checkbox.value:
            continue
        # Split on the full separator so that group names containing spaces
        # (e.g. 'group A') are not cut off at their first space.
        group1, _, group2 = checkbox.description.partition(' vs. ')
        l_stats_to_annotate.append((group1, group2))
    return l_stats_to_annotate

# 2.1.3 Use l_stats_to_annotate to annotate the selected stats in the plot:
def annotate_stats_independent_samples(l_stats_to_annotate):
    """Draw significance brackets for the selected group pairs.

    Uses the globals ``df``, ``data_col``, ``d_main`` and ``l_group_order``
    and draws into the current matplotlib figure. Brackets are stacked
    upwards, starting 5 % above the largest data value.

    Args:
        l_stats_to_annotate: list of ``(group1, group2)`` tuples.
    """
    if not l_stats_to_annotate:
        return

    max_total = df[data_col].max()
    h = max_total * 0.05
    y = max_total + h
    col = 'k'

    # Add check whether group level ANOVA / Kruskal-Wallis-ANOVA is significant
    df_temp = d_main['summary']['pairwise_comparisons']

    # Prefer the corrected p-value; fall back to the uncorrected one when the
    # table has no 'p-corr' column.
    pval_column = 'p-corr' if 'p-corr' in df_temp.columns else 'p-unc'

    for group1, group2 in l_stats_to_annotate:
        x1 = l_group_order.index(group1)
        x2 = l_group_order.index(group2)

        # The pair may be stored as (A, B) or as (B, A) in the results table.
        matches = df_temp.loc[(df_temp['A'] == group1) & (df_temp['B'] == group2), pval_column]
        if matches.empty:
            matches = df_temp.loc[(df_temp['B'] == group1) & (df_temp['A'] == group2), pval_column]
        if matches.empty:
            print('There was an error with annotating the stats!')
            # Skip this pair: otherwise pval would be undefined, or still hold
            # the value of the previously annotated pair.
            continue
        pval = matches.iloc[0]

        if pval <= 0.001:
            stars = '***'
        elif pval <= 0.01:
            stars = '**'
        elif pval <= 0.05:
            stars = '*'
        else:
            stars = 'n.s.'

        plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
        plt.text((x1+x2)*.5, y+h, stars, ha='center', va='bottom', color=col)

        # Move up so that the next bracket does not overlap this one.
        y = y + 3*h


# 2.2 Mixed-model ANOVA (contributed by Konstantin Kobel)
# 2.2.1 Create all checkboxes and assign them to another accordion to make selection for the user a little clearer
def create_checkboxes_pairwise_comparisons_mma():
    """Create the pairwise-comparison checkboxes for every session.

    For each entry of the global ``l_sessions`` a full set of group-pair
    checkboxes is created and placed on its own accordion page, titled with
    the session ID.

    Returns:
        A tuple ``(children, l_all_checkboxes)``: ``children`` is a one-element
        tuple holding the accordion, ``l_all_checkboxes`` is a list of
        ``(session_id, checkbox)`` tuples.
    """
    session_pages = []
    l_all_checkboxes = []

    for session_id in l_sessions:
        checkbox_rows, session_checkboxes = create_checkboxes_pairwise_comparisons()
        session_pages.append(VBox(checkbox_rows))
        l_all_checkboxes.extend((session_id, checkbox) for checkbox in session_checkboxes)

    annotate_session_stats_accordion = widgets.Accordion(children=session_pages, selected_index=None)
    for page_index, session_id in enumerate(l_sessions):
        # Session IDs read from the data may be numbers; titles should be text.
        annotate_session_stats_accordion.set_title(page_index, str(session_id))

    return (annotate_session_stats_accordion,), l_all_checkboxes
             
# 2.2.2 Define l_stats_to_annotate for Mixed-Model-ANOVA plots:
def define_l_stats_to_annotate_mma():
    """Collect the ticked group pairs together with their session.

    Reads the global list ``l_checkboxes`` of ``(session_id, checkbox)``
    tuples and parses each ticked checkbox description of the form
    '<group1> vs. <group2>'.

    Returns:
        A list of ``(group1, group2, session_id)`` tuples.
    """
    l_stats_to_annotate = []
    for session_id, checkbox in l_checkboxes:
        if not checkbox.value:
            continue
        # Split on the full separator so that group names containing spaces
        # (e.g. 'group A') are not cut off at their first space.
        group1, _, group2 = checkbox.description.partition(' vs. ')
        l_stats_to_annotate.append((group1, group2, session_id))
    return l_stats_to_annotate

# 2.2.3 Use l_stats_to_annotate to annotate the selected stats in the plot:
# 2.2.3.1 Helper function to make sorting based on the element at index 3 of a tuple (i.e. its 4th element) possible

def Func(e):
    """Sort key: return the item stored at index 3 of ``e``."""
    sort_value = e[3]
    return sort_value

# 2.2.3.2 Annotate in pointplot (contributed by Konstantin Kobel):

def annotate_stats_mma_pointplot(l_stats_to_annotate):
    """Draw significance brackets next to the points of the pointplot.

    Uses the globals ``df``, ``data_col``, ``group_col``, ``session_col``,
    ``l_sessions`` and ``d_main`` and draws into the current matplotlib
    figure. For every session the selected pairs are drawn as vertical
    brackets between the two group means. Pairs with the smallest mean
    difference are drawn first, and each further bracket is shifted to the
    right.

    Args:
        l_stats_to_annotate: list of ``(group1, group2, session_id)`` tuples.
    """
    if not l_stats_to_annotate:
        return

    col = 'k'
    data_max = df[data_col].max()
    # All horizontal distances are scaled by the largest data value.
    x_shift = data_max * 1 / 1000
    b = data_max * 1 / 1200
    bracket_step = data_max * 2 / 1200

    df_temp = d_main['summary']['pairwise_comparisons']

    for session_index, session_id in enumerate(l_sessions):
        data_in_session = df[session_col] == session_id
        stats_in_session = df_temp[session_col] == session_id

        # Collect the selected pairs of this session with their group means.
        brackets = []
        for group1, group2, _ in [elem for elem in l_stats_to_annotate if elem[2] == session_id]:
            y1 = df.loc[(df[group_col] == group1) & data_in_session, data_col].mean()
            y2 = df.loc[(df[group_col] == group2) & data_in_session, data_col].mean()
            brackets.append((abs(y1 - y2), group1, group2, y1, y2))
        # Smallest mean difference first, so shorter brackets sit closer to the points.
        brackets.sort(key=lambda bracket: bracket[0])

        x = session_index + x_shift
        c = 0
        for _, group1, group2, y1, y2 in brackets:
            # The pair may be stored as (A, B) or as (B, A) in the results table.
            matches = df_temp.loc[(df_temp['A'] == group1) & (df_temp['B'] == group2) & stats_in_session, 'p-corr']
            if matches.empty:
                matches = df_temp.loc[(df_temp['B'] == group1) & (df_temp['A'] == group2) & stats_in_session, 'p-corr']
            if matches.empty:
                print('There was an error with annotating the stats!')
                continue
            p = matches.iloc[0]

            if p <= 0.001:
                stars = '***'
            elif p <= 0.01:
                stars = '**'
            elif p <= 0.05:
                stars = '*'
            else:
                stars = 'n.s.'

            plt.text(x + b + c, (y1 + y2) / 2, stars, rotation=270)
            plt.plot([x + c, x + b + c, x + b + c, x + c], [y1, y1, y2, y2], color=col, lw=1.5)

            # Shift the next bracket of this session further to the right.
            c = c + bracket_step

            
# 2.2.3.3 Annotate in violinplot (contributed by Konstantin Kobel):
        
def annotate_stats_mma_violinplot(l_stats_to_annotate):
    """Draw significance brackets above the violins of the violinplot.

    Uses the globals ``df``, ``data_col``, ``group_col``, ``session_col``,
    ``l_groups``, ``l_sessions`` and ``d_main`` and draws into the current
    matplotlib figure. For every session the selected pairs are drawn as
    horizontal brackets above the data. Pairs with the smallest mean
    difference are drawn first, and each further bracket is stacked higher.

    Args:
        l_stats_to_annotate: list of ``(group1, group2, session_id)`` tuples.
    """
    if not l_stats_to_annotate:
        return

    col = 'k'
    data_max = df[data_col].max()
    y = 1.1 * data_max
    b = 1 / 20 * data_max
    level_step = 1 / 5 * data_max

    n_groups = len(l_groups)
    # Must match the width= passed to sns.violinplot in the plotting callback.
    violin_width = 0.8
    # Position of the session centre, in units of group index. The previous
    # statistics.median() of the group *names* only gave the middle index when
    # the names happened to be sorted.
    middle = (n_groups - 1) / 2

    df_temp = d_main['summary']['pairwise_comparisons']

    for session_index, session_id in enumerate(l_sessions):
        data_in_session = df[session_col] == session_id
        stats_in_session = df_temp[session_col] == session_id

        # Collect the selected pairs of this session with their mean difference.
        selected = []
        for group1, group2, _ in [elem for elem in l_stats_to_annotate if elem[2] == session_id]:
            mean1 = df.loc[(df[group_col] == group1) & data_in_session, data_col].mean()
            mean2 = df.loc[(df[group_col] == group2) & data_in_session, data_col].mean()
            selected.append((abs(mean1 - mean2), group1, group2))
        selected.sort(key=lambda entry: entry[0])

        c = 0
        for _, group1, group2 in selected:
            # The pair may be stored as (A, B) or as (B, A) in the results table.
            matches = df_temp.loc[(df_temp['A'] == group1) & (df_temp['B'] == group2) & stats_in_session, 'p-corr']
            if matches.empty:
                matches = df_temp.loc[(df_temp['B'] == group1) & (df_temp['A'] == group2) & stats_in_session, 'p-corr']
            if matches.empty:
                print('There was an error with annotating the stats!')
                continue
            p = matches.iloc[0]

            if p <= 0.001:
                stars = '***'
            elif p <= 0.01:
                stars = '**'
            elif p <= 0.05:
                stars = '*'
            else:
                stars = 'n.s.'

            # Violin centres: the hue levels are spread evenly over violin_width
            # around the session position, left to right in l_groups order.
            # NOTE(review): assumes seaborn's default dodge layout and that the
            # hue order equals l_groups - confirm if hue_order is ever customised.
            x1 = session_index + (l_groups.index(group1) - middle) * violin_width / n_groups
            x2 = session_index + (l_groups.index(group2) - middle) * violin_width / n_groups

            plt.text((x1 + x2) / 2, y + 1.5 * b + c, stars, rotation=0)
            plt.plot([x1, x1, x2, x2], [y + c, y + c + b, y + c + b, y + c], color=col, lw=1.5)

            # Stack the next bracket of this session above this one.
            c = c + level_step

###################################################################    

    
###################################################################
# 3 Functions that are triggered by clicking on the widget buttons:
# 3.1 Stats button:        

def on_stats_button_clicked(b):
    """Callback of the stats button: load the upload, run the test, show results.

    Writes the uploaded file to 'input.csv' / 'input.xlsx', reads it into the
    global ``df``, runs the test selected in ``select_test``, (re)creates the
    annotation checkboxes in ``annotate_stats_box`` and displays the table of
    pairwise comparisons in ``output``. The plotting and download widgets are
    only revealed once the statistics were computed.

    Args:
        b: the clicked button (unused).
    """
    global df, save_plot, l_checkboxes

    save_plot = False

    with output:
        output.clear_output()

        # Stop early for tests that are offered in the dropdown but not
        # implemented; nothing below can work without computed statistics.
        if select_test.value not in (0, 2):
            print('Function not implemented. Please go and annoy Dennis to finally do it')
            return

        # Open the uploaded file:
        if not uploader.value:
            print('Error: Please upload a .csv or .xlsx file before calculating the stats.')
            return
        file_name = list(uploader.value.keys())[0]
        file_content = uploader.value[file_name]['content']
        if file_name.endswith('.csv'):
            with open('input.csv', 'wb') as input_file:
                input_file.write(file_content)
            df = pd.read_csv('input.csv', index_col=0)
        elif file_name.endswith('.xlsx'):
            with open('input.xlsx', 'wb') as input_file:
                input_file.write(file_content)
            df = pd.read_excel('input.xlsx', index_col=0)
        else:
            print('Error: Only .csv and .xlsx files are supported.')
            return

        # Check what option was chosen in the select_test dropdown and execute corresponding function
        if select_test.value == 0:
            plot_options = [('stripplot', 0), ('boxplot', 1), ('boxplot with scatterplot overlay', 2), ('violinplot', 3)]
            independent_samples()
            checkbox_factory = create_checkboxes_pairwise_comparisons
        else:
            plot_options = [('pointplot', 0), ('boxplot', 1), ('boxplot with scatterplot overlay', 2), ('violinplot', 3)]
            mixed_model_ANOVA()
            checkbox_factory = create_checkboxes_pairwise_comparisons_mma

        # The test function may have stopped without results (e.g. fewer than
        # two groups); it has printed its own message in that case.
        if 'pairwise_comparisons' not in d_main.get('summary', {}):
            return

        checkboxes_to_add, l_checkboxes = checkbox_factory()
        # Always replace the displayed checkboxes: l_checkboxes now refers to
        # the new widgets, so keeping old ones on screen would mean the user's
        # ticks are never read.
        annotate_stats_box.children = checkboxes_to_add

        # Not at all necessary if we can really manage to align the plot
        select_plot.options = plot_options

        uploader.layout.visibility = 'hidden'
        plotting_button.layout.visibility = 'visible'
        select_plot.layout.visibility = 'visible'
        main_accordion.layout.visibility = 'visible'
        select_downloads.layout.visibility = 'visible'
        download_button.layout.visibility = 'visible'

        display(d_main['summary']['pairwise_comparisons'])

        
# 3.2 Plotting button
def on_plotting_button_clicked(b):
    """Callback of the plotting button: draw the selected plot with annotations.

    Draws the plot type chosen in ``select_plot`` for the test chosen in
    ``select_test``, adds the significance annotations selected via the
    checkboxes, applies the axis-label settings and shows the figure in
    ``output``. If the global ``save_plot`` is set, the figure is also written
    to 'customized_plot.png'.

    Args:
        b: the clicked button (unused).
    """
    global l_group_order

    not_implemented = "Function not implemented. Please go and annoy Dennis to finally do it"

    with output:
        output.clear_output()

        plotting_button.description = 'Refresh the plot'

        # Could also be modified
        l_group_order = l_groups

        plt.figure(figsize=(14,8), facecolor='white')

        if select_test.value == 0: # independent_samples()
            if select_plot.value == 0:
                sns.stripplot(data=df, x=group_col, y=data_col, order=l_group_order, palette=color_palettes.value, size=marker_size.value)
            elif select_plot.value == 1:
                sns.boxplot(data=df, x=group_col, y=data_col, order=l_group_order, palette=color_palettes.value)
            elif select_plot.value == 2:
                sns.boxplot(data=df, x=group_col, y=data_col, order=l_group_order, palette=color_palettes.value)
                sns.stripplot(data=df, x=group_col, y=data_col, color='k', order=l_group_order, size=marker_size.value)
            else:
                print(not_implemented)

            annotate_stats_independent_samples(define_l_stats_to_annotate_independent_samples())

        elif select_test.value == 2: # mixed_model_ANOVA()
            if select_plot.value == 0:
                # 'err_style' is not a pointplot parameter and has been dropped
                # from this call.
                sns.pointplot(x=session_col, y=data_col, data=df, hue=group_col, dodge=True, ci='sd', capsize=0)
                plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
                annotate_stats_mma_pointplot(define_l_stats_to_annotate_mma())
            elif select_plot.value == 3:
                violin_plot = sns.violinplot(x=session_col, y=data_col, data=df, hue=group_col, width=0.8, cut=0)
                violin_plot.legend(loc='center left', bbox_to_anchor=(1, 0.5))
                annotate_stats_mma_violinplot(define_l_stats_to_annotate_mma())
            else:
                print(not_implemented)

        else:
            print(not_implemented)

        plt.ylabel(yaxis_label_text.value, fontsize=yaxis_label_fontsize.value, color=yaxis_label_color.value)
        plt.xlabel(xaxis_label_text.value, fontsize=xaxis_label_fontsize.value, color=xaxis_label_color.value)

        # Save before plt.show(), while the figure is still the current one.
        if save_plot:
            plt.savefig('customized_plot.png', dpi=300)

        plt.show()
        
        
# 3.3 Download button:        
def _write_statistic_results():
    """Write the statistical results to 'statistic_results.xlsx' (three sheets)."""
    if select_test.value not in (0, 2):
        # No export is defined for the remaining test options.
        return

    # Only the mixed-model ANOVA (2) has per-session group statistics.
    include_sessions = select_test.value == 2
    df_individual_group_stats = prepare_individual_group_stats_for_download(include_sessions)
    df_group_level_overview = prepare_group_level_stats_for_download()
    df_pairwise_comparisons = d_main['summary']['pairwise_comparisons'].copy()
    with pd.ExcelWriter('statistic_results.xlsx') as writer:
        df_individual_group_stats.to_excel(writer, sheet_name='Individual group statistics')
        df_group_level_overview.to_excel(writer, sheet_name='Whole-group statistics')
        df_pairwise_comparisons.to_excel(writer, sheet_name='Pairwise comparisons')


def _save_current_plot():
    """Re-draw the plot with the save flag set so that it is written to disk."""
    global save_plot
    save_plot = True
    try:
        plotting_button.click()
    finally:
        # Always reset the flag, otherwise every later refresh would silently
        # overwrite the saved image.
        save_plot = False


def on_download_button_clicked(b):
    """Callback of the download button: write results and/or plot to disk.

    ``select_downloads`` decides what is written: 1 = statistical results
    only, 2 = plot only, 3 = both (plot first, then the results).

    Args:
        b: the clicked button (unused).
    """
    selection = select_downloads.value
    if selection in (2, 3):
        _save_current_plot()
    if selection in (1, 3):
        _write_statistic_results()
        
        
               
###################################################################    

    
###################################################################
# 4 Functions that create the individual widget elements:
# 4.1 Buttons:

def create_buttons():
    """Create the file uploader and the three action buttons as globals.

    The plotting and download buttons start hidden.
    """
    global uploader, stats_button, plotting_button, download_button

    # File uploader: a single file, restricted to the listed extensions
    # (e.g. '.txt', '.pdf', 'image/*', 'image/*,.pdf' would also be possible).
    accepted_extensions = '.xlsx,.csv'
    uploader = widgets.FileUpload(accept=accepted_extensions, multiple=False)

    # Buttons:
    stats_button = widgets.Button(icon='rocket', description="Calculate stats")

    plotting_button = widgets.Button(
        layout={'visibility': 'hidden'},
        description='Plot the data',
    )

    download_button = widgets.Button(
        layout={'visibility': 'hidden'},
        icon='file-download',
        description='Download',
    )
    
# 4.2 Dropdown menus:

def create_dropdowns():
    """Create the three dropdown menus (test, plot type, download) as globals.

    Only the test selection is visible initially; the other two start hidden.
    """
    global select_test, select_plot, select_downloads

    def build_dropdown(options, value, description, hidden):
        # All dropdowns share the same width and an unrestricted description width.
        layout = {'width': '700px'}
        if hidden:
            layout['visibility'] = 'hidden'
        return widgets.Dropdown(options=options,
                                value=value,
                                description=description,
                                layout=layout,
                                style={'description_width': 'initial'})

    test_options = [('pairwise comparison of two or more independent samples', 0),
                    ('compare one sample vs. fixed value', 1),
                    ('Mixed_model_ANOVA', 2)]
    select_test = build_dropdown(test_options, 0,
                                 'Please select which test you want to perform:',
                                 hidden=False)

    # Placeholder option; the list is passed here only as an initial value.
    select_plot = build_dropdown([('something initial', 0)], 0,
                                 'Please select which type of plot you want to create:',
                                 hidden=True)

    download_options = [('statistical results only', 1), ('plot only', 2), ('both', 3)]
    select_downloads = build_dropdown(download_options, 1,
                                      'Please select what you would like to write to disk:',
                                      hidden=True)


def create_hbox_y_axis():
    """Create the widgets that customize the y-axis label.

    Publishes the text field, the fontsize slider and the color picker as
    globals and returns them side by side in an HBox.
    """
    global yaxis_label_text, yaxis_label_fontsize, yaxis_label_color
    # Same setting as the dropdowns of this widget use for long descriptions.
    full_description = {'description_width': 'initial'}
    yaxis_label_text = widgets.Text(value='data', placeholder='data', description='y-axis label:', layout={'width': '300px'})
    yaxis_label_fontsize = widgets.IntSlider(value=12, min=8, max=40, step=1, description='y-axis label fontsize:', style=full_description)
    # Description typo fixed ('y-axisl' -> 'y-axis').
    yaxis_label_color = widgets.ColorPicker(concise=False, description='y-axis label color', value='#000000', style=full_description)
    return HBox([yaxis_label_text, yaxis_label_fontsize, yaxis_label_color])

def create_hbox_x_axis():
    """Create the widgets that customize the x-axis label.

    Publishes the text field, the fontsize slider and the color picker as
    globals and returns them side by side in an HBox.
    """
    global xaxis_label_text, xaxis_label_fontsize, xaxis_label_color
    # Same setting as the dropdowns of this widget use for long descriptions.
    full_description = {'description_width': 'initial'}
    xaxis_label_text = widgets.Text(value='group_IDs', placeholder='group_IDs', description='x-axis label:', layout={'width': '300px'})
    xaxis_label_fontsize = widgets.IntSlider(value=12, min=8, max=40, step=1, description='x-axis label fontsize:', style=full_description)
    # Description typo fixed ('x-axisl' -> 'x-axis').
    xaxis_label_color = widgets.ColorPicker(concise=False, description='x-axis label color', value='#000000', style=full_description)
    return HBox([xaxis_label_text, xaxis_label_fontsize, xaxis_label_color])

def create_hbox_plot_style_features():
    """Create the widgets for the color palette and the marker size.

    Publishes both widgets as globals and returns them side by side in an HBox.
    """
    global color_palettes, marker_size

    palette_names = ['colorblind', 'Spectral', 'viridis', 'rocket', 'cubehelix']
    color_palettes = widgets.Dropdown(
        options=palette_names,
        value='colorblind',
        description='Please select a color palette',
        layout={'width': '400px'},
        style={'description_width': 'initial'},
    )

    marker_size = widgets.FloatText(description='marker size:', value=5)

    style_widgets = [color_palettes, marker_size]
    return HBox(style_widgets)


def create_accordion_to_customize_the_plot():
    """Create the (initially hidden) accordion with all plot-customization widgets.

    Publishes ``main_accordion``, the nested ``accordion`` and the still empty
    ``annotate_stats_box`` as globals. The main accordion has the nested
    accordion as its only child; the nested accordion has three pages:
    statistics annotation, axes, and other plot features.
    """
    # annotation_distance and plot_size are declared for the planned widgets
    # listed in the notes below; they are not assigned here yet.
    global main_accordion, accordion, annotate_stats_box, annotation_distance, plot_size
    # Accordion to customize the plotting:
    # We will create another Accordion inside the main accordion

    # First accordion will contain checkboxes to select which stats shall be annotated
    # Will be filled as soon as stats_button is clicked and tests are run
        # Still missing:
            # Annotation of within and between statistics for mma
            # Add new level to customize annotations:
                # x or y spacing (annotation distance: annotation_distance=widgets.FloatSlider(description='Change the distance of the annotations to the graph.', value=1, min=0, max=2, step=0.001, style={'description_width': 'initial'})
                # fontsize
                # linewidth
                # distance between line & text
                # shape of "line"
    annotate_stats_box = VBox([])

    # Second accordion will contain widgets to customize the axes
        # Still missing:
            # make new level for each axis
            # Change axis_tick_label_sizes
            # Change axis_linewidth
    yaxis_hbox = create_hbox_y_axis()
    xaxis_hbox = create_hbox_x_axis()

    customize_axes_box = VBox([yaxis_hbox, xaxis_hbox])


    # Third accordion will contain widgets to customize the style of the plot (colorpalette, markersizes)
        # Still missing:
            # Plot size (2 sliders, x & y) to change fig_size [make sure violinplot annotation is still working for mma()]
                # e.g.: y_size=widgets.FloatSlider(description='Change the size of your plot.', value=1, min=0, max=10)
            # Make sure marker_size only shows up if possible to change
            # Plot title (+ size & color)
            # Option to remove upper and right spines
            # Set dpi
            # Select (.png, .tif, .pdf)

    customize_features_box = create_hbox_plot_style_features()


    # Create the accordion that actually contains all widget-containing accordions and will become the only child of the main accordion
    accordion = widgets.Accordion(children=[annotate_stats_box, customize_axes_box, customize_features_box], selected_index=None)

    # Give the individual accordions titles that are displayed before dropdown is clicked
    accordion.set_title(0, 'Customize how statistics are annotated in the plot')
    accordion.set_title(1, 'Customize axes')
    accordion.set_title(2, 'Customize other features of the plot')


    # Create the main accordion that contains all widgets to customize the plot and use selected_index=None to avoid dropdown by default
    # The former 'continous_update=False' argument was dropped: it was
    # misspelled and is not an Accordion option.
    main_accordion = widgets.Accordion(children=[accordion], selected_index=None, layout={'visibility': 'hidden'})
    main_accordion.set_title(0, 'Expand me to customize your plot!')
    
    
def top_level_layout():
    """Create all widgets, connect the button callbacks and arrange the rows.

    The assembled widget is published as the global ``stats_widget``.
    """
    global stats_widget

    create_accordion_to_customize_the_plot()
    create_dropdowns()
    create_buttons()

    # Bind the on_button_clicked functions to the respective buttons:
    callbacks = ((stats_button, on_stats_button_clicked),
                 (plotting_button, on_plotting_button_clicked),
                 (download_button, on_download_button_clicked))
    for button, callback in callbacks:
        button.on_click(callback)

    # Layout of the remaining elements, one HBox per row from top to bottom
    rows = [
        HBox([uploader]),
        HBox([select_test, stats_button]),
        HBox([select_plot, plotting_button]),
        HBox([main_accordion]),
        HBox([select_downloads, download_button]),
    ]

    stats_widget = VBox(rows)
    
    
def launch():
    """Build the complete widget and display it together with its output area.

    The output area is published as the global ``output``; the button
    callbacks write their results and plots into it.
    """
    global output

    # Build all widgets and their layout first
    top_level_layout()

    # Output area below the widget
    output = widgets.Output()

    # Show the widget followed by the output area
    display(stats_widget, output)
    
###################################################################    

    
###################################################################
# 6 Functions to process the statistical data for download:
# 6.1 Calculate individual group statistics:
    
def calculate_individual_group_stats(d, key):
    """Append the descriptive and normality statistics of one group to ``d``.

    Args:
        d: dict of lists with the keys 'means', 'medians', 'stddevs',
            'stderrs', 'tests', 'test_stats', 'pvals' and 'bools'; one value
            is appended to each list.
        key: key of the group in the global ``d_main``.

    Returns:
        The same dict ``d`` (modified in place).
    """
    group_entry = d_main[key]
    values = group_entry['data']
    normality = group_entry['normality_full']

    # NOTE(review): np.std is called with its default ddof=0 (population
    # standard deviation) - confirm this is intended for the reported
    # standard deviation and standard error.
    std_dev = np.std(values)

    new_entries = {
        'means': np.mean(values),
        'medians': np.median(values),
        'stddevs': std_dev,
        'stderrs': std_dev / math.sqrt(values.shape[0]),
        'tests': 'Shapiro-Wilk',
        # First row of the normality table: test statistic, p-value, decision.
        'test_stats': normality.iloc[0, 0],
        'pvals': normality.iloc[0, 1],
        'bools': normality.iloc[0, 2],
    }
    for column, value in new_entries.items():
        d[column].append(value)
    return d

# 6.2 Create the DataFrame:
def prepare_individual_group_stats_for_download(include_sessions):
    """Build the table of per-group statistics for the Excel export.

    Args:
        include_sessions: False for independent samples (one row per group in
            ``l_groups``); True for the mixed-model ANOVA (one row per
            group/session combination of ``l_groups`` and ``l_sessions``).

    Returns:
        A DataFrame with two-level columns ('Group statistics' and
        'Test for normal distribution'), indexed by group ID or by a
        (group, session) MultiIndex.
    """
    d_individual_group_stats = {'means': [],
                                'medians': [],
                                'stddevs': [],
                                'stderrs': [],
                                'tests': [],
                                'test_stats': [],
                                'pvals': [],
                                'bools': []}

    if include_sessions:
        # for mma:
        l_keys = [(group_id, session_id) for group_id in l_groups for session_id in l_sessions]
        # Build the index once, after all keys are known.
        l_index = pd.MultiIndex.from_tuples(l_keys)
    else:
        # for independent samples:
        l_keys = list(l_groups)
        l_index = l_keys

    for key in l_keys:
        d_individual_group_stats = calculate_individual_group_stats(d_individual_group_stats, key)

    df_individual_group_stats = pd.DataFrame(data=d_individual_group_stats)

    multi_index_columns = pd.MultiIndex.from_tuples([('Group statistics', 'Mean'), ('Group statistics', 'Median'), ('Group statistics', 'Standard deviation'), ('Group statistics', 'Standard error'),
                                             ('Test for normal distribution', 'Test'), ('Test for normal distribution', 'Test statistic'), ('Test for normal distribution', 'p-value'),
                                             ('Test for normal distribution', 'Normally distributed?')])

    df_individual_group_stats.columns = multi_index_columns
    df_individual_group_stats.index = l_index

    return df_individual_group_stats


# 6.3 Group-level statistics:

def prepare_group_level_stats_for_download():
    """Build the table of group-level statistics for the Excel export.

    Combines the result of the test for equal variances, the normality and
    parametric-criteria flags from ``d_main['summary']``, the name of the
    performed test and, if one was stored, the group-level statistic.

    Returns:
        A DataFrame with two-level columns.
    """
    summary = d_main['summary']

    df_group_level_overview = pg.homoscedasticity([d_main[key]['data'] for key in d_main if key != 'summary'])
    df_group_level_overview.index = [0]
    df_group_level_overview.columns = pd.MultiIndex.from_tuples([('Levene', 'W statistic'), ('Levene', 'p value'), ('Levene', 'Equal variances?')])

    # Report the flags that were actually computed; they used to be
    # hard-coded to False.
    all_normal = bool(summary['normality'])
    parametric_criteria_met = all_normal and bool(summary['homoscedasticity'])

    # The statistics functions may not have stored a group-level statistic
    # (e.g. for only two independent groups); the global performed_test may
    # then be undefined or stale, so it is not read in that case.
    df_group_statistics = summary.get('group_level_statistic')
    test_label = performed_test if df_group_statistics is not None else 'pairwise comparisons only'

    df_group_level_overview[('', 'all normally distributed?')] = all_normal
    # NOTE(review): 'critera' is misspelled, but the header is part of the
    # exported file format and therefore left unchanged here.
    df_group_level_overview[('', 'critera for parametric test fulfilled?')] = parametric_criteria_met
    df_group_level_overview[('', 'performed test')] = test_label
    # Empty spacer column between the overview and the test table.
    df_group_level_overview[' '] = ''

    if df_group_statistics is None:
        return df_group_level_overview

    df_group_statistics = df_group_statistics.copy()
    df_group_statistics.index = list(range(df_group_statistics.shape[0]))
    df_group_statistics.columns = pd.MultiIndex.from_tuples([(test_label, elem) for elem in df_group_statistics.columns])

    df_group_level_overview = pd.concat([df_group_level_overview, df_group_statistics], axis=1)

    return df_group_level_overview


In [None]:
# Build the statistics widget and display it in the notebook.
launch()