# In [1]:
import natsort
import numpy as np
import pandas as pd
import param
import plotly.express as px
import plotly.graph_objects as go
import re
import traceback
import panel as pn
pn.extension("plotly")
from io import BytesIO
from io import StringIO
from bokeh.models.widgets.tables import NumberFormatter
from sklearn.decomposition import PCA
import json
import ast
import statistics 
import matplotlib.pyplot as plt
import matplotlib_venn as venn
from matplotlib_venn import venn2, venn3, venn3_circles
from PIL import Image
import io

# In [2]:
# Execute the sibling script "class.py" in this namespace so that the
# SpatialDataSet class becomes available. The context manager guarantees the
# file handle is closed as soon as the source has been read.
# NOTE(review): exec() runs class.py verbatim - only use with a trusted file.
with open("class.py") as class_source:
    exec(class_source.read())
i_class = SpatialDataSet()

# Upload widget for the primary data table.
i_file = pn.widgets.FileInput(name='Upload file')

# Buttons that trigger the cluster analysis and the SVM matrix analysis.
button_analysis = pn.widgets.Button(name='Analyse clusters', width=50)
button_SVM_analysis = pn.widgets.Button(
    name='Analyse misclassification matrix',
    width=50,
)

# Selects which data frame the download tab offers.
i_logOR01_selection = pn.widgets.Select(
    name="Select type of data for download",
    options=["0/1 normalized data", "log transformed data"],
    width=300,
)

i_acquisition = pn.widgets.Select(
    name="Acquisition",
    options=["LFQ Spectronaut", "SILAC", "LFQ"],
    width=300,
)
i_organism = pn.widgets.Select(
    name="Organism",
    options=["Human - Swissprot", "Mouse - Swissprot", "Arabidopsis - Araport"],
    width=300,
)

i_SVM_table = pn.widgets.input.TextAreaInput(
    name='Misclassification matrix from Perseus',
    placeholder='Copy matrix here...',
)

# Cluster / map selectors of the single-experiment analysis.
i_clusterwidget = pn.widgets.Select(
    name="Cluster of interest",
    options=list(i_class.markerproteins),
    width=300,
)
i_mapwidget = pn.widgets.Select(
    name="Map of interest",
    options=[i_class.map_of_interest],
    width=300,
)

# Cluster selector of the comparison view.
i_clusterwidget_comparison = pn.widgets.Select(
    name="Cluster of interest",
    options=list(i_class.markerproteins),
    width=300,
)

i_collapse_maps_PCA = pn.widgets.Checkbox(name='Collapse maps', value=False)

# Checkboxes that are used as reactive boolean flags by the callbacks below.
cache_uploaded = pn.widgets.Checkbox(value=False)
cache_uploaded_SVM = pn.widgets.Checkbox(value=False)

cache_run = pn.widgets.Checkbox(value=False)

# Status panes whose `object` is rewritten by the execution callbacks.
analysis_status = pn.Pane("No analysis run yet")
analysis_status_SVM = pn.Pane("No SVM analysis run yet")

filereading_status = "No data import yet"

i_expname = pn.widgets.TextInput(
    name='Experiment Name',
    placeholder='Enter your experiment name here here...',
)

# Stringency settings that are copied onto i_class before processing.
i_consecutiveLFQi = pn.widgets.IntSlider(
    name="Consecutive LFQ intensities", start=1, end=8, step=1, value=4,
)
i_summed_MSMS_counts = pn.widgets.IntSlider(
    name="Summed MS/MS counts", start=1, end=6, step=1, value=2,
)

# Predefined regular expressions for parsing column names, plus a "Custom"
# entry that switches to the free-text input below.
i_name_pattern = pn.widgets.Select(
    name='Name pattern',
    options=[
        "(?P<rep>.*)_(?P<frac>.*)",
        ".* (?P<cond>.*)_(?P<rep>.*)_(?P<frac>.*)",
        ".* (?P<rep>.*)_(?P<frac>.*)",
        ".* (?P<cond>.*)_(?P<frac>.*)_(?P<rep>.*)",
        "Custom",
    ],
)
i_custom_namepattern = pn.widgets.TextInput(
    name='Customized Name Pattern',
    placeholder='Enter a string here...e.g.: .* (?P<rep>.*)_(?P<frac>.*)',
)
# Example column names shown for each predefined pattern.
regex_pattern = {
    "(?P<rep>.*)_(?P<frac>.*)": ["Spectronaut MAP1_03K"],
    ".* (?P<rep>.*)_(?P<frac>.*)": ["MAP1_03K", "MAP3_03K"],
    ".* (?P<cond>.*)_(?P<rep>.*)_(?P<frac>.*)": ["EGF_rep1_06K", "EGF_rep3_06K"],
    ".* (?P<cond>.*)_(?P<frac>.*)_(?P<rep>.*)": ["Control_Mem_1", "Control_Cyt_1"],
}
i_pattern_examples = pn.widgets.Select(
    name="Examples",
    options=regex_pattern[i_name_pattern.value],
)

@pn.depends(i_name_pattern.param.value, i_custom_namepattern, i_pattern_examples)
def custimization(name_pattern, custom_namepattern, pattern_examples):
    """Return the widget to show next to the name-pattern selector.

    For a predefined pattern the examples selector is refreshed with the
    matching entries of ``regex_pattern`` and returned; for "Custom" the
    free-text pattern input is returned instead. ``custom_namepattern`` and
    ``pattern_examples`` are only declared as dependencies and are not read.
    """
    if name_pattern != "Custom":
        i_pattern_examples.options = regex_pattern[name_pattern]
        return i_pattern_examples
    return i_custom_namepattern

# For every principal component chosen as x-axis: the components that may be
# offered for the y-axis (the first entry becomes the default selection).
i_x_vs_yAxis_PCA = {"PC1": ["PC3", "PC2"], "PC2": ["PC1", "PC3"], "PC3": ["PC1", "PC2"]}

# Axis selectors for the single-experiment PCA plot.
i_xAxis_PCA = pn.widgets.Select(
    name="X-Axis",
    options=["PC1", "PC2", "PC3"],
)
i_yAxis_PCA = pn.widgets.Select(
    name="Y-Axis",
    options=i_x_vs_yAxis_PCA[i_xAxis_PCA.value],
)

# Axis selectors for the comparison PCA plot.
i_xAxis_PCA_comp = pn.widgets.Select(
    name="X-Axis",
    options=["PC1", "PC2", "PC3"],
)
i_yAxis_PCA_comp = pn.widgets.Select(
    name="Y-Axis",
    options=i_x_vs_yAxis_PCA[i_xAxis_PCA_comp.value],
)

@pn.depends(i_xAxis_PCA_comp.param.value, watch=True)
def custimization_PCA_comp(xAxis_PCA_comp):
    """Restrict the comparison y-axis selector to components other than the x-axis.

    Returns the updated y-axis widget so the function can also be embedded
    in a layout.
    """
    i_yAxis_PCA_comp.options = i_x_vs_yAxis_PCA[xAxis_PCA_comp]
    return i_yAxis_PCA_comp

@pn.depends(i_xAxis_PCA.param.value, watch=True)
def custimization_PCA(xAxis_PCA):
    """Restrict the y-axis selector to components other than the chosen x-axis.

    Returns the updated y-axis widget so the function can also be embedded
    in a layout.
    """
    i_yAxis_PCA.options = i_x_vs_yAxis_PCA[xAxis_PCA]
    return i_yAxis_PCA

# Choose the parameter widgets that are displayed for the selected acquisition.
@pn.depends(i_acquisition.param.value, i_consecutiveLFQi, i_summed_MSMS_counts)
def acquisition_response(acquisition, consecutiveLFQi, summed_MSMS_counts):
    """Return the settings layout for the selected acquisition mode.

    "SILAC" only shows the name-pattern row; every other mode additionally
    shows the two stringency sliders above it. The slider arguments are only
    declared as dependencies and are not read.
    """
    pattern_row = pn.Row(i_name_pattern, custimization)
    if acquisition == "SILAC":
        return pattern_row
    slider_row = pn.Row(i_consecutiveLFQi, i_summed_MSMS_counts)
    return pn.Column(slider_row, pattern_row)

# Widgets that are disabled while an analysis has been run (run == True) and
# re-enabled when a new file is uploaded or the analysis fails.
# i_expname used to be listed twice; each widget now appears exactly once.
wdgts = [
    i_acquisition,
    i_name_pattern,
    i_expname,
    i_pattern_examples,
    button_analysis,
    i_organism,
    i_consecutiveLFQi,
    i_summed_MSMS_counts,
]

@pn.depends(i_file.param.value)
def read_file(file):
    """Parse the uploaded tab-separated file and return the settings layout.

    Parameters
    ----------
    file : bytes or None
        Content of the file-input widget.

    Returns
    -------
    A status string when nothing is uploaded, a Panel column with a preview
    table and the analysis settings on success, or the formatted traceback
    when reading or validating the file fails.

    Side effects: stores the table in ``i_class.df_original``, resets
    ``i_class.cache_comparison``, updates ``cache_uploaded`` and re-enables
    the widgets listed in ``wdgts`` on success.
    """
    i_class.cache_comparison = False
    cache_uploaded.value = False
    if file is None:
        # Previously this message was assigned to a local and never shown.
        return "No file is uploaded"

    try:
        i_class.df_original = pd.read_csv(
            BytesIO(file),
            sep="\t",
            comment="#",
            # only import columns matching the configured regular expression
            usecols=lambda x: bool(re.match(i_class.regex["imported_columns"], x)),
            low_memory=False,
        )
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        n_rows, n_cols = i_class.df_original.shape
        if n_rows <= 10 or n_cols <= 5:
            raise ValueError(
                "Imported table is too small: more than 10 rows and more than "
                "5 matching columns are required, got {} rows and {} columns.".format(n_rows, n_cols)
            )

        cache_uploaded.value = True
        for wdgt in wdgts:
            wdgt.disabled = False

        return pn.Column(pn.Row(pn.widgets.DataFrame(i_class.df_original, height=200, width=600, disabled=True)),
                         pn.Row(i_expname),
                         pn.Row(i_organism, i_acquisition),
                         pn.Row(acquisition_response),
                         pn.Row(button_analysis))
    except Exception:
        # Show the error in the dashboard instead of failing silently.
        cache_uploaded.value = False
        return traceback.format_exc()
        
        
def execution(event):
    """Run the single-experiment analysis; click handler of ``button_analysis``.

    Refuses to run without an uploaded file or an experiment name. Otherwise
    the analysis and SVM dashboards are rebuilt, the user settings are copied
    onto ``i_class`` and its processing methods are called in order. Progress,
    success or the formatted traceback is written to ``analysis_status``;
    ``cache_run`` is set to True only when every step succeeded.
    """
    # prevent execution, if no data is uploaded yet
    if not cache_uploaded.value:
        analysis_status.object = "Please upload a file first"
        return
    if i_expname.value == "":
        analysis_status.object = "Please enter an experiment name first"
        return

    dashboard_analysis.objects = []
    dashboard_svm.objects = []

    cache_run.value = False
    for wdgt in wdgts:
        wdgt.disabled = True
    # if a comparison was already done but another experiment is added
    # afterwards without reloading AnalysedDatasets.json, the comparison
    # controls are locked as well
    for wdgt in wdgts_comparison:
        wdgt.disabled = True
    try:
        dashboard_analysis.append(i_clusterwidget)
        dashboard_analysis.append(i_mapwidget)
        dashboard_analysis.append(analysis_tabs)

        i_SVM_table.value = ""
        analysis_status_SVM.object = "Please paste a SVM Matrix first"

        dashboard_svm.append(i_SVM_table)
        dashboard_svm.append(read_SVM_matrix)
        dashboard_svm.append(analysis_status_SVM)

        analysis_status.object = "Analysis in progress"
        i_class.acquisition = i_acquisition.value
        # get only these proteins that belong to one specific organism
        i_class.markerproteins = i_class.all_markerproteins[i_organism.value]
        # user can influence stringency filtering: e.g. [4] consecutive non-NANs, data will be retained
        i_class.consecutiveLFQi = i_consecutiveLFQi.value
        i_class.summed_MSMS_counts = i_summed_MSMS_counts.value
        # define name pattern
        if i_name_pattern.value == "Custom":
            i_class.name_pattern = i_custom_namepattern.value
        else:
            i_class.name_pattern = i_name_pattern.value

        i_class.expname = i_expname.value
        i_class.processingdf()
        update_object_selector(i_mapwidget)
        i_class.quantity_profiles_proteinGroups()
        i_class.perform_pca()
        i_class.multiple_iterations()
        i_class.distance_calculation()
        analysis_status.object = "Analysis finished! Please open the 'Analysis' tab!"
        cache_run.value = True
    except Exception:
        # Unlock the settings again and replace the "Analysis in progress"
        # message with the traceback so the user can see what went wrong.
        for wdgt in wdgts:
            wdgt.disabled = False
        analysis_status.object = traceback.format_exc()
        cache_run.value = False


button_analysis.on_click(execution)

def update_object_selector(i_mapwidget):
    """Offer the maps known to ``i_class`` in the given selector widget.

    If the currently stored map of interest is not among ``i_class.map_names``
    it is reset to the first map name.
    """
    available_maps = list(i_class.map_names)
    i_mapwidget.options = available_maps
    if i_class.map_of_interest in list(i_class.map_names):
        return
    i_class.map_of_interest = i_class.map_names[0]
        
        
@pn.depends(i_SVM_table.param.value)
def read_SVM_matrix(SVM_table):
    """Parse the pasted misclassification matrix and preview it.

    Parameters
    ----------
    SVM_table : str
        Tab-separated text pasted into the text area.

    Returns
    -------
    A status string for empty input, a column with the parsed table and the
    SVM analysis button on success, or the formatted traceback on failure.

    Side effects: stores the table in ``i_class.df_SVM`` and updates
    ``cache_uploaded_SVM``.
    """
    cache_uploaded_SVM.value = False
    if SVM_table == "":
        # Previously this message was assigned to a local and never shown.
        return "No misclassification matrix is uploaded"

    try:
        i_class.df_SVM = pd.read_table(StringIO(SVM_table), sep="\t")
        svm_matrix = i_class.df_SVM
        cache_uploaded_SVM.value = True
        # pane width scales with the number of columns of the matrix
        return pn.Column(pn.Pane(svm_matrix, width=60*svm_matrix.shape[1]),
                         pn.Row(button_SVM_analysis))
    except Exception:
        cache_uploaded_SVM.value = False
        return traceback.format_exc()

        
def execution_SVM(event):
    """Process the pasted SVM matrix; click handler of ``button_SVM_analysis``.

    Writes either a success message or the formatted traceback of
    ``i_class.svm_processing()`` to ``analysis_status_SVM``.
    """
    # prevent execution, if no data is uploaded yet
    if not cache_uploaded_SVM.value:
        analysis_status_SVM.object = "Please paste a SVM Matrix first"
        return
    try:
        i_class.svm_processing()
        analysis_status_SVM.object = "Analysis finished! Check the dictionary!"
    except Exception:
        analysis_status_SVM.object = traceback.format_exc()


button_SVM_analysis.on_click(execution_SVM)
  
    
@pn.depends(i_mapwidget.param.value, cache_run.param.value, i_collapse_maps_PCA.param.value, i_clusterwidget.param.value, i_xAxis_PCA.param.value, i_yAxis_PCA.param.value)
def update_visualization_map(mapwidget, run, collapse_maps_PCA, clusterwidget, xAxis_PCA, yAxis_PCA):
    """Render the global PCA plot of the analysed map.

    The current widget selections are copied onto ``i_class`` before
    ``i_class.global_pca_plot()`` is called. As a side effect the second
    entry of ``app_tabs`` is activated.

    Returns
    -------
    The plot layout, "Run analysis first!" while no analysis has been run,
    or the formatted traceback if plotting fails.
    """
    if not run:
        # The original returned the undefined name `cluster_overview` here,
        # which surfaced as a NameError traceback instead of this message.
        return "Run analysis first!"
    try:
        i_class.map_of_interest = mapwidget
        i_class.cluster_of_interest = clusterwidget
        i_class.x_PCA = xAxis_PCA
        i_class.y_PCA = yAxis_PCA
        i_class.collapse_maps_PCA = collapse_maps_PCA
        visualization_map = pn.Column(
                pn.Row(i_collapse_maps_PCA),
                pn.Row(pn.Pane(i_class.global_pca_plot(), width=1000)),
                pn.Column(i_xAxis_PCA, custimization_PCA)
                )
        app_tabs.active = 1
        return visualization_map
    except Exception:
        return traceback.format_exc()
               
            
@pn.depends(i_clusterwidget.param.value,i_mapwidget.param.value, cache_run.param.value)
def update_cluster_overview(clusterwidget, mapwidget, run):
    """Render PCA, distance and profile plots for the selected cluster.

    Returns
    -------
    The overview layout, a message when the cluster was not quantified,
    "Run analysis first!" while no analysis has been run, or the formatted
    traceback if anything fails.
    """
    if not run:
        return "Run analysis first!"
    try:
        i_class.cluster_of_interest = clusterwidget
        i_class.map_of_interest = mapwidget
        # genes of this cluster that were sorted out during processing
        list_genes = [goi for goi in i_class.genenames_sortedout_list if goi in i_class.markerproteins[clusterwidget]]
        # The flag is raised before plotting and checked afterwards;
        # presumably distance_boxplot() clears it when nothing was quantified.
        i_class.cache_cluster_quantified = True
        distance_boxplot = i_class.distance_boxplot()
        if not i_class.cache_cluster_quantified:
            return "This protein cluster was not quantified"

        df_quantification_overview = i_class.quantification_overview()
        profiles_plot = i_class.profiles_plot()
        pca_plot = i_class.pca_plot()

        if len(list_genes) != 0:
            quantification_note = "In total {} proteins across all maps were quantified, whereas the following proteins were not consistently quantified throughout all maps: {}".format(
                i_class.proteins_qunatified_across_all_maps, ", ".join(list_genes))
        else:
            quantification_note = "All genes from this cluster are quantified in all maps."

        return pn.Column(
                pn.Row(pn.Pane(pca_plot, width=500),
                       pn.Pane(distance_boxplot, width=500),
                       pn.Pane(profiles_plot, width=500)),
                pn.Row(quantification_note),
                pn.Row(pn.widgets.DataFrame(df_quantification_overview, height=200, width=500, disabled=True))
                )
    except Exception:
        return traceback.format_exc()
    
    
@pn.depends(i_clusterwidget.param.value, cache_run.param.value)
def update_cluster_details(clusterwidget, run):
    """Return the distance-to-median box plot for the selected cluster.

    Returns "Run analysis first!" while no analysis has been run and the
    formatted traceback if plotting fails.
    """
    if not run:
        return "Run analysis first!"
    try:
        i_class.cluster_of_interest = clusterwidget
        return i_class.distance_to_median_boxplot()
    except Exception:
        return traceback.format_exc()

    
@pn.depends(cache_run.param.value)
def update_quantity(run):
    """Return the protein groups / profiles quantity plot.

    Returns "Run analysis first!" while no analysis has been run and the
    formatted traceback if plotting fails.
    """
    if not run:
        return "Run analysis first!"
    try:
        return i_class.plot_quantity_profiles_proteinGroups()
    except Exception:
        return traceback.format_exc()
    
    
@pn.depends(cache_run.param.value)
def update_dynamic_range(run):
    """Return the dynamic range plot wrapped in a row.

    Returns "Run analysis first!" while no analysis has been run and the
    formatted traceback if plotting fails.
    """
    if not run:
        return "Run analysis first!"
    try:
        return pn.Row(i_class.dynamic_range())
    except Exception:
        return traceback.format_exc()
            
    
@pn.depends(cache_run.param.value)
def show_tabular_overview(run):
    """Build the content of the "Download" tab.

    After a successful run the results table is shown together with the
    download widgets; before that a hint is returned. The formatted
    traceback is returned if building the content fails.
    """
    if not run:
        return "Please, upload a file first and press ‘Analyse clusters’"
    try:
        return pn.Column(
            pn.panel(i_class.results_overview_table(),width=500),
            pn.widgets.FileDownload(
                callback=table_download, filename="cluster_distances.csv"),
            i_logOR01_selection,
            df01_download_widget,
            json_download,
        )
    except Exception:
        return traceback.format_exc()

@pn.depends(cache_run.param.value)
def table_download(run):
    """Return the results overview table as CSV in a rewound text buffer.

    Used as callback of the "cluster_distances.csv" download widget.
    """
    overview = i_class.results_overview_table()
    csv_buffer = StringIO()
    overview.to_csv(csv_buffer)
    # rewind so the consumer reads the CSV from the beginning
    csv_buffer.seek(0)
    return csv_buffer

@pn.depends(cache_run.param.value, i_logOR01_selection.param.value)
def df01_download_widget(run, logOR01_selection):
    """Return the download widget matching the selected data type.

    "0/1 normalized data" offers the 0/1 normalized table, any other
    selection offers the log transformed table.
    """
    if logOR01_selection == "0/1 normalized data":
        callback, filename = df01_download, "01_normalized_data.csv"
    else:
        callback, filename = dflog_download, "log_transformed_data.csv"
    return pn.Column(pn.widgets.FileDownload(callback=callback, filename=filename))

@pn.depends(cache_run.param.value)
def df01_download(run):
    """Return the reframed ``i_class.df_01_stacked`` table as CSV text buffer."""
    csv_buffer = StringIO()
    i_class.reframe_df_01ORlog_for_svm(i_class.df_01_stacked).to_csv(csv_buffer)
    # rewind so the consumer reads the CSV from the beginning
    csv_buffer.seek(0)
    return csv_buffer
    
    
@pn.depends(cache_run.param.value)
def dflog_download(run):
    """Return the reframed ``i_class.df_log_stacked`` table as CSV text buffer."""
    csv_buffer = StringIO()
    i_class.reframe_df_01ORlog_for_svm(i_class.df_log_stacked).to_csv(csv_buffer)
    # rewind so the consumer reads the CSV from the beginning
    csv_buffer.seek(0)
    return csv_buffer
    
json_download = pn.widgets.Button(
    name='Download AnalysedDatasets.json',
    width=50,
)


def analysedDatasets_download(event):
    """Write ``i_class.analysed_datasets_dict`` to 'AnalysedDatasets.json'.

    Click handler of ``json_download``. The file is written to the current
    working directory of the running process, with sorted keys and an
    indentation of four spaces.
    """
    with open('AnalysedDatasets.json', 'w') as json_target:
        json.dump(i_class.analysed_datasets_dict, json_target, indent=4, sort_keys=True)


json_download.on_click(analysedDatasets_download)

# Top-level containers; the analysis and SVM dashboards start with a hint
# and are refilled by execution().
dashboard_svm = pn.Column(
    "Please, upload a file first and press 'Analyse clusters'",
    name="SVM Analysis",
    css_classes=["content-width"],
)

dasboard_home = pn.Column(
    i_file,
    read_file,
    analysis_status,
    name="Home",
    css_classes=["content-width"],
)
dashboard_analysis = pn.Column(
    "Please, upload a file first and press 'Analyse clusters'",
    name="Analysis",
    css_classes=["content-width"],
)

# Tabs of the single-experiment analysis, each backed by a reactive function.
analysis_tabs = pn.Tabs(margin=10, css_classes=["content-width"], dynamic=True)
for tab_title, tab_content in (
    ("Visualization of an organellar map", update_visualization_map),
    ("Cluster Overview", update_cluster_overview),
    ("Cluster Details", update_cluster_details),
    ("Protein Groups/Profiles Quantity", update_quantity),
    ("Dynamic Range", update_dynamic_range),
):
    analysis_tabs.append((tab_title, tab_content))

# Checkboxes used as reactive boolean flags of the comparison workflow.
cache_uploaded_json = pn.widgets.Checkbox(value=False)
cache_run_json = pn.widgets.Checkbox(value=False)

button_comparison = pn.widgets.Button(name='Compare experiments', width=50)
i_jsonFile = pn.widgets.FileInput(name='Upload JSON file for comparison')

i_organism_comparison = pn.widgets.Select(
    name="Organism",
    options=["Human - Swissprot", "Mouse - Swissprot", "Arabidopsis - Araport"],
    width=300,
)

i_clusters_for_ranking = pn.widgets.CrossSelector(
    name="Select clusters to be considered for ranking calculation",
    value=list(i_class.markerproteins),
    options=list(i_class.markerproteins),
    size=8,
)

# The "a"/"b"/"c" entries are placeholders; the options are replaced once a
# JSON file has been loaded.
i_multi_choice = pn.widgets.CrossSelector(
    name='Select experiments for comparison',
    value=["a", "b"],
    options=["a", "b", "c"],
)

i_multi_choice_venn = pn.widgets.MultiChoice(
    name='Select experiments for comparison',
    value=["a", "b"],
    options=["a", "b", "c"],
    max_items=3,
    width=800,
)

i_ref_exp = pn.widgets.Select(
    name='Select experiments as reference',
    options=["a", "b", "c"],
)

dashboard_json = pn.Column(
    "Please, upload a file first and press 'Compare clusters'",
    name="Comparison",
    css_classes=["content-width"],
)

comparison_status = pn.Pane("No datasets were compared yet")

i_collapse_maps = pn.widgets.Checkbox(name="Collapse maps", value=False)
i_collapse_cluster = pn.widgets.Checkbox(name="Collapse cluster", value=False)
i_markerset_or_cluster = pn.widgets.Checkbox(name="Display only protein clusters", value=False)

# Widgets that are locked while a comparison or an analysis has been run.
wdgts_comparison = [button_comparison, i_organism_comparison]

@pn.depends(i_jsonFile.param.value)
def open_jsonFile(jsonFile):
    """Parse the uploaded comparison file and return the comparison settings.

    Parameters
    ----------
    jsonFile : bytes or None
        Content of the JSON file-input widget.

    Returns
    -------
    A status string when nothing is uploaded or the content is not a
    mapping, a column naming the experiments to compare plus the comparison
    controls on success, or the formatted traceback on failure.

    Side effects: stores the parsed mapping in ``i_class.json_dict``,
    refreshes the experiment selectors, updates ``cache_uploaded_json`` /
    ``cache_run_json`` and re-enables ``wdgts_comparison`` on success.
    """
    cache_run_json.value = False
    cache_uploaded_json.value = False
    if jsonFile is None:
        # Previously this message was assigned to a local and never shown.
        return "No file is uploaded"

    try:
        raw_text = jsonFile.decode("UTF-8")
        try:
            # Real JSON parser: also understands true/false/null/NaN, which
            # ast.literal_eval rejects.
            i_class.json_dict = json.loads(raw_text)
        except ValueError:
            # Backward compatibility with files that hold a Python literal
            # (e.g. single-quoted keys) instead of strict JSON.
            i_class.json_dict = ast.literal_eval(raw_text)

        if not hasattr(i_class.json_dict, "keys"):
            return "Your json-File does not fulfill the requirements"

        update_multi_choice_venn(i_multi_choice_venn)
        update_multi_choice(i_multi_choice)
        update_ref_exp(i_ref_exp)
        # sorted for a stable display order (set iteration order is arbitrary)
        experiment_names = sorted(set(list(i_class.analysed_datasets_dict.keys()) + list(i_class.json_dict.keys())))
        cache_uploaded_json.value = True
        for wdgt in wdgts_comparison:
            wdgt.disabled = False

        if len(experiment_names) > 1:
            summary = "You will compare following experiments:\n {}, and {}".format(
                ', '.join(experiment_names[:-1]), experiment_names[-1])
        else:
            # avoid the dangling ", and" when only one experiment is present
            summary = "You will compare following experiments:\n {}".format(experiment_names[-1])
        return pn.Column(pn.Row(summary),
                         pn.Row(i_organism_comparison),
                         pn.Row(button_comparison),
                         )
    except Exception:
        cache_uploaded_json.value = False
        return traceback.format_exc()


@pn.depends(i_multi_choice.param.value, i_clusterwidget_comparison.param.value, cache_run_json.param.value, i_xAxis_PCA_comp.param.value, i_yAxis_PCA_comp.param.value, 
            i_markerset_or_cluster.param.value)
def update_visualization_map_comparison(multi_choice, clusterwidget_comparison, run_json, xAxis_PCA_comp, yAxis_PCA_comp, markerset_or_cluster):
    """Render the global PCA comparison plot for the selected experiments.

    The selections are copied onto ``i_class`` before
    ``i_class.plot_global_pca_comparison()`` is called. The cluster selector
    is only part of the layout while ``markerset_or_cluster`` is unchecked.

    Returns
    -------
    The comparison layout, a prompt when no experiment is selected,
    "Run analysis first!" before a comparison was run, or the formatted
    traceback if plotting fails.
    """
    if not run_json:
        return "Run analysis first!"
    try:
        if multi_choice == []:
            return pn.Column(pn.Row(i_multi_choice),
                             pn.Row("Please select experiments for comparison")
                            )
        i_class.multi_choice = multi_choice
        i_class.markerset_or_cluster = markerset_or_cluster
        i_class.cluster_of_interest_comparison = clusterwidget_comparison
        i_class.x_PCA_comp = xAxis_PCA_comp
        i_class.y_PCA_comp = yAxis_PCA_comp
        pca_global_comparison = i_class.plot_global_pca_comparison()

        rows = [pn.Row(i_multi_choice)]
        if not markerset_or_cluster:
            rows.append(pn.Row(i_clusterwidget_comparison))
        rows.extend([
            pn.Row(i_markerset_or_cluster),
            pn.Row(pca_global_comparison),
            pn.Column(i_xAxis_PCA_comp, custimization_PCA_comp),
        ])
        return pn.Column(*rows)
    except Exception:
        return traceback.format_exc()


@pn.depends(i_multi_choice.param.value, i_clusterwidget_comparison.param.value, i_collapse_maps.param.value, i_collapse_cluster.param.value, i_clusters_for_ranking.param.value, i_ref_exp.param.value,
            cache_run_json.param.value)
def update_distance_and_pca(multi_choice, clusterwidget_comparison, collapse_maps, collapse_cluster, clusters_for_ranking, ref_exp, run_json):
    """Render cluster PCA, distance box plot and distance ranking for a comparison.

    The selections are copied onto ``i_class`` and the reference-experiment
    selector is restricted to the chosen experiments. When no ranking cluster
    is selected, the cluster of interest is used for the ranking calculation
    and the ranking section is replaced by a prompt.

    Returns
    -------
    The comparison layout, a prompt/notice layout for missing selections or
    unquantified clusters, "Run analysis first!" before a comparison was
    run, or the formatted traceback if anything fails.
    """
    if not run_json:
        return "Run analysis first!"
    try:
        if multi_choice == []:
            return pn.Column(pn.Row(i_multi_choice),
                             pn.Row("Please select experiments for comparison")
                            )
        i_class.multi_choice = multi_choice
        i_ref_exp.options = multi_choice
        i_class.collapse_maps = collapse_maps
        i_class.collapse_cluster = collapse_cluster
        update_ref_exp(i_ref_exp)
        i_class.ref_exp = ref_exp

        ranking_selected = clusters_for_ranking != []
        i_class.clusters_for_ranking = clusters_for_ranking if ranking_selected else [clusterwidget_comparison]
        i_class.cluster_of_interest_comparison = clusterwidget_comparison
        distance_ranking_comparison = i_class.distance_ranking_barplot_comparison()
        # The flag is raised before plotting and checked afterwards;
        # presumably distance_boxplot_comparison() clears it when nothing
        # was quantified.
        i_class.cache_cluster_quantified = True
        distance_comparison = i_class.distance_boxplot_comparison()
        if not i_class.cache_cluster_quantified:
            return pn.Column(pn.Row(i_multi_choice),
                             pn.Row(i_clusterwidget_comparison),
                             pn.Row("Cluster was not quantified at all")
                            )

        pca_comparison = i_class.plot_pca_comparison()
        # rows shared by both result layouts
        rows = [
            pn.Row(i_multi_choice),
            pn.Row(i_clusterwidget_comparison),
            pn.Row(i_collapse_maps),
            pn.Row(pca_comparison),
            pn.Row(distance_comparison),
            pn.Row(i_clusters_for_ranking),
        ]
        if ranking_selected:
            rows.extend([
                pn.Row(i_collapse_cluster),
                pn.Row(i_ref_exp),
                pn.Row(distance_ranking_comparison),
                pn.Row(pn.widgets.DataFrame(i_class.df_quantified_cluster, height=200, width=1800, disabled=True)),
            ])
        else:
            rows.append(pn.Row("Select at least one cluster"))
        return pn.Column(*rows)
    except Exception:
        return traceback.format_exc()
    
    
@pn.depends(i_multi_choice.param.value, cache_run_json.param.value)
def update_npr_ngg_nprDc(multi_choice, run_json):
    """Render the quantity and coverage bar plots for the selected experiments.

    Returns
    -------
    The plot layout, a prompt when no experiment is selected,
    "Run analysis first!" before a comparison was run, or the formatted
    traceback if plotting fails.
    """
    if not run_json:
        return "Run analysis first!"
    try:
        if multi_choice == []:
            return pn.Column(pn.Row(i_multi_choice),
                             pn.Row("Please select experiments for comparison"))
        i_class.multi_choice = multi_choice
        npr_npg_barplot = i_class.quantity_pr_pg_barplot_comparison()
        coverage_barplot = i_class.coverage_comparison()
        return pn.Column(pn.Row(i_multi_choice),
                         pn.Row(npr_npg_barplot),
                         pn.Row(coverage_barplot)
                        )
    except Exception:
        return traceback.format_exc()
    
    
# NOTE(review): a second, identical definition of update_venn follows further
# down in this file and replaces this one when the module is executed, so
# this definition is effectively dead code.
@pn.depends(i_multi_choice_venn.param.value, cache_run_json.param.value)
def update_venn(multi_choice_venn, run_json):
    """Render the Venn diagram for the selected experiments.

    Returns the layout with selector and diagram, "Run analysis first!"
    before a comparison was run, or the formatted traceback on failure.
    """
    try:
        if run_json == True: 
            # placeholder value; overwritten two lines below
            venn_plot = []
            i_class.multi_choice_venn = multi_choice_venn
            venn_plot = i_class.venn_diagram()
            return pn.Column(pn.Row(i_multi_choice_venn),
                             pn.Pane(venn_plot)
                            )
        else:
            venn_plot = "Run analysis first!"
            return venn_plot
    except:
        # any failure is shown in the dashboard as a traceback string
        update_status = traceback.format_exc()
        return update_status    

@pn.depends(i_multi_choice.param.value, i_ref_exp.param.value, i_collapse_cluster.param.value, cache_run_json.param.value)
def update_dynamic_range_comparison(multi_choice, ref_exp, collapse_cluster, run_json):
    """Render the dynamic range comparison bar plot.

    Returns
    -------
    The plot layout, a prompt when no experiment is selected,
    "Run analysis first!" before a comparison was run, or the formatted
    traceback if plotting fails.
    """
    if not run_json:
        return "Run analysis first!"
    try:
        if multi_choice == []:
            return pn.Column(pn.Row(i_multi_choice),
                             pn.Row("Please select experiments for comparison"))
        i_class.multi_choice = multi_choice
        i_class.collapse_cluster = collapse_cluster
        i_class.ref_exp = ref_exp
        dynamic_range_barplot = i_class.dynamic_range_comparison()
        return pn.Column(pn.Row(i_multi_choice),
                         pn.Row(dynamic_range_barplot),
                         pn.Row(i_collapse_cluster),
                         pn.Row(i_ref_exp)
                        )
    except Exception:
        return traceback.format_exc()
    
@pn.depends(i_multi_choice_venn.param.value, cache_run_json.param.value)
def update_venn(multi_choice_venn, run_json):
    """Render the Venn diagram for the selected experiments.

    The selection is stored on ``i_class`` before ``i_class.venn_diagram()``
    is called.

    Returns
    -------
    A column with the experiment selector and the diagram,
    "Run analysis first!" before a comparison was run, or the formatted
    traceback if plotting fails.
    """
    if not run_json:
        return "Run analysis first!"
    try:
        i_class.multi_choice_venn = multi_choice_venn
        venn_plot = i_class.venn_diagram()
        return pn.Column(pn.Row(i_multi_choice_venn),
                         pn.Pane(venn_plot)
                        )
    except Exception:
        return traceback.format_exc()
        

def update_multi_choice(i_multi_choice):
    """Offer the experiments of ``i_class.json_dict`` in the given selector.

    If ``i_class.multi_choice`` is not an element of the experiment names,
    both the stored selection and the widget value are reset to the first
    two experiments.
    """
    experiment_names = list(i_class.json_dict.keys())
    i_multi_choice.options = experiment_names
    # NOTE(review): this tests whether the stored selection *as a whole* is
    # one of the names; if it is a list of names the test presumably never
    # succeeds and the selection is always reset - confirm the intent.
    if i_class.multi_choice in list(i_class.json_dict.keys()):
        return
    i_class.multi_choice = list(i_class.json_dict.keys())[0:2]
    i_multi_choice.value = list(i_class.json_dict.keys())[0:2]

@pn.depends(i_multi_choice.param.value)
def update_ref_exp(i_ref_exp):
    """Restrict the reference selector to the currently selected experiments.

    If ``i_class.ref_exp`` is not among the selected experiments, the stored
    reference and the widget value fall back to the first selected one.
    """
    i_ref_exp.options = i_multi_choice.value
    if i_class.ref_exp in i_multi_choice.value:
        return
    i_class.ref_exp = i_multi_choice.value[0]
    i_ref_exp.value = i_multi_choice.value[0]


#def update_ref_exp(i_ref_exp):
#    i_ref_exp.options = i_multi_choice.value
#    if i_class.ref_exp not in i_multi_choice.value:
#            i_class.ref_exp = i_multi_choice.value[0] 
#            i_ref_exp.value = i_multi_choice.value[0]
#            

def update_multi_choice_venn(i_multi_choice_venn):
    """Offer the experiments of ``i_class.json_dict`` in the Venn selector.

    If ``i_class.multi_choice_venn`` is not an element of the experiment
    names, both the stored selection and the widget value are reset to the
    first two experiments.
    """
    experiment_names = list(i_class.json_dict.keys())
    i_multi_choice_venn.options = experiment_names
    # NOTE(review): membership of the whole stored selection is tested, not
    # of its individual entries - confirm the intent.
    if i_class.multi_choice_venn in list(i_class.json_dict.keys()):
        return
    i_class.multi_choice_venn = list(i_class.json_dict.keys())[0:2]
    i_multi_choice_venn.value = list(i_class.json_dict.keys())[0:2]

        
def execution_comparison(event):
    """Run the experiment comparison; callback of ``button_comparison``.

    Without an uploaded JSON file only a hint is written to
    ``comparison_status``. Otherwise the previous results are removed from
    ``dashboard_comparison``, the comparison widgets are disabled, the JSON
    data is read, the selection widgets are refreshed, the PCA comparison is
    computed and ``comparison_tabs`` is appended to the dashboard. On failure
    the widgets are re-enabled and the traceback is written to
    ``comparison_status``.

    Args:
        event: click event passed by the button; not used.
    """
    # prevent execution, if no data is uploaded yet
    if not cache_uploaded_json.value:
        comparison_status.object = "Please upload a JSON-file first"
        return

    # NOTE(review): dashboard_comparison is constructed with comparison_status
    # at index 2, so this slice also drops the status pane from the object
    # list -- confirm that this is intended.
    dashboard_comparison.objects[2:] = []
    cache_run_json.value = False
    for wdgt in wdgts_comparison:
        wdgt.disabled = True
    try:
        comparison_status.object = "Analysis in progress"
        i_class.markerproteins = i_class.all_markerproteins[i_organism_comparison.value]
        i_class.read_jsonFile()
        update_multi_choice(i_multi_choice)
        update_multi_choice_venn(i_multi_choice_venn)
        i_class.perform_pca_comparison()

        dashboard_comparison.append(comparison_tabs)

        comparison_status.object = "Comparison finished!"
        cache_run_json.value = True
    except Exception:
        # Only Exception is caught so that KeyboardInterrupt/SystemExit are
        # not swallowed by the callback.
        for wdgt in wdgts_comparison:
            wdgt.disabled = False
        comparison_status.object = traceback.format_exc()
        cache_run_json.value = False

            
# Run the comparison whenever the comparison button is clicked.
button_comparison.on_click(execution_comparison)  

# Base layout of the "Data comparison" tab. execution_comparison() appends
# comparison_tabs to this column once a comparison has finished.
dashboard_comparison = pn.Column(i_jsonFile, open_jsonFile, comparison_status)

# Result tabs of the comparison; each entry is a (tab title, content) tuple.
# update_venn is the pn.depends function defined above; the other content
# objects are defined outside this section.
comparison_tabs = pn.Tabs(margin=10, css_classes=["content-width"], dynamic=True)
comparison_tabs.append(("Visualization - organellar maps", update_visualization_map_comparison))
comparison_tabs.append(("PCA plot and Distance box plot", update_distance_and_pca))
comparison_tabs.append(("Venn Diagram", update_venn))
comparison_tabs.append(("Protein Groups/Profiles Quantity", update_npr_ngg_nprDc))
comparison_tabs.append(("Dynamic Range", update_dynamic_range_comparison))

# Top-level navigation of the app: one tab per dashboard.
app_tabs = pn.Tabs(margin=10, css_classes=["content-width"], dynamic=True)
app_tabs.append(("Home", dasboard_home))
app_tabs.append(("Analysis", dashboard_analysis))
app_tabs.append(("Download", show_tabular_overview))
app_tabs.append(("SVM Analysis", dashboard_svm))
app_tabs.append(("Data comparison", dashboard_comparison))

# "About" tab; its content is currently only a placeholder text.
app_tabs.append(("About", pn.Row("Explanation of what's going on here", width=2000)))

#i_search = pn.widgets.TextInput(name="Search")
# Central column of the page: title row on top, the application tabs below.
app_center = pn.Column(pn.Row(pn.Pane("# QC tool for Spatial Proteomics", width = 600),
                              pn.layout.HSpacer(),
                              #i_search,
                              #width=1600, 
                              margin=10),
                       app_tabs,
                       #pn.Spacer(background="#DDDDDD", height=100, margin=0)
                      )
# Page grid: white spacers in columns 0 and 9 frame the content, which spans
# columns 1-7 (the slice 1:8 excludes column 8, which is not assigned here).
app = pn.GridSpec()#sizing_mode="stretch_both", margin=0)
app[0,0] = pn.Spacer(background="white", margin=0) #"#DDDDDD"
app[0,9] = pn.Spacer(background="white", margin=0) #"#DDDDDD"
app[0,1:8] = app_center

# Password gate: the container initially shows only the password field;
# check_pwd() replaces it with the app once the expected password is entered.
pwd = pn.widgets.PasswordInput(name="Please enter password for access.")
app_container = pn.Column(pwd)

def check_pwd(event, app=app):
    """Reveal the application once the expected password has been entered.

    Args:
        event: change event of the password widget; ``event.new`` holds the
            newly entered text.
        app: layout that replaces the password field; defaults to the
            module-level ``app`` as it exists when this function is defined.
    """
    # NOTE(review): the expected password is hard-coded in plain text here.
    entered = event.new
    if entered != "pwd":
        return
    app_container[0] = app


# Re-check the password every time the value of the input widget changes.
pwd.param.watch(check_pwd, "value")
# Load the marker table from the tab-separated file, keeping only the columns
# whose name starts with "Gene name" or "Compartment", rename the gene column
# to "Gene names", cast it to str and hand the table to the analysis class.
df_eLifeMarkers = (
    pd.read_csv(
        "eLife_markers.txt",
        sep="\t",
        comment="#",
        usecols=lambda x: bool(re.match("Gene name|Compartment", x)),
    )
    .rename(columns={"Gene name": "Gene names"})
    .astype({"Gene names": "str"})
)
i_class.df_eLifeMarkers = df_eLifeMarkers

In [3]:
# Stop the server started by a previous execution of this cell, if there is
# one, before launching the app again.
try:
    server.stop()
except NameError:
    # `server` is not defined yet: first execution of this cell.
    print("First server startup")
except Exception:
    # A server object exists but stopping it failed; report the real error
    # instead of presenting it as a first startup.
    print("Could not stop the previous server:")
    traceback.print_exc()
server = app.show(port=5063, websocket_max_message_size=2000000000)
#app.servable()

First server startup
Launching server at http://localhost:5063


# Histogram between pairs of replicates

In [None]:
import plotly.figure_factory as ff

# Exploratory cell: distribution plot of the stacked log data.
# NOTE(review): `group_labels` and `colors` are not defined in the visible
# code and have to exist before this cell can run.
# NOTE(review): create_distplot is documented to take a list of 1-D data
# sequences as first argument -- confirm that passing a DataFrame is intended.
fig = ff.create_distplot(i_class.df_log_stacked.reset_index(), group_labels, colors=colors, bin_size=.25,
                         show_curve=False)

# Add title
fig.update_layout(title_text='Hist and Rug Plot')
fig.show()

In [79]:
# Work on a copy of the stacked log data with all index levels turned into
# ordinary columns; the table stored on i_class stays untouched.
df = i_class.df_log_stacked.copy().reset_index()

In [115]:
# Histogram of the "log profile" column, one colour and one facet column per
# value of "Map".
px.histogram(
    data_frame=i_class.df_log_stacked.reset_index(),
    x="log profile",
    color="Map",
    facet_col="Map",
    # marginal="box",
    # barmode="overlay",
)
    

In [349]:
# Turn dict_npgf_dc into a long table: the column axis is named "Fraction",
# row 0 is labelled "Data completeness", and after transposing the fraction
# labels are moved from the index into a regular "Fraction" column.
df_npgf_dc = (
    pd.DataFrame(dict_npgf_dc)
    .rename_axis("Fraction", axis=1)
    .rename({0: "Data completeness"}, axis="index")
    .T
    .reset_index()
)
df_npgf_dc

Unnamed: 0,Fraction,Data completeness
0,01K,0.94779
1,03K,0.978204
2,06K,1.0
3,12K,1.0
4,24K,0.926703
5,80K,0.837287


In [351]:
# Bar chart of the "Data completeness" column per "Fraction".
px.bar(
    data_frame=df_npgf_dc,
    x="Fraction",
    y="Data completeness",
)

# DC and PG - Fractions / Map

In [None]:
# Bar chart of the "Protein Groups" column per "Fraction".
px.bar(
    data_frame=df_npgf,
    x="Fraction",
    y="Protein Groups",
)

In [None]:
#def get_venn_sections(sets):
#    """
#    Given a list of sets, return a new list of sets with all the possible
#    mutually exclusive overlapping combinations of those sets.  Another way
#    to think of this is the mutually exclusive sections of a venn diagram
#    of the sets.  If the original list has N sets, the returned list will
#    have (2**N)-1 sets.
#
#    Parameters
#    ----------
#    sets : list of set
#
#    Returns
#    -------
#    combinations : list of tuple
#        tag : str
#            Binary string representing which sets are included / excluded in
#            the combination.
#        set : set
#            The set formed by the overlapping input sets.
#    """
#    num_combinations = 2 ** len(sets)
#    bit_flags = [2 ** n for n in range(len(sets))]
#    flags_zip_sets = [z for z in zip(bit_flags, sets)]
#
#    combo_sets = []
#    for bits in range(num_combinations - 1, 0, -1):
#        include_sets = [s for flag, s in flags_zip_sets if bits & flag]
#        exclude_sets = [s for flag, s in flags_zip_sets if not bits & flag]
#        combo = set.intersection(*include_sets)
#        combo = set.difference(combo, *exclude_sets)
#        tag = ''.join([str(int((bits & flag) > 0)) for flag in bit_flags])
#        combo_sets.append((tag, combo))
#    return combo_sets

In [None]:
#new_list = get_venn_sections([set(i_class.unique_proteins_total[i]) for i in i_class.multi_choice_venn])[0][1]

In [None]:
####SILAC
###df_data_completeness = i_class.df_index["Ratio H/L"].stack(["Fraction"])
###data_completeness = 1-df_data_completeness.apply(np.isnan).apply(sum)/len(df_data_completeness)
###data_completeness = data_completeness.append(pd.Series(data_completeness.mean(), index=["combined"]))
###data_completeness.rename("Data completness", inplace=True)
####LFQ
###df_data_completeness = i_class.df_index["LFQ intensity"].stack(["Fraction"])
###data_completeness = 1-df_data_completeness.apply(np.isnan).apply(sum)/len(df_data_completeness)
###data_completeness = data_completeness.append(pd.Series(data_completeness.mean(), index=["combined"]))
###data_completeness.rename("Data completness", inplace=True)
####LFQ and SILAC
###df_profile_completeness = i_class.df_01_stacked["normalized profile"].xs(i_class.fractions[0], level="Fraction", axis=0).unstack(["Map"])
###profile_completeness = 1-df_profile_completeness.apply(np.isnan).apply(sum)/len(df_profile_completeness)
###profile_completeness = profile_completeness.append(pd.Series(profile_completeness.mean(), index=["combined"]))
###profile_completeness.rename("Profile completness", inplace=True)





###
###df_completeness = pd.concat([data_completeness, profile_completeness], axis=1)
###df_completeness.index.name = "Map"###################
#SVM
###################

#df_01_test = i_class.df_01_stacked.copy()
#
##df_01_filtered_combined = df_01_filtered_combined.stack(["Experiment", "Map"]).swaplevel(0,1, axis=0).dropna(axis=1)
#index_ExpMap = df_01_test.index.get_level_values("Map")+'_'+df_01_test.index.get_level_values("Fraction")
#index_ExpMap.name = "Map_Frac"
#df_01_test.set_index(index_ExpMap, append=True, inplace=True)      
#
#df_01_test.index = df_01_test.index.droplevel(["Map", "Fraction"])
#df_01_test = df_01_test.unstack("Map_Frac")
#df_01_test.columns = ['_'.join(col) for col in df_01_test.columns.values]
#df_01_test.rename(index={"undefined" : np.nan}, level='Compartment', inplace=True)
#df_01_test
#compartments = i_class.df_eLifeMarkers["Compartment"].unique()
#compartment_color = dict(zip(compartments, i_class.css_color))
#compartment_color["undefined"] = "lightgrey"
#
#collapse_maps_PCA_test = False
#df_global_pca = i_class.df_pca.unstack("Map").swaplevel(0,1, axis=1)[i_class.map_of_interest].reset_index()
##        else:
# #           df_global_pca = self.df_pca.rename_axis("PC", axis="columns").unstack("Map").stack("PC").median(axis=1).to_frame().unstack("PC")
# #           df_global_pca.columns = df_global_pca.columns.droplevel()
  #          df_global_pca.reset_index(inplace=True)

#if collapse_maps_PCA_test == False:
#    title_PCA = "Protein subcellular localization by PCA of {}".format(i_class.map_of_interest)
#else:
#    title_PCA = "Protein subcellular localization by PCA of combined maps"
        
#fig_global_pca = px.scatter(data_frame=df_global_pca,
#                            x="PC1",
#                            y="PC3",
#                            color="Compartment",
#                            color_discrete_map=compartment_color,
#                            title= "Protein subcellular localization by PCA for {}".format(i_class.map_of_interest) 
#                            if collapse_maps_PCA_test == False else "Protein subcellular localization by PCA of combined maps", 
#                            #title_PCA,
#                            hover_data=["Protein IDs", "Gene names", "Compartment"],
#                    #       custom_data=df_annotated_all.columns,
#                            opacity=0.9
#                            )
#fig_global_pca       
######################################################WITH RANKING
##dict_cluster_ranking = {}
##dict_cluster_normalizedMedian = {}
##for cluster in i_class.markerproteins.keys():
##    df_cluster = df_distance_map_cluster_gene_in_index.xs(cluster, level="Cluster")
##    all_median_one_cluster_several_exp = {}
##    for exp in i_class.json_dict.keys():
##        median = df_cluster.xs(exp, level="Experiment").median()
##        #df_cluster[df_cluster.index.get_level_values('Experiment').isin([exp])].median()
##        all_median_one_cluster_several_exp[exp] = float(median)
##    
##    min_median = min(all_median_one_cluster_several_exp.items(), key=lambda x: x[1])[1]
##    median_ranking = {exp: median/min_median for exp, median in all_median_one_cluster_several_exp.items()}
##    dict_cluster_normalizedMedian[cluster] = median_ranking
##    
##    dict_cluster_normalizedMedian[cluster] = median_ranking
##    
##    ranking_one_cluster = {key: rank for rank, key in enumerate(sorted(all_median_one_cluster_several_exp, key=all_median_one_cluster_several_exp.get), 1)}
##    dict_cluster_ranking[cluster] = ranking_one_cluster
##
##df_ranking = pd.concat([pd.DataFrame(dict_cluster_ranking), pd.DataFrame(dict_cluster_normalizedMedian)], keys=["Ranking", "Normalized median"], names=['Type', 'Experiment'])   
##df_ranking.rename_axis("Cluster", axis=1, inplace=True)
##df_rank_for_plot = df_ranking.unstack("Type").stack("Cluster").reset_index()
##df_rank_for_plot = df_rank_for_plot.sort_values(["Normalized median", "Experiment"])
##fig2 =px.bar(df_rank_for_plot, x='Cluster', y='Normalized median', color='Experiment', barmode="group")
##fig2.update_xaxes(categoryorder="total ascending")
##fig2 

##ranking_sum = df_ranking.sum(axis=1).round(2)
##df_ranking2 = df_ranking.copy()
##df_ranking2["Ranking - Product"] = df_ranking2.product(axis=1).round(2)
##df_ranking2["Ranking - Sum"] = ranking_sum
##df_ranking2

#on_validProfiles2 = {}
#or maps in i_class.map_names : 
#   series_non_validProfiles_map = df_MQ["normalized profile"].xs(maps, level="Map").unstack("Fraction").apply(lambda x: x.isnull().any(), axis=1)
#   total_protein_ids_map = len(series_non_validProfiles_map)
#   non_validProfiles_map = len(series_non_validProfiles_map[series_non_validProfiles_map == True].index)
#   non_validProfiles2[maps] = non_validProfiles_map/total_protein_ids_map*100
#eries_non_validProfiles_combined = df_MQ.unstack(["Fraction", "Map"])["normalized profile"].apply(lambda x: x.isnull().any(), axis=1)
#otal_protein_ids_combined = len(series_non_validProfiles_combined)
#on_validProfiles_combined = len(series_non_validProfiles_combined[series_non_validProfiles_combined == True].index)
#on_validProfiles2["combined maps"] = non_validProfiles_combined/total_protein_ids_combined*100
#self.shape_dict["Non valid profiles"] = non_validProfiles
#on_validProfiles2


##df_lognorm_MAP1 = df_lognorm_ratio_stacked.loc(axis=0)[:,:,:,:,:,:,:,:,["MAP1"]]
##px.histogram(df_lognorm_MAP1.reset_index(), 
##x="normalized profile",
##color="Map_Frac",
##facet_row="Map_Frac")
#
#timsTOF = i_class.df_01_stacked.copy()
#timsTOF_tojson = i_class.df_01_stacked["normalized profile"].unstack("Map").median(axis=1).to_frame(name="normalized profile")#.unstack("PC")
#
#
#test_summary_dict = {"timsTOF" : {"0/1 normalized data" : timsTOF_tojson.reset_index().to_json() }}
#test_summary_dict["Expl"] = {"0/1 normalized data" : Expl_tojson.reset_index().to_json() }
#
#for exp_name in test_summary_dict.keys():
#    #print(exp_name)
#    for data_type in test_summary_dict[exp_name].keys():
#        if data_type == "0/1 normalized data" and exp_name == list(test_summary_dict.keys())[0]:
#            #convert into dataframe
#            df_01_combined = pd.read_json(test_summary_dict[exp_name][data_type])
#            #get only 01 normalized data 
#            df_01_combined = df_01_combined.set_index(["Fraction", "Gene names", "Protein IDs", "Compartment"])[["normalized profile"]].unstack(["Fraction"])
#            df_01_combined.rename(columns = {"normalized profile":exp_name}, inplace=True)
#
#        elif data_type == "0/1 normalized data" and exp_name != list(test_summary_dict.keys())[0]:
#            df_01_toadd = pd.read_json(test_summary_dict[exp_name][data_type])
#            df_01_toadd = df_01_toadd.set_index(["Fraction", "Gene names", "Protein IDs", "Compartment"])[["normalized profile"]].unstack(["Fraction"])
#            df_01_toadd.rename(columns = {"normalized profile":exp_name}, inplace=True)
#            #dataframes will be concatenated, only proteins/Profiles that are in both df will be retained
#            df_01_combined = pd.concat([df_01_combined, df_01_toadd], axis=1, join='inner')
#            
#            
#df_01_filtered_combined = df_01_combined.dropna()    
#df_01_filtered_combined.columns.names = ["Experiment", "Fraction"]
##reframe it to make it ready for PCA
#df_01_filtered_combined = df_01_filtered_combined.stack(["Experiment"])
#
#
#pca = PCA(n_components=3)
#
## df_pca: PCA processed dataframe, containing the columns "PC1", "PC2", "PC3"
#df_pca = pd.DataFrame(pca.fit_transform(df_01_filtered_combined))
#df_pca.columns = ["PC1", "PC2", "PC3"]
#df_pca.index = df_01_filtered_combined.index
#
#
#
#df_pca_for_plotting = i_class.df_global_pca_for_plotting
#df_pca_for_plotting
#
#
#fig_pca = px.scatter(data_frame=i_class.df_global_pca_for_plotting.reset_index(),
#                     x="PC1",
#                     y="PC3",
#                     color="Compartment",
#                     color_discrete_map={
#                         "undefined": "lightgrey",
#                         "Endosome": "green",
#                         "Ergic/cisGolgi": "blue",
#                         "Large Protein Complex": "orange",
#                         "Nuclear pore complex": "purple",
#                         "Peroxisome" : "goldenrod",
#                         "Lysosome": "yellow",
#                         "Plasma membrane" : "lightcoral",
#                         "Actin binding proteins" : "magenta",
#                         "ER": "brown",
#                         "ER_high_curvature" : "lightpink",
#                         "Golgi": "red",
#                         "Mitochondrion": "turquoise",
#                     },
#                     facet_col="Experiment",
#                     #facet_col_wrap=4,
##  facet_col_wrap=len(i_class.fractions),     
#                         
#                     )
#fig_pca
#
#
##compartments = self.df_eLifeMarkers["Compartment"].unique()
##compartment_color = dict(zip(compartments, css_color))
##compartment_color["undefined"] = "lightgrey"
#
#fig_pca = px.scatter(data_frame=i_class.df_pca.reset_index(),
#                     x="PC1",
#                     y="PC3",
#                     color="Compartment",
#                     color_discrete_map=compartment_color,
#                    title="Protein subcellular localization by PCA",
#                    hover_data=["Gene names", "Compartment"], #"Protein names", 
#                    #custom_data=df_annotated_all.columns,
#                    opacity=0.9
#                    )
#fig_pca                         

#df_fractionnumber_stacked = i_class.df_index.copy().stack("Fraction")
#number_fractions = len(df_fractionnumber_stacked.index.get_level_values("Fraction").unique())
#
#df_index = i_class.df_index.stack("Map")
#df_index.sort_index(axis=1, level=0, inplace=True)
#df_mscount_mapstacked = df_index.loc[df_index[('MS/MS count')].apply(np.sum, axis=1) >= (
#        number_fractions * 2)]
#df_stringency_mapfracstacked = df_mscount_mapstacked.copy()
#df_stringency_mapfracstacked = df_stringency_mapfracstacked.loc[
#    df_stringency_mapfracstacked[("LFQ intensity")].apply(lambda x: any(
#        np.invert(np.isnan(x)).rolling(window=4).sum() >=
#        4), axis=1)]
#df_stringency_mapfracstacked = df_stringency_mapfracstacked.copy().stack("Fraction")
#
#df_stringency_mapfracstacked

#df_eLifeMarkers = pd.read_csv("eLife_markers.txt", sep="\t", comment="#",
#                                       usecols=lambda x: bool(re.match("Gene name|Compartment", x)))
#df_eLifeMarkers = df_eLifeMarkers.rename(columns={"Gene name":"Gene names"})
#df_eLifeMarkers = df_eLifeMarkers.astype({'Gene names': 'str'})
#df_eLifeMarkers

#df_index_annotated_noIndex = df_stringency_mapfracstacked.reset_index()
#df_index_annotated_noIndex = df_index_annotated_noIndex.merge(df_eLifeMarkers, how="outer", on='Gene names', indicator = True)
##df_index_annotated_noIndex = df_index_annotated_noIndex.sort_values("Compartment")
#df_an = df_index_annotated_noIndex.loc[df_index_annotated_noIndex['_merge'].isin(['both','left_only'])].drop("_merge", axis=1)
#df_an.set_index([c for c in df_an.columns if c != 'MS/MS count' and c != 'LFQ intensity'], inplace=True)
#df_an.rename(index={np.nan : "undefined"}, level='Compartment', inplace=True)
#df_an

#df_pca_MAP1 = i_class.df_pca.unstack("Map").swaplevel(0,1, axis=1)["Map2"].reset_index()
#df_pca_MAP1

#fig_pca = px.scatter(data_frame=df_pca_MAP1,
#                     x="PC1",
#                     y="PC3",
#                     color="Compartment",
#                     color_discrete_map=compartment_color,
#                    title="Protein subcellular localization by PCA",
#                    hover_data=["Gene names", "Compartment"], #"Protein names", 
#                    #custom_data=df_annotated_all.columns,
#                    opacity=0.9
#                    )
#fig_pca  


#i_class.df_pca.query("Compartment == 'ER' or Compartment == 'Plasma membrane'")#"& Compartment == 'Plasma memebrane'")  and 

#json_dict = ast.literal_eval(i_jsonFile.value.decode("UTF-8"))



#    @pn.depends(i_SVM_table.param.value, cache_run.param.value)
#def read_SVM_matrix(SVM_table, run):   
#    try:
#        if run == True:
#            if SVM_table == "":
#                SVM_reading_status = "No misclassification matrix is uploaded"
#                cache_uploaded_SVM.value = False
#            else:
#                cache_uploaded_SVM.value = False
#                try:
#                    i_class.df_SVM = pd.read_table(StringIO(SVM_table), sep="\t")
#                    SVM_reading_status = i_class.df_SVM
#                    cache_uploaded_SVM.value = True
#                    return pn.Column(pn.Pane(SVM_reading_status, width=60*SVM_reading_status.shape[1]),
#                                     pn.Row(button_SVM_analysis))
#                except: 
#                    SVM_reading_status = traceback.format_exc()
#                    cache_uploaded_SVM.value = False
#                    return SVM_reading_status 
#        else:
#            SVM_reading_status = "Please, upload a file first and press ‘Analyse clusters’"
#            return SVM_reading_status
#    except:
#        SVM_reading_status = traceback.format_exc()
#        return SVM_reading_status#        


#from matplotlib_venn import venn2, venn2_circles, venn2_unweighted
#from matplotlib_venn import venn3, venn3_circles
#from matplotlib import pyplot as plt
#Course1=["A", "B","C", "D", "E", "F", "G", "I", "P", "Q"]
#Course2=["B", "E", "F", "H","K", "Q", "R", "S", "T", "U", "V", "Z"]
#Course3=["C", "E", "G", "H", "J", "K", "O", "Q", "Z"]
#
#vd3=venn3([set(Course1),set(Course2),set(Course3)],
# set_labels=("Course1", "Course2","Course3"),
# set_colors=(), 
# alpha = 0.8)
#venn3_circles([set(Course1), set(Course2),set(Course3)], linestyle="-.", linewidth=2, color="grey")
#for text in vd3.set_labels:
#    text.set_fontsize(16);
#for text in vd3.subset_labels:
#    text.set_fontsize(16)
##plt.title(‘Venn Diagram for 3 courses’,fontname=’Times New Roman’,fontweight=’bold’,fontsize=20,
## pad=30,backgroundcolor=’#cbe7e3',color=’black’,style=’italic’);
#plt.show()

#                    data_completeness: series, for each individual map, as well as combined maps: 1 - (percentage of NANs)

            ##profile completeness - percentage of valid profiles (profiles that do not contain NaN)
            #df_profile_completeness = df_01_stacked["normalized profile"].xs(self.fractions[0], level="Fraction", axis=0).unstack(["Map"])
            #profile_completeness = 1-df_profile_completeness.apply(np.isnan).apply(sum)/len(df_profile_completeness)
            #profile_completeness = profile_completeness.append(pd.Series(profile_completeness.mean(), index=["Combined Maps"]))
            #profile_completeness.rename("Profile completeness", inplace=True)
            #
            #df_completeness = pd.concat([self.data_completeness, profile_completeness], axis=1)
            #df_completeness.index.name = "Map"
            #self.analysis_summary_dict["Data/Profile Completeness"] = df_completeness.reset_index().to_json()
            
            
            
                        ##profile completness - percentage of valid profiles (profiles that do not contain NaN)
            #df_profile_completeness = df_01_stacked["normalized profile"].xs(self.fractions[0], level="Fraction", axis=0).unstack(["Map"])
            #profile_completeness = 1-df_profile_completeness.apply(np.isnan).apply(sum)/len(df_profile_completeness)
            #profile_completeness = profile_completeness.append(pd.Series(profile_completeness.mean(), index=["Combined Maps"]))
            #profile_completeness.rename("Profile completeness", inplace=True)
            #            
            #df_completeness = pd.concat([self.data_completeness, profile_completeness], axis=1)
            #df_completeness.index.name = "Map"
            #self.analysis_summary_dict["Data/Profile Completeness"] = df_completeness.reset_index().to_json()
            #
#df_completeness_combined : df, with information about Data/Profile Completeness, index: "Experiment", "Map", 
#                                       column names: "Data completeness", "Profile completeness"


#                    data_completeness : series, for each individual map, as well as combined maps: 1 - (percentage of NANs)
#            #data completness - percentage of NANs
#            if self.acquisition == "SILAC":
#                df_data_completeness = df_index["Ratio H/L"].stack(["Fraction"])
#            elif self.acquisition == "LFQ":
#                df_data_completeness = df_index["LFQ intensity"].stack(["Fraction"])
#                
#            data_completeness = 1-df_data_completeness.apply(np.isnan).apply(sum)/len(df_data_completeness)
#            data_completeness = data_completeness.append(pd.Series(data_completeness.mean(), index=["Combined Maps"]))
#            data_completeness.rename("Data completeness", inplace=True)
#            self.data_completeness = data_completeness 



#                    data_completeness : series, for each individual map, as well as combined maps: 1 - (percentage of NANs)
            
#            #data completness - percentage of NANs
#            df_data_completeness = df_index["LFQ intensity"].stack(["Fraction"])
#            data_completeness = 1-df_data_completeness.apply(np.isnan).apply(sum)/len(df_data_completeness)
#            data_completeness = data_completeness.append(pd.Series(data_completeness.mean(), index=["Combined Maps"]))
#            data_completeness.rename("Data completeness", inplace=True)
#            self.data_completeness = data_completeness


                
#                self:
#                    analysis_summary_dict["Data/Profile Completeness"] : df, with information about Data/Profile Completeness
#                                        column: "Experiment" 	"Map" 	"Data completeness" 	"Profile completeness"
#                                        no row index