# Dashboard

In [1]:
# export
import os
import numpy as np
import pandas as pd
import panel as pn
pn.extension('plotly')

import pyteomics.fasta
from io import StringIO
import plotly.graph_objects as go

from pepmap.importing import importSpectronautData
from pepmap.preprocessing import formatInputData, expandProteinIds, getPeptidePosition, getModifications

In [23]:
# Folder that contains the organism FASTA files opened by the dashboard.
path_to_folder_fasta_files = 'testdata'
# Module-level slots for the organism annotation. `full_fasta` is assigned by
# `upload_organism_info`; `full_uniprot` is only declared there so far.
full_fasta = full_uniprot = None

### Style of the dashboard

In [3]:
#export
# Extra CSS rule for elements carrying the `padding_widgets` class
# (attached to the "Visualize Data" button through its `css_classes`).
css = '''
.bk.padding_widgets {
  padding: 2px;
}
'''
# Hand the raw stylesheet to Panel via the `raw_css` extension option.
pn.extension(raw_css=[css])

### Header

In [4]:
### Common widgets
# Centered page title displayed at the very top of the dashboard.
header_titel = pn.pane.Markdown('# Protein Sequence Visualizer', align='center')

# Thin horizontal rule that is reused between the dashboard sections.
divider = pn.layout.Divider(align='center', width=1500, margin=1)

In [5]:
# Maps a modification label (as it appears in the modified peptide sequence)
# to the numeric Plotly marker symbol used to draw it.
ptmShapeDict = {
    '[Phospho (STY)]': 0,            # circle
    '[GlyGly (K)]': 2,               # diamond
    '[Carbamidomethyl (C)]': 3,      # cross
    '[Oxidation (M)]': 4,            # x
    '[Acetyl (Protein N-term)]': 5,  # triangle-up
}

In [6]:
# Copied functions
def getPlotData(protein,df,fasta):
    protein_sequence = fasta[protein].sequence
    df_prot = df[df.unique_protein_id==protein]
    
    if df_prot.shape[0] == 0:
        df_plot = None
    else:
        df_peps = [np.arange(row['start'], row['end']+1) for _, row in df_prot.iterrows()]
        df_peps  = pd.DataFrame.from_records(data=df_peps)
        df_peps['modified_sequence'] = df_prot['modified_sequence'].values
        df_peps = df_peps.melt(id_vars=['modified_sequence'])
        df_peps = df_peps[['modified_sequence','value']].dropna()
        df_peps = df_peps.rename(columns={"value": "seq_position"})
        df_peps['marker_symbol'] = 1
        df_peps['marker_size'] = 8
        df_peps['PTM'] = np.NaN
        df_peps['PTMtype'] = np.NaN
        df_peps['PTMshape'] = np.NaN
        unique_pep = df_peps.modified_sequence.unique()
        for uid in unique_pep:
            df_peps_uid = df_peps[df_peps.modified_sequence==uid]
            start_uid = np.min(df_peps_uid.seq_position)
            end_uid = np.max(df_peps_uid.seq_position)
            df_peps['marker_symbol'] = np.where(df_peps.seq_position == start_uid, 7, df_peps.marker_symbol)
            df_peps['marker_symbol'] = np.where(df_peps.seq_position == end_uid, 8, df_peps.marker_symbol)
            df_peps['marker_size'] = np.where(df_peps.seq_position == start_uid, 6, df_peps.marker_size)
            df_peps['marker_size'] = np.where(df_peps.seq_position == end_uid, 6, df_peps.marker_size)

            df_PTMs_uid = df_prot[df_prot.modified_sequence==uid]
            PTMsites = df_PTMs_uid.PTMsites.tolist()[0] + start_uid
            PTMtypes = df_PTMs_uid.PTMtypes.tolist()[0]

            for i in range(0,len(PTMsites)):
                df_peps['PTM'] = np.where(df_peps["seq_position"]==PTMsites[i], 1, df_peps.PTM)
                df_peps['PTMtype'] = np.where(df_peps["seq_position"]==PTMsites[i], PTMtypes[i], df_peps.PTMtype)

            df_seq = pd.DataFrame({'seq_position':np.arange(0,len(protein_sequence))})

            df_plot = pd.merge(df_seq, df_peps, how='left', on='seq_position')
            df_plot['height']=0
            df_plot['color']="grey"

            unique_mods = df_plot['PTMtype'].dropna().unique()
            if len(unique_mods) > 0:
                for mod in df_plot['PTMtype'].dropna().unique():
                    if mod != 'nan':
                        df_plot.loc[df_plot.PTMtype == mod, 'PTMshape'] = ptmShapeDict[mod]

    return(df_plot)

def plotSinglePeptideTraces(df_plot,protein,fasta):
    """Draw the peptide coverage and PTM markers of one sample.

    Args:
        df_plot: Plotting table with the columns ``seq_position``,
            ``modified_sequence``, ``height``, ``marker_size``,
            ``marker_symbol``, ``color``, ``PTM`` and ``PTMshape``
            (the layout returned by ``getPlotData``).
        protein: Protein identifier; key into ``fasta`` and shown in the title.
        fasta: Mapping from protein id to an object exposing ``.sequence``.

    Returns:
        A ``go.Figure`` with one scatter trace for the peptide backbone, one
        for the PTM markers (drawn 0.3 above the backbone) and one vertical
        line shape per PTM row connecting marker and backbone.
    """
    protein_sequence = fasta[protein].sequence

    ## Peptide backbone
    # Only positions that are covered by a peptide are drawn.
    df_plot_pep = df_plot.dropna(subset=['modified_sequence'])
    plot1 = go.Scatter(x=df_plot_pep.seq_position,
                       # y must come from the same filtered frame as x so
                       # both arrays have the same length and row alignment.
                       y=df_plot_pep.height,
                       xaxis='x1',
                       mode='markers',
                       marker_size=df_plot_pep.marker_size,
                       marker_symbol=df_plot_pep.marker_symbol,
                       marker_line_color=df_plot_pep.color,
                       marker_color=df_plot_pep.color,
                       marker_opacity=1,
                       # Hover text shows the 1-based residue number.
                       text=df_plot_pep.seq_position+1,
                       hovertemplate='%{text}', name='',
                       showlegend=False)

    ## PTM dots
    df_plot_ptm = df_plot.dropna(subset=['PTM'])
    plot2 = go.Scatter(x=df_plot_ptm.seq_position,
                       y=df_plot_ptm.height+0.3,
                       xaxis='x1',
                       mode='markers',
                       marker_size=8,
                       marker_symbol=df_plot_ptm.PTMshape,
                       marker_line_color=df_plot_ptm.color,
                       marker_color=df_plot_ptm.color,
                       marker_opacity=1,
                       text=df_plot_ptm.seq_position+1,
                       hovertemplate='%{text}', name='',
                       showlegend=False)

    layout = go.Layout(
            yaxis=dict(
                title = "",
                ticks = None,
                showticklabels=False,
                range=[-1, 2]
                ),
            xaxis=dict(
                title= 'protein sequence',
                tickmode = 'array',
                range=[-10, len(protein_sequence)+10],
                # One tick per residue, labelled with its amino acid letter.
                tickvals = np.arange(0,len(protein_sequence)),
                ticktext = list(protein_sequence),
                tickangle=0
            ),
        #showlegend=False,
        height=400, #width=1000,
        plot_bgcolor='rgba(0,0,0,0)',
        title=f"Sequence plot for {protein}:"
        )

    fig = go.Figure(data=[plot1,plot2], layout=layout)

    # Vertical connector from the backbone up to each PTM marker.
    ptm_rows = zip(df_plot_ptm.seq_position.values,
                   df_plot_ptm.height.values,
                   df_plot_ptm.color.values)
    for x_pos, y_base, line_color in ptm_rows:
        fig.add_shape(
            dict(
                type="line",
                x0=x_pos,
                y0=y_base,
                x1=x_pos,
                y1=y_base+0.3,
                line=dict(
                    color=line_color,
                    width=1
                )
            )
        )

    return fig

def plotPeptideTraces(df,name,protein,fasta):
    """Plot the peptide traces of one or several samples for a protein.

    Args:
        df: Either a single peptide DataFrame or a list of them (one per
            sample), each in the format expected by ``getPlotData``.
        name: Sample label (single DataFrame) or list of labels aligned with
            ``df`` (list input); used as y-axis tick text.
        protein: Protein identifier to plot.
        fasta: Mapping from protein id to an object exposing ``.sequence``.

    Returns:
        A ``go.Figure``. For list input every sample in which the protein was
        found is drawn on its own row (height 1, 2, ...). A legend entry is
        added for every observed modification that is a key of
        ``ptmShapeDict``.

    Raises:
        ValueError: if the protein is not present in any provided sample.
        TypeError: if ``df`` is neither a DataFrame nor a list.
    """
    colors = ['#E24A33', '#348ABD', '#988ED5', '#777777', '#FBC15E', '#8EBA42', '#FFB5B8']

    if isinstance(df, pd.DataFrame):
        df_plot = getPlotData(protein=protein,
                              df = df,
                              fasta = fasta)
        if df_plot is None:
            raise ValueError(f"Protein {protein} was not found in the provided data.")
        df_plot['color'] = colors[0]
        observed_mods = set(df_plot.PTMtype)

        fig = plotSinglePeptideTraces(df_plot,protein=protein,fasta = fasta)
        fig.update_layout(yaxis=dict(showticklabels=True,
                                     tickmode = 'array',
                                     tickvals = [0],
                                     ticktext = [name]))

    elif isinstance(df, list):
        all_plots = [getPlotData(protein=protein,
                                 df = d,
                                 fasta = fasta) for d in df]

        # Keep data and annotations only for samples where the protein was detected.
        valid_idx = [i for i, sample in enumerate(all_plots) if sample is not None]
        if not valid_idx:
            raise ValueError(f"Protein {protein} was not found in any of the provided samples.")
        df_plot = [all_plots[i] for i in valid_idx]
        name = [name[i] for i in valid_idx]

        observed_mods = set()
        for row, (sample_idx, sample) in enumerate(zip(valid_idx, df_plot)):
            # A sample keeps the colour of its original position; the palette
            # wraps around when there are more samples than colours.
            sample['color'] = colors[sample_idx % len(colors)]
            sample['height'] = 1 + row
            observed_mods.update(sample.PTMtype)

        plot_list = [plotSinglePeptideTraces(sample,protein=protein,fasta = fasta) for sample in df_plot]
        new_data = tuple(trace for p in plot_list for trace in p.data)
        # Collect the connector shapes of all samples exactly once. The layout
        # of the first figure is reused, so its own shapes must be replaced
        # rather than extended, otherwise they would be drawn twice.
        shapes = tuple(shape for p in plot_list for shape in p.layout.shapes)
        new_layout = plot_list[0].layout
        new_layout.shapes = shapes
        fig = go.Figure(data=new_data, layout=new_layout)
        fig.update_layout(yaxis=dict(range=[0,len(df_plot)+1],
                                     showticklabels=True,
                                     tickmode = 'array',
                                     tickvals = np.arange(0, len(df_plot))+1,
                                     ticktext = np.array(name)))

    else:
        raise TypeError(
            f"df must be a pandas DataFrame or a list of DataFrames, got {type(df).__name__}.")

    # Legend: one invisible black marker per observed, known modification.
    # PTMtype also contains NaN / 'nan' placeholders, which are dropped here
    # because they are not keys of ptmShapeDict.
    ptmShapeDict_sub = {key: ptmShapeDict[key] for key in observed_mods if key in ptmShapeDict}
    for mod_label, symbol in sorted(ptmShapeDict_sub.items()):
        fig.add_trace(go.Scatter(y=[None],
                                 mode='markers',
                                 marker=dict(symbol=symbol,
                                             color='black'),
                                 name=mod_label,
                                 showlegend=True))

    return fig

In [7]:
# Widgets
# Free-text protein picker; its options are filled in after the data upload.
select_protein = pn.widgets.AutocompleteInput(
    name='Select protein:',
    placeholder='Type first letters of the protein id...',
    min_characters=1,
)

# Organism whose FASTA file should be used.
select_organism = pn.widgets.Select(
    name='Select organism:', value='Human', options=['Human', 'Mouse', 'Rat'],
)

# Upload field for the experimental result table.
experimental_data = pn.widgets.FileInput(accept=".csv, .txt", margin=20)

# Holds the preprocessed experimental table once it has been uploaded.
preprocessed_exp_data = pn.widgets.DataFrame(name='Exp_data')

# Button that triggers loading of the data and rendering of the plot area.
visualize_button = pn.widgets.Button(
    name='Visualize Data',
    button_type='primary',
    css_classes=['padding_widgets'],
    height=40,
)

In [8]:
### Options
# Toggle groups, one per annotation category. 'Chain' and 'Peptide' are
# pre-selected for the preprocessing events; the other groups start empty.
options_preprocessing_events = pn.widgets.CheckButtonGroup(
    name='Preprocessing event(s)',
    value=['Chain', 'Peptide'],
    options=[
        'Chain', 'Initiator methionine', 'Peptide',
        'Propeptide', 'Signal peptide', 'Transit peptide',
    ],
    align='center',
)
options_PTMs = pn.widgets.CheckButtonGroup(
    name='PTM(s)',
    options=[
        'Cross-link', 'Disulfide bond', 'Glycosylation',
        'Lipidation', 'Modified residue',
    ],
    align='center',
)
options_domains = pn.widgets.CheckButtonGroup(
    name='Family & Domain(s)',
    options=[
        'Coiled coil', 'Compositional bias', 'Domain', 'Motif',
        'Region', 'Repeat', 'Zinc finger',
    ],
    align='center',
)
options_locations = pn.widgets.CheckButtonGroup(
    name='Subcellular location',
    options=['Intramembrane', 'Topological domain', 'Transmembrane'],
    align='center',
)
options_structures = pn.widgets.CheckButtonGroup(
    name='Structure',
    options=['Beta strand', 'Helix', 'Turn'],
    align='center',
)

# Collapsible container bundling all option groups.
options = pn.Accordion(
    options_preprocessing_events,
    options_PTMs,
    options_domains,
    options_locations,
    options_structures,
    width=600,
)

In [21]:
### PREPROCESSING
def upload_experimental_data():
    """Parse the uploaded file and publish the preprocessed table.

    Reads the bytes held by the ``experimental_data`` file input, imports them
    with ``importSpectronautData``, formats them with ``formatInputData``
    against the module-level ``full_fasta`` and stores the result in
    ``preprocessed_exp_data.value``. The unique protein ids of that table
    become the options of ``select_protein``.

    Raises:
        ValueError: if no file has been uploaded yet.
    """
    uploaded_bytes = experimental_data.value
    if uploaded_bytes is None:
        # Without this check str(None, "utf-8") fails with an opaque TypeError.
        raise ValueError("No experimental data file has been uploaded yet.")

    full_proteome_data = importSpectronautData(StringIO(str(uploaded_bytes, "utf-8")))
    preprocessed_exp_data.value = formatInputData(
        df = full_proteome_data,
        fasta = full_fasta,
        # Non-greedy match of every bracketed modification label.
        modification_exp = r'\[.*?\]')
    select_protein.options = preprocessed_exp_data.value.unique_protein_id.unique().tolist()
        
def upload_organism_info():
    """Load the FASTA index of the selected organism into ``full_fasta``.

    Only 'Human' is backed by a file (``human.fasta`` inside
    ``path_to_folder_fasta_files``). ``full_uniprot`` is declared global but
    not assigned yet.

    Raises:
        NotImplementedError: for any other organism. Silently continuing
            would leave ``full_fasta`` as ``None`` or pointing at a previously
            loaded proteome of a different organism.
    """
    global full_fasta
    global full_uniprot
    organism = select_organism.value
    if organism == 'Human':
        full_fasta = pyteomics.fasta.IndexedUniProt(os.path.join(path_to_folder_fasta_files, "human.fasta"))
    else:
        raise NotImplementedError(
            f"No FASTA file is configured for organism '{organism}' yet.")

In [17]:
### VISUALIZATION
@pn.depends(visualize_button.param.clicks, select_organism.param.value, experimental_data.param.value)
def visualize_data(clicks, org, exp):
    """Load all inputs and build the interactive part of the dashboard.

    Re-evaluated by Panel whenever the button click count, the selected
    organism or the uploaded file changes.

    Args:
        clicks: Click count of ``visualize_button``.
        org: Current organism selection (only used as a trigger).
        exp: Current content of the file input; ``None`` until a file is uploaded.

    Returns:
        A ``pn.Column`` holding the protein selector, the annotation options
        and the plot pane, or ``None`` while the button has not been pressed
        or no file has been uploaded.
    """
    # Nothing to show before the first click or without an uploaded file.
    if clicks > 0 and exp is not None:
        # preload the data
        upload_organism_info()
        upload_experimental_data()
        # create a layout
        app = pn.Column(
            pn.Row(
                pn.layout.VSpacer(width=200),
                select_protein,
                pn.layout.VSpacer(width=100),
                options,
                align='center'
            ),
            divider,
            # pn.panel is the supported factory; pn.Pane was deprecated and
            # later removed from Panel.
            pn.panel(
                visualize_plot,
                align='center',
                width_policy='max',
                width=1500)
        )
        return app

@pn.depends(select_protein.param.value)
def visualize_plot(_):
    """Return the peptide trace figure for the currently selected protein.

    Re-evaluated by Panel whenever the value of ``select_protein`` changes.
    Yields ``None`` while no protein is selected.
    """
    chosen_protein = select_protein.value
    if not chosen_protein:
        return None
    return plotPeptideTraces(
        preprocessed_exp_data.value,
        name='full proteome',
        protein=chosen_protein,
        fasta=full_fasta,
    )

In [18]:
# Top-level page: title, input box, trigger button and the reactive
# visualization area, separated by dividers.
layout = pn.Column(
    header_titel,
    divider,
    pn.WidgetBox(
        select_organism, experimental_data,
        margin=10, width=300, css_classes=['run_analysis'],
    ),
    divider,
    visualize_button,
    divider,
    visualize_data,
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_sub["modified_sequence"] = data_sub.apply(lambda row: re.sub('_','',row["EG.ModifiedSequence"]), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_sub["naked_sequence"] = data_sub.apply(lambda row: re.sub(r'\[.*?\]','',row["modified_sequence"]), axis=1)


In [19]:
# Mark the layout as servable so `panel serve` picks it up as the app root.
layout.servable()