In [1]:
import pandas as pd
import numpy as np
import panel as pn
pn.extension("plotly")
import plotly.express as px
import re
from bokeh.models.widgets.tables import NumberFormatter

In [2]:
database = False

if not database:
    df_iBAQ = pd.read_csv("data/iBAQ_processed_for_plotting.csv")
    df_iBAQ.set_index("query id", inplace=True)
    
    df_LFQ = pd.read_csv("data/LFQ6PCA_Perseus_plusprofiles.txt", sep='\t')
    df_LFQ.insert(0, "query id", [el.split(".")[0] for el in df_LFQ["Majority protein IDs"]])
    df_LFQ.set_index("query id", inplace=True)
    

In [3]:
def fetch_iBAQ(ids):
    df_query = pd.DataFrame()
    message = ""
    global database
    if not database:
        global df_iBAQ
        df = df_iBAQ.copy()
        for q in ids:
            try:
                df_query = pd.concat([df_query, df.loc[q,:]], axis=1)
            except:
                if "+" in q:
                    q_split = [el.strip() for el in q.split("+")]
                    df_query_split = pd.DataFrame()
                    for qs in q_split:
                        try:
                            df_query_split = pd.concat([df_query_split, df.loc[qs,:]], axis=1)
                        except:
                            message = message+qs+" not in data\n\n"
                    if len(df_query_split) > 0:
                        df_query_split = df_query_split.replace(np.nan, "").sum(axis=1)
                        df_query_split.name = q
                    df_query = pd.concat([df_query, df_query_split], axis=1)
                else:
                    message = message+q+" not in data\n\n"
        df_query = pd.DataFrame(df_query).T
        df_query.index.name = "query id"
        df_query = df_query.reset_index()[["query id", "Protein names", "Gene names",
                                           "median full proteome", "stdev full proteome",
                                           "median summed membranes", "stdev summed membranes"]]
        df_query = df_query.set_index(["query id", "Protein names", "Gene names"])
        df_query.columns = pd.MultiIndex.from_arrays(
            arrays=[[el.split(" ")[0]+" iBAQ intensity" for el in df_query.columns],
                    [" ".join(el.split(" ")[1:3]) for el in df_query.columns]],
            names=["Type", "Quantification"])
        df_query = df_query.stack("Quantification")
        df_query = df_query.reset_index()
        df_query.columns.name = None
        return df_query, message

def plot_iBAQ(df_query, log=True, sort=True, norm=False):
    message=""
    # Enter option for normalization here
    if norm == False or norm == "False" or norm == "None":
        title = "iBAQ based quantification"
        None
    elif norm == 'median':
        title = "iBAQ based quantification relative to the median"
        None
    elif norm in df_query.loc["query id"].values:
        title = "iBAQ based quantification relative to "+norm
        None
    else:
        message = "Couldn't normalize data due to invalid input."
    if sort:
        df_query = df_query.sort_values("median iBAQ intensity")
    fig = px.bar(data_frame=df_query,
                 x="query id",
                 y="median iBAQ intensity",
                 log_y=log,
                 color="Quantification",
                 color_discrete_map={"full proteome": "#EF553B", "summed membranes": "#636EFA"},
                 error_y="stdev iBAQ intensity",
                 hover_data=["Protein names", "Gene names"],
                 custom_data=df_query.columns,
                 barmode="group",
                 width=18*len(df_query)+270,
                 height=500,
                 title=title)
    df_query["Gene names"] = df_query["Gene names"].apply(lambda x: x if str(x) == "nan" else x.replace(";", "; "))
    df_display = df_query.set_index(["query id", "Protein names", "Gene names", "Quantification"]).unstack("Quantification")
    
    return fig, message, df_display

def panel_iBAQ(ids, log=True, sort=True, norm=False):
    df_query, message_fetch = fetch_iBAQ(ids)
    fig, message_plot, df_display = plot_iBAQ(df_query, log=log, sort=sort, norm=norm)
    return pn.Column(pn.Row(pn.Pane(fig, css_classes=["content-width"]), height=520, css_classes=["horizontal-overflow", "content-width"]),
                     message_fetch+message_plot,
                     pn.Row(pn.pane.DataFrame(df_display, float_format="{:.2e}".format, width=700)))

In [4]:
def fetch_LFQ(ids):
    df_query = pd.DataFrame()
    global database
    message = ""
    if not database:
        global df_LFQ
        df = df_LFQ.copy()
        df = df.replace(np.nan, "")
        for q in ids:
            try:
                df_query = pd.concat([df_query, df.loc[q,:]], axis=1)
            except:
                if "+" in q:
                    q_split = [el.strip() for el in q.split("+")]
                    for qs in q_split:
                        try:
                            df_query = pd.concat([df_query, df.loc[qs,:]], axis=1)
                        except:
                            message = message+qs+" not in data\n\n"
                else:
                    message = message+q+" not in data\n\n"
        df_query = df_query.T
        df_query["Localisation"] = df_query["Localisation"].transform(lambda x: "Query")
    return df_query, message
    
def plot_PCA2D(df_query, components=[1,3]):
    global database
    if not database:
        global df_LFQ
        df = df_LFQ[["Component "+str(components[0]), "Component "+str(components[1]),
                     "Protein names", "Gene names", "Localisation"]]
        df["Localisation"].replace(np.nan, "undefined", inplace=True)
        df = pd.concat([df, df_query[["Component "+str(components[0]), "Component "+str(components[1]),
                                      "Protein names", "Gene names", "Localisation"]]], axis=0)
    fig_pca = px.scatter(data_frame=df,
                         x="Component "+str(components[0]),
                         y="Component "+str(components[1]),
                         color="Localisation",
                         color_discrete_map={
                             "undefined": "lightgrey",
                             "Mitochondrion": "turquoise",
                             "Plastid": "green",
                             "ER": "brown",
                             "Golgi": "red",
                             "PM": "blue",
                             "Ribosome": "orange",
                             "PostGolgi": "purple",
                             "Vacuole": "yellow",
                             "Query": "black"
                         },
                         title="Protein subcellular localization by PCA",
                         hover_data=["Protein names", "Gene names", "Localisation"],
                         custom_data=df.columns,
                         opacity=0.9
                        )
    return fig_pca

def plot_profiles(df_query):
    df = df_query[[el for el in df_query.columns if "profile" in el]+["Protein names", "Gene names"]]
    df = df.replace(np.nan, "")
    df.index.name="query id"
    df.set_index(["Protein names", "Gene names"], append=True, inplace=True)
    df.columns = pd.MultiIndex.from_arrays(
        arrays=[[re.findall("rep(.)", el)[0] for el in df.columns],
                [int(re.findall("([1234567890]+)K", el)[0]) for el in df.columns]],
        names=["Replicate", "Fraction"])
    df.sort_index(1, inplace=True)
    df = df.stack(["Replicate", "Fraction"])
    df.name = "% protein"
    df = pd.DataFrame(df).reset_index()
    df["Fraction"] = [str(el)+"K" for el in df["Fraction"]]
    fig_profiles = px.line(data_frame=df,
                           x="Fraction",
                           y="% protein",
                           color="query id",
                           line_group="Replicate",
                           hover_data=["Protein names", "Gene names"],
                           title="Protein distribution profiles",
                           width=500
                          )
    return fig_profiles

def panel_LFQ(ids, components=[1,3]):
    df_query, message = fetch_LFQ(ids)
    fig_PCA = plot_PCA2D(df_query, components)
    fig_profiles = plot_profiles(df_query)
    return pn.Column(pn.Row(fig_PCA, fig_profiles), message)


In [5]:
l=0
def xyz(event):
    l = event.new
    return l

In [6]:
i_query_multi = pn.widgets.TextAreaInput(
    value="AT3G11830\nAT5G16070\nAT5G20890\nAT5G26360\nAT1G24510\nAT3G18190\nAT3G20050\nAT3G03960",
    sizing_mode="fixed", width=300, height=200, name="Enter Araport identifiers separated by newline, comma or semicolon")

i_presets = pn.widgets.Select(
    options=["CCT complex", "Coatomer", "SAGA complex", "AP1/2", "20S proteasome", "cis Golgi proteins", "photosystem"],
    name="Predefined lists", width=300
)

def update_genelist(event):
    l = event.new
    lg_dict = {"CCT complex": "AT3G11830,AT5G16070,AT5G20890,AT5G26360,AT1G24510,AT3G18190,AT3G20050,AT3G03960",
               "Coatomer": "AT1G62020,AT4G34450,AT4G31490,AT1G79990,AT1G30630,AT4G08520",
               "SAGA complex": "AT5G25150,AT3G54610,AT1G54360,AT1G54140,AT4G38130,AT4G31720",
               "AP1/2": "AT1G60070,AT2G17380,AT4G23460,AT5G22780,AT1G47830,AT5G46630,AT1G10730",
               "20S proteasome": "AT1G13060,AT1G21720,AT3G22110,AT3G22630,AT5G40580,AT1G16470,AT1G47250,AT1G53850,"\
                                 "AT1G56450,AT2G27020,AT3G60820,AT4G31300,AT5G35590,AT3G51260,AT3G53230",
               "cis Golgi proteins": "AT1G05720,AT1G07230,AT1G10950,AT1G15020,AT1G18580,AT1G20270,AT1G29060,AT1G29310,"\
                                     "AT1G51590,AT1G52420,AT1G53710,AT1G62330,AT1G65820,AT1G76270,AT1G77370,AT1G78920,"\
                                     "AT2G01070,AT2G14740,AT2G17720,AT2G20130,AT2G20810,AT2G40280,AT2G43080,AT2G47320,"\
                                     "AT3G06300,AT3G09090,AT3G21160,AT3G24160,AT3G28480,AT3G48280,AT4G01210,AT4G24530,"\
                                     "AT5G04480,AT5G06660,AT5G14430,AT5G14950,AT5G18900,AT5G27330,AT5G47780,AT5G65470,"\
                                     "AT5G66060",
               "photosystem": "AT2G33040,AT5G13450,ATCG00280,AT1G31330,AT1G29920,ATCG00340,ATCG00350,AT1G61520,"\
                              "ATCG00580,ATCG00710,AT4G12800,AT4G10340,AT3G08940,AT3G54890,ATCG00270,ATCG00020,"\
                              "AT5G13440,AT1G55670,AT4G22890,AT3G47470,AT1G45474"}
    if l in lg_dict.keys():
        i_query_multi.value = lg_dict[l]
i_presets.param.watch(update_genelist, 'value')

@pn.depends(i_query_multi)
def multi_query(query_multi):
    
    # validate input:
    query_multi_split = [el.strip() for el in query_multi.split("\n")]
    if len(query_multi_split) == 1:
        query_multi_split = [el.strip() for el in query_multi.split(",")]
    if len(query_multi_split) == 1:
        query_multi_split = [el.strip() for el in query_multi.split(";")]
    
    query_multi = [el.upper() for el in query_multi_split if len(el) > 0]
    
    try:
        fig_iBAQ = panel_iBAQ(query_multi)
    except Exception as ex:
        fig_iBAQ = str(ex)
    try:
        fig_pca = panel_LFQ(query_multi)
    except Exception as ex:
        fig_pca = str(ex)
    
    return pn.Column(fig_pca, fig_iBAQ)

dashboard_multi = pn.Column(i_presets, i_query_multi, multi_query, name="Home", css_classes=["content-width"])




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [7]:
app_tabs = pn.Tabs(margin=10, css_classes=["content-width"])
app_tabs.append(("Home", dashboard_multi))
app_tabs.append(("Individual maps", "Show PCA and switch between maps"))
app_tabs.append(("Neighborhood analysis", "Network analysis tool, initialized with same query as on home screen"))
app_tabs.append(("Download", "Download section either for full data or queried slice"))
app_tabs.append(("About", "Explanation of what's going on here"))


In [8]:
i_search = pn.widgets.TextInput(name="Search")
app_center = pn.Column(pn.Row(pn.Pane("# Plant map browser"),
                              pn.layout.HSpacer(),
                              i_search,
                              margin=10),
                       app_tabs,
                       pn.Spacer(background="#DDDDDD", height=100, margin=0))

In [9]:
app = pn.GridSpec(sizing_mode="stretch_both", margin=0)
app[0,0] = pn.Spacer(background="#DDDDDD", margin=0)
app[0,9] = pn.Spacer(background="#DDDDDD", margin=0)
app[0,1:8] = app_center

In [10]:
copy_source_button = pn.widgets.Button(name="✂ Copy query ids to clipboard", button_type="success", width=200)

def search_data(event):
    global database
    if not database:
        global df_iBAQ
    df_search = df_iBAQ[['Protein IDs', 'Majority protein IDs', 'Gene names', 'Protein names',
                         'GO biological process slim', 'GO cellular component slim',
                         'GO molecular function slim']].copy()
    search = re.compile(".*"+event.new+".*")
    df_out = df_search.loc[[bool(search.match(row)) for row in
                            df_search.apply(lambda x: " ".join([str(el) for el in x.values]), axis=1)],:]
    df_out = df_out.apply(
        lambda x: [el if str(el) == "NaN" else str(el).replace(";", "; ") for el in x]
    ).sort_values("Gene names")
    old_result = [i for i,obj in zip(range(len(app_tabs.objects)), app_tabs.objects) if obj.name=="Search result"]
    if len(old_result) > 0:
        app_tabs.pop(old_result[0])
    copy_source_code = "navigator.clipboard.writeText(source);"
    copy_source_button.js_on_click(args={"source": "\n".join(df_out.index.values)}, code=copy_source_code)
    app_tabs.append(("Search result", pn.Column(copy_source_button,
                                                pn.pane.DataFrame(df_out, css_classes=["content-width"]),
                                                name="Search result", css_classes=["content-width"])))
    app_tabs.active = len(app_tabs.objects)-1
i_search.param.watch(search_data, 'value')

Watcher(inst=TextInput(name='Search'), cls=<class 'panel.widgets.input.TextInput'>, fn=<function search_data at 0x000001CEFB515700>, mode='args', onlychanged=True, parameter_names=('value',), what='value', queued=False)

In [11]:
pwd = pn.widgets.PasswordInput(name="Please enter password for access.")
app_container = pn.Column(pwd)

def check_pwd(event, app=app):
    pwd = event.new
    if pwd == "STL1Exo":
        app_container[0]=app
pwd.param.watch(check_pwd, 'value')

Watcher(inst=PasswordInput(name='Please enter p...), cls=<class 'panel.widgets.input.PasswordInput'>, fn=<function check_pwd at 0x000001CEFB515E50>, mode='args', onlychanged=True, parameter_names=('value',), what='value', queued=False)

In [12]:
app

In [13]:
app_container.servable()