In [None]:
import os

import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import math
from pathlib import Path

############################## SECTION FOR PEAK PARAMETERS, MUST RUN APQUANT IN PD 2.5 #######################################
# When truthy, the plot_*_boxplot functions write SVG/CSV files below APPFOLDER
# and return download links; when falsy they only build and return the figure.
WRITE_OUTPUT = False
# Path segment interpolated into the "/files/{url_base}/..." download links.
url_base = None
# Base folder in which the "images/" and "csv/" output directories are created.
APPFOLDER = "./"



In [None]:
def read_peaks_files(settings, data_files):
    """Load peak feature tables and build one per-sequence summary per group.

    Parameters
    ----------
    settings : pandas.DataFrame
        One row per group. The columns read here are "Group Name",
        "filter_in" and "filter_out".
        * "filter_in" is a pattern matched (``Series.str.contains``, i.e. as a
          regular expression) against the "Spectrum File" column. It may carry
          an "@<n>" suffix selecting ``data_files[n]``; without a suffix the
          first file is used.
        * "filter_out" is a pattern of spectrum files to exclude. An empty or
          missing value means nothing is excluded.
    data_files : list
        Paths of tab-separated feature tables. Each must contain the columns
        "Identified By", "Spectrum File", "Sequence", "Peak Apex", "FWHM",
        "Apex Intensity" and "Area".

    Returns
    -------
    dict
        Maps each group name to a DataFrame with one row per "Sequence" holding
        the median "Peak Apex" and the mean "FWHM", "Apex Intensity" and "Area".
        If a "filter_in" value contains more than one "@", an error is printed
        and only the groups processed so far are returned.
    """
    # Placeholder pattern used when a group defines no exclusion filter; it is
    # not expected to occur in any spectrum file name, so nothing is dropped.
    no_exclusion_pattern = "M@di"

    # load files, keeping only the features identified by MS/MS
    tables = []
    for each_file in data_files:
        table = pd.read_table(each_file, sep="\t")
        tables.append(table[table["Identified By"] == "MS/MS"])

    # create groups
    all_peaks = {}
    group_rows = zip(settings["Group Name"].tolist(),
                     settings["filter_in"].tolist(),
                     settings["filter_out"].tolist())
    for group_name, raw_in, raw_out in group_rows:
        # check for which file: "<pattern>@<file index>" or just "<pattern>"
        parts = str.split(raw_in, sep="@")
        if len(parts) == 2:
            filter_in, file_index = parts[0], int(parts[1])
        elif len(parts) == 1:
            filter_in, file_index = raw_in, 0
        else:
            print("error, multiple peak files per group not implemented")
            break

        # pd.isna covers None, float NaN and pd.NA alike (math.isnan raises on
        # values that are not real numbers)
        if isinstance(raw_out, str):
            filter_out = raw_out if raw_out else no_exclusion_pattern
        elif pd.isna(raw_out):
            filter_out = no_exclusion_pattern
        else:
            filter_out = raw_out

        # filter groups; na=False makes rows without a spectrum file name count
        # as "no match" instead of producing an unusable NaN mask
        table = tables[file_index]
        spectrum_files = table["Spectrum File"]
        wanted = spectrum_files.str.contains(filter_in, na=False)
        excluded = spectrum_files.str.contains(filter_out, na=False)
        group_data = table[wanted & ~excluded]

        # group
        all_peaks[group_name] = group_data.groupby("Sequence").agg({
            "Peak Apex": "median",
            "FWHM": "mean",
            "Apex Intensity": "mean",
            "Area": "mean"}).reset_index()
    return all_peaks

In [None]:
def FWHM_Boxplot(peakdata_object, plot_options, settings, username=None):
    """Collect the FWHM of sequences shared by all groups and plot them.

    Parameters
    ----------
    peakdata_object : dict
        Maps each group name to a DataFrame with "Sequence" and "FWHM" columns.
    plot_options : dict
        Passed through unchanged to ``plot_FWHM_boxplot``.
    settings : pandas.DataFrame
        Must contain a "Group Name" column listing the groups to plot; its
        remaining columns are attached to every plotted row.
    username : str, optional
        Passed through to ``plot_FWHM_boxplot``.

    Returns
    -------
    Whatever ``plot_FWHM_boxplot`` returns.

    Raises
    ------
    ValueError
        If ``settings`` lists no group.
    """
    group_names = settings["Group Name"].tolist()
    if not group_names:
        raise ValueError("settings must list at least one group in 'Group Name'")

    # Start from the first group and inner-join every further group on the
    # sequence, so only sequences present in all groups remain. With a single
    # group the loop does not run and that group's table is used as is.
    first_name = group_names[0]
    all_fwhm = pd.DataFrame({"Sequence": peakdata_object[first_name]["Sequence"],
                             first_name: peakdata_object[first_name]["FWHM"]})
    for each_group in group_names[1:]:
        current = pd.DataFrame({each_group: peakdata_object[each_group]["FWHM"],
                                "Sequence": peakdata_object[each_group]["Sequence"]})
        all_fwhm = pd.merge(current, all_fwhm, how="inner", on="Sequence")

    # wide (one column per group) -> long (Sequence, Group Name, FWHM)
    to_plot = all_fwhm.set_index("Sequence").melt(
        ignore_index=False, var_name="Group Name", value_name="FWHM").reset_index()
    to_plot = to_plot.merge(settings, how="inner", on="Group Name")
    return plot_FWHM_boxplot(to_plot, plot_options, username)

def plot_FWHM_boxplot(FWHMs, plot_options, username):
    """Build the FWHM box plot and, if WRITE_OUTPUT is set, export SVG and CSV.

    Parameters
    ----------
    FWHMs : pandas.DataFrame
        Long-format table with at least "Group Name" and "FWHM" columns, plus
        the columns named by "Group By X" / "Group By Color" in grouped mode.
    plot_options : dict
        Keys read: "log10", "ylimits", "outliers", "median label", "FWHM mode",
        "color", "X Title", "Y Title"; in grouped mode also "Group By X" and
        "Group By Color"; otherwise also "width", "height" and "font".
        The dict is not modified.
    username : str or None
        Prefix of the exported file names.

    Returns
    -------
    tuple
        ``(fig, CSV_link, SVG_link)``. Both links are None unless WRITE_OUTPUT
        is truthy.
    """
    CSV_link = None
    SVG_link = None

    # NOTE(review): here "log10" only drops non-positive values; no log
    # transform is applied, unlike the intensity and area plots. Confirm intent.
    if plot_options["log10"]:
        FWHMs = FWHMs[FWHMs["FWHM"] > 0]

    # Only a non-empty list is accepted as an explicit y range
    ylimits = plot_options["ylimits"]
    if not ylimits or ylimits == "[]" or not isinstance(ylimits, list):
        ylimits = None

    # Remove outliers. The setting is copied into a local so that the caller's
    # dict keeps its "remove" value and can be reused for another call.
    boxpoints = plot_options["outliers"]
    if boxpoints == "remove":
        q_low = FWHMs["FWHM"].quantile(0.01)
        q_hi = FWHMs["FWHM"].quantile(0.99)
        FWHMs = FWHMs[(FWHMs["FWHM"] < q_hi) & (FWHMs["FWHM"] > q_low)]
        boxpoints = False

    # Per-group median and mean. 'CoVar' holds the mean; the column name is
    # kept because it is written to the summary CSV.
    FWHM_summary = FWHMs.groupby(["Group Name"]).agg(
        {'FWHM': ['median', 'mean']}).reset_index()
    FWHM_summary.columns = ["Group Name", 'meds', 'CoVar']

    # median label, placed 15 % above the median
    if plot_options["median label"] in ("True", True):
        total_labels = [{"x": x, "y": total * 1.15, "text": str(round(total, 1)),
                         "showarrow": False}
                        for x, total in zip(FWHM_summary["Group Name"],
                                            FWHM_summary["meds"])]
    else:
        total_labels = []   # no median labels

    if plot_options["FWHM mode"] == "grouped":
        color_key = plot_options["Group By Color"]
        x_key = plot_options["Group By X"]
        palette = plot_options["color"]

        # find out present categories
        categories = FWHMs.groupby(color_key).first().reset_index()[color_key].tolist()

        # one box trace per category; width, height, font, y range and median
        # labels are not applied in this mode
        fig_data = []
        for index, each_category in enumerate(categories):
            in_category = FWHMs[color_key] == each_category
            fig_data.append(go.Box(
                name=each_category,
                x=FWHMs.loc[in_category, x_key].tolist(),
                y=FWHMs.loc[in_category, "FWHM"].tolist(),
                # cycle through the palette when there are more categories
                # than colors instead of failing with an IndexError
                fillcolor=palette[index % len(palette)] if palette else None,
                boxpoints=boxpoints,
                marker=dict(opacity=0),
            ))

        fig = go.Figure(data=fig_data)
        fig.update_layout(
            boxmode="group",
            plot_bgcolor='white',
            paper_bgcolor='white',
            yaxis=dict(title=plot_options["Y Title"], showline=True, linewidth=1, linecolor='black'),
            xaxis=dict(title=plot_options["X Title"], showline=True, linewidth=1, linecolor='black'),
        )
    else:
        # create the interactive plot
        # NOTE(review): the "outliers" setting is not passed to px.box, so this
        # mode always uses the px.box default for points. Confirm intent.
        fig = px.box(FWHMs,
                     x="Group Name",
                     y='FWHM',
                     color="Group Name",
                     color_discrete_sequence=plot_options["color"],
                     width=plot_options["width"],
                     height=plot_options["height"],
                     )
        fig.update_layout(
            yaxis=dict(title=plot_options["Y Title"],
                       range=ylimits, showline=True, linewidth=1, linecolor='black'),
            font=plot_options["font"],
            xaxis=dict(title=plot_options["X Title"], showline=True, linewidth=1, linecolor='black'),
            showlegend=True,
            annotations=total_labels,
            boxmode="group",
            plot_bgcolor='white',
            paper_bgcolor='white',
        )

    if WRITE_OUTPUT:
        # create the file for download
        img_dir = os.path.join(APPFOLDER, "images/")
        Path(img_dir).mkdir(parents=True, exist_ok=True)
        svg_name = f"{username}_FWHM_Boxplot.svg"
        fig.write_image(os.path.join(img_dir, svg_name),
                        format="svg", validate=False, engine="kaleido")

        # create the download CSV and its link
        data_dir = os.path.join(APPFOLDER, "csv/")
        Path(data_dir).mkdir(parents=True, exist_ok=True)
        csv_name = f"{username}_all_FWHMs.csv"
        FWHMs.to_csv(os.path.join(data_dir, csv_name), index=False)
        FWHM_summary.to_csv(os.path.join(
            data_dir, f"{username}_FWHM_Summary.csv"), index=False)
        print("Downloading links...")
        CSV_link = f"/files/{url_base}/csv/{csv_name}"

        # download SVG link; built from the name the file was written under so
        # it also resolves on case-sensitive file systems
        SVG_link = f"/files/{url_base}/images/{svg_name}"

    return fig, CSV_link, SVG_link
    
    

In [None]:
def Intensity_Boxplot(peakdata_object, plot_options, settings, username=None):
    """Collect the apex intensity of sequences shared by all groups and plot them.

    Parameters
    ----------
    peakdata_object : dict
        Maps each group name to a DataFrame with "Sequence" and
        "Apex Intensity" columns.
    plot_options : dict
        Passed through unchanged to ``plot_Intensity_boxplot``.
    settings : pandas.DataFrame
        Must contain a "Group Name" column listing the groups to plot; its
        remaining columns are attached to every plotted row.
    username : str, optional
        Passed through to ``plot_Intensity_boxplot``.

    Returns
    -------
    Whatever ``plot_Intensity_boxplot`` returns.

    Raises
    ------
    ValueError
        If ``settings`` lists no group.
    """
    group_names = settings["Group Name"].tolist()
    if not group_names:
        raise ValueError("settings must list at least one group in 'Group Name'")

    # Start from the first group and inner-join every further group on the
    # sequence, so only sequences present in all groups remain. With a single
    # group the loop does not run and that group's table is used as is.
    first_name = group_names[0]
    all_intensity = pd.DataFrame({"Sequence": peakdata_object[first_name]["Sequence"],
                                  first_name: peakdata_object[first_name]["Apex Intensity"]})
    for each_group in group_names[1:]:
        current = pd.DataFrame({each_group: peakdata_object[each_group]["Apex Intensity"],
                                "Sequence": peakdata_object[each_group]["Sequence"]})
        all_intensity = pd.merge(current, all_intensity, how="inner", on="Sequence")

    # wide (one column per group) -> long (Sequence, Group Name, Intensity)
    to_plot = all_intensity.set_index("Sequence").melt(
        ignore_index=False, var_name="Group Name", value_name="Intensity").reset_index()
    to_plot = to_plot.merge(settings, how="inner", on="Group Name")
    return plot_Intensity_boxplot(to_plot, plot_options, username)

def plot_Intensity_boxplot(Intensities, plot_options, username):
    """Build the intensity box plot and, if WRITE_OUTPUT is set, export SVG and CSV.

    Parameters
    ----------
    Intensities : pandas.DataFrame
        Long-format table with at least "Group Name" and "Intensity" columns,
        plus the columns named by "Group By X" / "Group By Color" in grouped
        mode. Rows with a non-positive intensity are always dropped.
    plot_options : dict
        Keys read: "log10", "ylimits", "outliers", "median label",
        "Intensity mode", "color", "X Title", "Y Title"; in grouped mode also
        "Group By X" and "Group By Color"; otherwise also "width", "height"
        and "font". The dict is not modified.
    username : str or None
        Prefix of the exported file names.

    Returns
    -------
    tuple
        ``(fig, CSV_link, SVG_link)``. Both links are None unless WRITE_OUTPUT
        is truthy.
    """
    CSV_link = None
    SVG_link = None

    Intensities = Intensities[Intensities["Intensity"] > 0]

    # Only a non-empty list is accepted as an explicit y range
    ylimits = plot_options["ylimits"]
    if not ylimits or ylimits == "[]" or not isinstance(ylimits, list):
        ylimits = None

    if plot_options["log10"]:
        # assign() builds a new frame instead of writing into the filtered
        # slice, which avoids pandas' SettingWithCopyWarning
        Intensities = Intensities.assign(Intensity=np.log10(Intensities["Intensity"]))

    # Remove outliers. The setting is copied into a local so that the caller's
    # dict keeps its "remove" value and can be reused for another call.
    boxpoints = plot_options["outliers"]
    if boxpoints == "remove":
        q_low = Intensities["Intensity"].quantile(0.01)
        q_hi = Intensities["Intensity"].quantile(0.99)
        Intensities = Intensities[(Intensities["Intensity"] < q_hi)
                                  & (Intensities["Intensity"] > q_low)]
        boxpoints = False

    # Per-group median and mean. 'CoVar' holds the mean; the column name is
    # kept because it is written to the summary CSV.
    Intensity_summary = Intensities.groupby(["Group Name"]).agg(
        {'Intensity': ['median', 'mean']}).reset_index()
    Intensity_summary.columns = ["Group Name", 'meds', 'CoVar']

    # median label, placed 15 % above the median
    if plot_options["median label"] in ("True", True):
        total_labels = [{"x": x, "y": total * 1.15, "text": str(round(total, 1)),
                         "showarrow": False}
                        for x, total in zip(Intensity_summary["Group Name"],
                                            Intensity_summary["meds"])]
    else:
        total_labels = []   # no median labels

    if plot_options["Intensity mode"] == "grouped":
        color_key = plot_options["Group By Color"]
        x_key = plot_options["Group By X"]
        palette = plot_options["color"]

        # find out present categories
        categories = Intensities.groupby(color_key).first().reset_index()[color_key].tolist()

        # one box trace per category; width, height, font, y range and median
        # labels are not applied in this mode
        fig_data = []
        for index, each_category in enumerate(categories):
            in_category = Intensities[color_key] == each_category
            fig_data.append(go.Box(
                name=each_category,
                x=Intensities.loc[in_category, x_key].tolist(),
                y=Intensities.loc[in_category, "Intensity"].tolist(),
                # cycle through the palette when there are more categories
                # than colors instead of failing with an IndexError
                fillcolor=palette[index % len(palette)] if palette else None,
                boxpoints=boxpoints,
                marker=dict(opacity=0),
            ))

        fig = go.Figure(data=fig_data)
        fig.update_layout(
            boxmode="group",
            plot_bgcolor='white',
            paper_bgcolor='white',
            yaxis=dict(title=plot_options["Y Title"], showline=True, linewidth=1, linecolor='black'),
            xaxis=dict(title=plot_options["X Title"], showline=True, linewidth=1, linecolor='black'))
    else:
        # create the interactive plot
        # NOTE(review): the "outliers" setting is not passed to px.box, so this
        # mode always uses the px.box default for points. Confirm intent.
        fig = px.box(Intensities,
                     x="Group Name",
                     y='Intensity',
                     color="Group Name",
                     color_discrete_sequence=plot_options["color"],
                     width=plot_options["width"],
                     height=plot_options["height"],
                     )
        fig.update_layout(
            yaxis=dict(title=plot_options["Y Title"],
                       range=ylimits, showline=True, linewidth=1, linecolor='black'),
            font=plot_options["font"],
            xaxis=dict(title=plot_options["X Title"], showline=True, linewidth=1, linecolor='black'),
            showlegend=True,
            annotations=total_labels,
            boxmode="group",
            plot_bgcolor='white',
            paper_bgcolor='white',
        )

    if WRITE_OUTPUT:
        # create the file for download
        img_dir = os.path.join(APPFOLDER, "images/")
        Path(img_dir).mkdir(parents=True, exist_ok=True)
        svg_name = f"{username}_Intensity_Boxplot.svg"
        fig.write_image(os.path.join(img_dir, svg_name),
                        format="svg", validate=False, engine="kaleido")

        # create the download CSV and its link
        data_dir = os.path.join(APPFOLDER, "csv/")
        Path(data_dir).mkdir(parents=True, exist_ok=True)
        csv_name = f"{username}_all_Intensity.csv"
        Intensities.to_csv(os.path.join(data_dir, csv_name), index=False)
        Intensity_summary.to_csv(os.path.join(
            data_dir, f"{username}_Intensity_Summary.csv"), index=False)
        print("Downloading links...")
        CSV_link = f"/files/{url_base}/csv/{csv_name}"

        # download SVG link; built from the name the file was written under so
        # it also resolves on case-sensitive file systems
        SVG_link = f"/files/{url_base}/images/{svg_name}"

    return fig, CSV_link, SVG_link
    
    

In [None]:
def Area_Boxplot(peakdata_object, plot_options, settings, username=None):
    """Collect the peak area of sequences shared by all groups and plot them.

    Parameters
    ----------
    peakdata_object : dict
        Maps each group name to a DataFrame with "Sequence" and "Area" columns.
    plot_options : dict
        Passed through unchanged to ``plot_Area_boxplot``.
    settings : pandas.DataFrame
        Must contain a "Group Name" column listing the groups to plot; its
        remaining columns are attached to every plotted row.
    username : str, optional
        Passed through to ``plot_Area_boxplot``.

    Returns
    -------
    Whatever ``plot_Area_boxplot`` returns.

    Raises
    ------
    ValueError
        If ``settings`` lists no group.
    """
    group_names = settings["Group Name"].tolist()
    if not group_names:
        raise ValueError("settings must list at least one group in 'Group Name'")

    # Start from the first group and inner-join every further group on the
    # sequence, so only sequences present in all groups remain. With a single
    # group the loop does not run and that group's table is used as is.
    first_name = group_names[0]
    all_area = pd.DataFrame({"Sequence": peakdata_object[first_name]["Sequence"],
                             first_name: peakdata_object[first_name]["Area"]})
    for each_group in group_names[1:]:
        current = pd.DataFrame({each_group: peakdata_object[each_group]["Area"],
                                "Sequence": peakdata_object[each_group]["Sequence"]})
        all_area = pd.merge(current, all_area, how="inner", on="Sequence")

    # wide (one column per group) -> long (Sequence, Group Name, Area)
    to_plot = all_area.set_index("Sequence").melt(
        ignore_index=False, var_name="Group Name", value_name="Area").reset_index()
    to_plot = to_plot.merge(settings, how="inner", on="Group Name")
    return plot_Area_boxplot(to_plot, plot_options, username)

def plot_Area_boxplot(Areas, plot_options, username):
    """Build the peak-area box plot and, if WRITE_OUTPUT is set, export SVG and CSV.

    Parameters
    ----------
    Areas : pandas.DataFrame
        Long-format table with at least "Group Name" and "Area" columns, plus
        the columns named by "Group By X" / "Group By Color" in grouped mode.
        Rows with a non-positive area are always dropped.
    plot_options : dict
        Keys read: "log10", "ylimits", "outliers", "median label", "Area mode",
        "color", "X Title", "Y Title"; in grouped mode also "Group By X" and
        "Group By Color"; otherwise also "width", "height" and "font".
        The dict is not modified.
    username : str or None
        Prefix of the exported file names.

    Returns
    -------
    tuple
        ``(fig, CSV_link, SVG_link)``. Both links are None unless WRITE_OUTPUT
        is truthy.
    """
    CSV_link = None
    SVG_link = None

    Areas = Areas[Areas["Area"] > 0]
    if plot_options["log10"]:
        # assign() builds a new frame instead of writing into the filtered
        # slice, which avoids pandas' SettingWithCopyWarning
        Areas = Areas.assign(Area=np.log10(Areas["Area"]))

    # Only a non-empty list is accepted as an explicit y range
    ylimits = plot_options["ylimits"]
    if not ylimits or ylimits == "[]" or not isinstance(ylimits, list):
        ylimits = None

    # Remove outliers. The setting is copied into a local so that the caller's
    # dict keeps its "remove" value and can be reused for another call.
    boxpoints = plot_options["outliers"]
    if boxpoints == "remove":
        q_low = Areas["Area"].quantile(0.01)
        q_hi = Areas["Area"].quantile(0.99)
        Areas = Areas[(Areas["Area"] < q_hi) & (Areas["Area"] > q_low)]
        boxpoints = False

    # Per-group median and mean. 'CoVar' holds the mean; the column name is
    # kept because it is written to the summary CSV.
    Area_summary = Areas.groupby(["Group Name"]).agg(
        {'Area': ['median', 'mean']}).reset_index()
    Area_summary.columns = ["Group Name", 'meds', 'CoVar']

    # median label, placed 15 % above the median
    if plot_options["median label"] in ("True", True):
        total_labels = [{"x": x, "y": total * 1.15, "text": str(round(total, 1)),
                         "showarrow": False}
                        for x, total in zip(Area_summary["Group Name"],
                                            Area_summary["meds"])]
    else:
        total_labels = []   # no median labels

    if plot_options["Area mode"] == "grouped":
        color_key = plot_options["Group By Color"]
        x_key = plot_options["Group By X"]
        palette = plot_options["color"]

        # find out present categories
        categories = Areas.groupby(color_key).first().reset_index()[color_key].tolist()

        # one box trace per category; width, height, font, y range and median
        # labels are not applied in this mode
        fig_data = []
        for index, each_category in enumerate(categories):
            in_category = Areas[color_key] == each_category
            fig_data.append(go.Box(
                name=each_category,
                x=Areas.loc[in_category, x_key].tolist(),
                y=Areas.loc[in_category, "Area"].tolist(),
                # cycle through the palette when there are more categories
                # than colors instead of failing with an IndexError
                fillcolor=palette[index % len(palette)] if palette else None,
                boxpoints=boxpoints,
                marker=dict(opacity=0),
            ))

        fig = go.Figure(data=fig_data)
        fig.update_layout(
            boxmode="group",
            plot_bgcolor='white',
            paper_bgcolor='white',
            yaxis=dict(title=plot_options["Y Title"], showline=True, linewidth=1, linecolor='black'),
            xaxis=dict(title=plot_options["X Title"], showline=True, linewidth=1, linecolor='black'),
            legend=dict(itemclick=False, itemdoubleclick=False),
        )
    else:
        # create the interactive plot
        # NOTE(review): the "outliers" setting is not passed to px.box, so this
        # mode always uses the px.box default for points. Confirm intent.
        fig = px.box(Areas,
                     x="Group Name",
                     y='Area',
                     color="Group Name",
                     color_discrete_sequence=plot_options["color"],
                     width=plot_options["width"],
                     height=plot_options["height"],
                     )
        fig.update_layout(
            yaxis=dict(title=plot_options["Y Title"],
                       range=ylimits, showline=True, linewidth=1, linecolor='black'),
            font=plot_options["font"],
            xaxis=dict(title=plot_options["X Title"], showline=True, linewidth=1, linecolor='black'),
            showlegend=True,
            annotations=total_labels,
            boxmode="group",
            plot_bgcolor='white',
            paper_bgcolor='white',
        )

    if WRITE_OUTPUT:
        # create the file for download
        img_dir = os.path.join(APPFOLDER, "images/")
        Path(img_dir).mkdir(parents=True, exist_ok=True)
        svg_name = f"{username}_Area_Boxplot.svg"
        fig.write_image(os.path.join(img_dir, svg_name),
                        format="svg", validate=False, engine="kaleido")

        # create the download CSV and its link
        data_dir = os.path.join(APPFOLDER, "csv/")
        Path(data_dir).mkdir(parents=True, exist_ok=True)
        csv_name = f"{username}_all_Areas.csv"
        Areas.to_csv(os.path.join(data_dir, csv_name), index=False)
        Area_summary.to_csv(os.path.join(
            data_dir, f"{username}_Area_Summary.csv"), index=False)
        print("Downloading links...")
        CSV_link = f"/files/{url_base}/csv/{csv_name}"

        # download SVG link; built from the name the file was written under
        # (the previous link named a "..._Area_Violin_Plot.svg" file that is
        # never created)
        SVG_link = f"/files/{url_base}/images/{svg_name}"

    return fig, CSV_link, SVG_link
    
    

In [None]:
# Input files: the tab-separated group settings table and the feature tables
PEAKS_SETTINGS_FILE = "peaks_settings.txt"
PEAKS_DATA_FILES = [
    "C12_apQuant_apQuantFeatures.txt",
    "50umC12_apQuantFeatures.txt",
]

# Read the group definitions, then build the per-group peak summaries
saved_peak_settings = pd.read_table(PEAKS_SETTINGS_FILE, sep="\t")
peakdata_obj = read_peaks_files(saved_peak_settings, PEAKS_DATA_FILES)



In [None]:
# FWHM boxplot
display(peakdata_obj)

# Options read by FWHM_Boxplot / plot_FWHM_boxplot
plot_options = {
    "median label": "True",
    "X Title": "Conditions",
    "Y Title": "FWHM (s)",
    "color": [
        "blue", "cyan", "black", "gray", "purple", "pink",
        "orange", "brown", "pink", "gray", "olive", "cyan",
    ],
    "log10": True,
    "width": 700,
    "height": 450,
    "font": {"size": 16, "family": "Arial black"},
    # accepted values: False, "all", "outliers", "suspectedoutliers", "remove"
    "outliers": "all",
    "ylimits": [],
    # accepted values: "grouped", "ungrouped"
    "FWHM mode": "ungrouped",
    "Group By X": "Sample Amount",
    "Group By Color": "SPE Type",
    "help for information only": (
        "median label options: True or False."
        "color: the first few colors will be used"
    ),
}

figure, _, _ = FWHM_Boxplot(peakdata_obj, plot_options, saved_peak_settings)
figure.show()



In [None]:
# Intensity boxplot
# (the former bare display() call had no arguments and displayed nothing)

# Options read by Intensity_Boxplot / plot_Intensity_boxplot
plot_options = {
    "median label": "True",
    "X Title": "Conditions",
    "Y Title": "Intensity",
    "color": [
        "blue", "cyan", "black", "gray", "purple", "pink",
        "orange", "brown", "pink", "gray", "olive", "cyan",
    ],
    "log10": False,
    "width": 700,
    "height": 450,
    "font": dict(size=16, family="Arial black"),
    # accepted values: False, "all", "outliers", "suspectedoutliers", "remove"
    "outliers": "all",
    "ylimits": [],
    # accepted values: "grouped", "ungrouped"
    "Intensity mode": "grouped",
    "Group By X": "Sample Amount",
    "Group By Color": "SPE Type",
    "help for information only": (
        "median labelel options: True or False."
        "color: the first few colors will be used"
    ),
}

figure, _, _ = Intensity_Boxplot(peakdata_obj, plot_options, saved_peak_settings)
figure.show()



In [None]:
# Area boxplot
# (the former bare display() call had no arguments and displayed nothing)

# Options read by Area_Boxplot / plot_Area_boxplot
plot_options = {
    "median label": "True",
    "X Title": "Conditions",
    "Y Title": "Area",
    "color": [
        "blue", "cyan", "black", "gray", "purple", "pink",
        "orange", "brown", "pink", "gray", "olive", "cyan",
    ],
    "width": 700,
    "height": 450,
    "font": dict(size=16, family="Arial black"),
    # accepted values: False, "all", "outliers", "suspectedoutliers", "remove"
    "outliers": "remove",
    "log10": False,
    "ylimits": [],
    # accepted values: "grouped", "ungrouped"
    "Area mode": "grouped",
    "Group By X": "Sample Amount",
    "Group By Color": "SPE Type",
    "help for information only": (
        "median labelel options: True or False."
        "color: the first few colors will be used"
    ),
}

figure, _, _ = Area_Boxplot(peakdata_obj, plot_options, saved_peak_settings)
figure.show()

