In [3]:
import pandas as pd
import numpy as np

from scipy import stats

import plotly.express as px
import plotly.graph_objects as go

In [4]:
# Experiment identifier; interpolated into the input file paths when the
# quantification and annotation tables are read.
experiment_name = "20241210_Ex0016_HILICZ_LC-DDA_Ecoli_Com20"

In [8]:
# Read the quantification table of the first acquisition mode and keep only
# the columns whose header contains "Peak area".
modes = ["neg", "pos"]
quant_df = pd.read_csv(f"../../../../Data/{experiment_name}/fbmn_{modes[0]}_quant.csv")

quantifications_df = quant_df.filter(like="Peak area", axis="columns")

In [9]:
def plot_quantification_heatmap(quantifications_df:pd.DataFrame, **kwargs):
    """Display a heatmap of a quantification table.

    The frame's columns are placed on the x axis, its index on the y axis,
    and the cell values are mapped to colour.

    Parameters
    ----------
    quantifications_df : pd.DataFrame
        Table to draw.
    **kwargs
        Extra options forwarded to ``go.Heatmap``. Keys supplied here take
        precedence over the defaults set by this function (``x``, ``y``,
        ``z`` and ``colorscale``).

    Returns
    -------
    None
        The figure is shown via ``fig.show()``.
    """
    # Merge the defaults with the caller's options so that, e.g., passing
    # ``colorscale=...`` overrides the default instead of raising
    # "got multiple values for keyword argument".
    heatmap_options = {
        "x": quantifications_df.columns,
        "y": quantifications_df.index,
        "z": quantifications_df,
        "colorscale": "Inferno",
        **kwargs,
    }
    fig = go.Figure(data=go.Heatmap(**heatmap_options), layout={"height": 800})
    fig.show()


plot_quantification_heatmap(quantifications_df=quantifications_df)

## All samples

### Accumulation in strains

In [10]:
# Per column, count the entries with a peak area above zero, then plot the
# distribution of those counts.
summed_signals = (quantifications_df > 0.0).sum(axis=0).to_frame(name="Signals found")
fig = px.histogram(summed_signals, x="Signals found", nbins=20)
fig.update_layout(yaxis_title="Number of strains (#)")
fig.show()

In [6]:
# Per column, add up all peak areas, then plot the distribution of the totals.
summed_signals = quantifications_df.sum(axis=0).to_frame(name="Signal intensity")
fig = px.histogram(summed_signals, x="Signal intensity", nbins=20)
fig.update_layout(yaxis_title="Number of strains (#)")
fig.show()

### Scoring (Z-Score)

In [11]:
# axis=1: every row is standardised across its columns.
zscores = stats.zscore(quantifications_df, axis=1)

In [12]:
def plot_zscore_heatmap(zscores, range:tuple[float, float]=None, **kwargs):
    """Display a heatmap of z-scores.

    Parameters
    ----------
    zscores
        Two-dimensional table of scores, passed to ``go.Heatmap`` as ``z``.
    range : tuple[float, float], optional
        ``(zmin, zmax)`` limits of the colour scale. When omitted (or empty)
        both limits are passed as ``None``. Note that this parameter shadows
        the builtin ``range`` inside the function.
    **kwargs
        Extra options forwarded to ``go.Heatmap`` (for example ``x`` and
        ``y`` labels). Keys supplied here take precedence over the defaults
        set by this function (``z``, ``zmin``, ``zmax``, ``colorscale``).

    Returns
    -------
    None
        The figure is shown via ``fig.show()``.
    """
    # Merge defaults with caller options so that an explicit ``colorscale``
    # (or ``zmin``/``zmax``) overrides the default instead of raising
    # "got multiple values for keyword argument".
    heatmap_options = {
        "z": zscores,
        "zmin": range[0] if range else None,
        "zmax": range[1] if range else None,
        "colorscale": "Tropic",
        **kwargs,
    }
    fig = go.Figure(data=go.Heatmap(**heatmap_options), layout={"height": 800})
    fig.show()


plot_zscore_heatmap(zscores=zscores, range=(-15.0, 15.0), x=quantifications_df.columns, y=quantifications_df.index)

In [9]:
def plot_cutoff_accumulation(zscores:pd.DataFrame, cutoff_range:tuple, axis:int=0, sample_marker=None, jitter:float=0.5, marker_size:int=6, **kwargs):
    """Strip plot of how many z-scores exceed a series of absolute cutoffs.

    For every integer cutoff ``c`` in ``range(*cutoff_range)`` the entries
    with ``|z| > c`` are counted along ``axis``; each cutoff becomes one
    strip in the plot.

    Parameters
    ----------
    zscores : pd.DataFrame
        Table of z-scores.
    cutoff_range : tuple
        Arguments unpacked into ``range`` (e.g. ``(4, 10)`` gives the
        cutoffs 4..9).
    axis : int, default 0
        Axis the counts are summed over. ``0`` sums down the rows and yields
        one count per column; ``1`` sums across the columns and yields one
        count per row. It also selects the y-axis title.
    sample_marker : str, optional
        When given, points whose label contains this substring are coloured
        separately. Labels are converted with ``str`` before the check, so
        non-string labels (such as integer row IDs) do not raise.
    jitter : float, default 0.5
        Jitter applied to the strip traces.
    marker_size : int, default 6
        Marker size of the strip traces.
    **kwargs
        Extra options forwarded to ``px.strip``.

    Returns
    -------
    None
        The figure is shown via ``fig.show()``.
    """
    # |z| > c is the same as (z > c) | (z < -c); computing the magnitude once
    # avoids rebuilding two boolean frames for every cutoff.
    magnitudes = np.abs(zscores)
    score_cutoffs = pd.DataFrame({cutoff: np.sum(magnitudes > cutoff, axis=axis) for cutoff in range(*cutoff_range)})
    names = score_cutoffs.index

    color = None
    if sample_marker:
        # str() keeps the substring test valid for non-string labels.
        score_cutoffs[sample_marker] = [sample_marker in str(name) for name in names]
        color = sample_marker

    fig = px.strip(score_cutoffs, color=color, hover_name=names, **kwargs)
    fig.update_layout( xaxis_title="z-score cutoff", yaxis_title="Strains per metabolites" if axis else "Metabolites per strain",
                       hovermode="x" )
    fig.update_traces(jitter=jitter, marker={'size': marker_size})
    fig.show()

In [10]:
# axis=1: one point per row of `zscores`, counting the columns whose |z|
# exceeds each cutoff from 3 to 9.
plot_cutoff_accumulation(zscores=zscores, cutoff_range=(3,10), axis=1, template="seaborn", jitter=1.0, marker_size=5)

In [11]:
# axis=0: one point per column of `zscores`, counting the rows whose |z|
# exceeds each cutoff from 4 to 9; columns whose name contains "blank" are
# coloured separately.
plot_cutoff_accumulation(zscores=zscores, cutoff_range=(4,10), axis=0, sample_marker="blank", template="seaborn")

## Without blanks

In [12]:
# Reload the quantification table from the processed data and keep only the
# "Peak area" columns.
quant_df = pd.read_csv(f"../../data/processed/{experiment_name}_iimn_gnps_quant.csv")
quantifications_df = quant_df[[col for col in quant_df.columns if "Peak area" in col]]

annotations_mzmine_df = pd.read_csv(f"../../data/processed/{experiment_name}_annotations.csv")
# Collapse all annotations of one feature id into a single newline-separated
# label. The unique names are sorted because the iteration order of a set of
# strings can differ between interpreter runs, which would make the labels
# irreproducible; missing names are dropped so that the join cannot fail.
annot_df = (
    annotations_mzmine_df
    .groupby("id")
    .agg({"compound_name": lambda names: "\n".join(sorted(set(names.dropna())))})
    .reset_index()
)
# Right merge: keeps one row per entry of quant_df, in quant_df order;
# features without annotation get empty strings.
annot_df = pd.merge(annot_df, quant_df, left_on="id", right_on="row ID", how="right").fillna("")

In [13]:
# Remove every column whose name mentions "blank" (case-insensitive).
quantifications_xblank = quantifications_df.drop(
    columns=[name for name in quantifications_df.columns if "blank" in name.lower()]
)

In [14]:
# Same heatmap as before, now on the table without the "blank" columns.
plot_quantification_heatmap(quantifications_df=quantifications_xblank)

### Scoring (Z-Score)

In [15]:
# axis=1: every row is standardised across the remaining (non-blank) columns.
zscores_xblank = stats.zscore(quantifications_xblank, axis=1)

In [16]:
# Label each row with its compound name, falling back to the feature's
# "row ID" where the name is empty. Vectorised with Series.where instead of a
# row-by-row iterrows() loop.
# NOTE(review): assumes annot_df rows are in the same order as the rows of
# zscores_xblank — confirm.
zscores_xblank.index = (
    annot_df["compound_name"]
    .where(annot_df["compound_name"] != "", annot_df["row ID"])
    .tolist()
)

In [17]:
# Display the z-score table with its new row labels.
zscores_xblank

Unnamed: 0,moeB_R1_P3-F1_neg.mzML Peak area,kdpA_R1_P3-G5_neg.mzML Peak area,proP_R1_P3-H6_neg.mzML Peak area,gcd_R1_P3-D6_neg.mzML Peak area,menH_R1_P3-C6_pos.mzML Peak area,yihV_R1_P3-F9_pos.mzML Peak area,metQ_R1_P3-E5_pos.mzML Peak area,lipB_R1_P3-C11_pos.mzML Peak area,cysP_R1_P3-H11_pos.mzML Peak area,lptG_R1_P3-E2_pos.mzML Peak area,...,hldD_R1_P3-F12_neg.mzML Peak area,lpxB_R1_P3-A10_neg.mzML Peak area,gcd_R1_P3-D6_pos.mzML Peak area,arnB_R1_P3-F5_pos.mzML Peak area,serB_R1_P3-A7_pos.mzML Peak area,ampH_R1_P3-D3_neg.mzML Peak area,panF_R1_P3-G3_pos.mzML Peak area,cpdA_R1_P3-C5_pos.mzML Peak area,adhE_R1_P3-A6_pos.mzML Peak area,hldD_R1_P3-F12_pos.mzML Peak area
1251,-0.254937,-0.254937,-0.254937,-0.254937,0.784100,-0.254937,-0.254937,-0.254937,-0.254937,-0.254937,...,0.754940,-0.254937,-0.254937,-0.254937,-0.254937,-0.254937,-0.254937,0.873602,-0.254937,-0.254937
2-Oxobutanoate\nSuccinic semialdehyde\nAcetoacetate,1.754825,0.543027,1.273565,0.398939,-0.880613,-0.880613,-0.880613,-0.880613,-0.880613,-0.880613,...,0.230899,0.537182,-0.880613,-0.847111,-0.842432,1.194448,-0.880613,-0.880613,-0.880613,-0.867955
L-Allo-threonine\n4-Aminobutanoate\nL-Homoserine\nL-Threonine,-0.883576,-0.949120,-0.906441,-0.807325,0.709626,-0.047627,0.309009,0.286713,4.443640,-0.047449,...,-0.912445,-0.798930,1.133536,0.133322,0.664206,-0.799248,0.730711,0.426030,0.299676,0.434035
1339,-0.814510,-0.814510,-0.814510,-0.814510,1.721288,0.470423,-0.562811,0.089161,0.785567,-0.207398,...,-0.814510,-0.814510,0.607643,0.632417,2.089960,-0.814510,0.251809,0.679083,1.609566,0.749980
1194,-0.730203,-0.730203,-0.730203,-0.730203,0.437583,0.571669,0.268926,0.489728,1.693401,0.300975,...,-0.730203,-0.730203,0.602516,0.385785,-0.102689,-0.730203,0.740576,0.496394,0.046549,0.451792
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1296,0.793838,0.451417,1.142370,0.853333,-0.880526,-0.880526,-0.880526,-0.880526,-0.880526,-0.880526,...,0.786926,1.035053,-0.880526,-0.880526,-0.880526,2.411948,-0.880526,-0.864139,-0.880526,-0.880526
1156,1.315563,0.598975,1.099674,0.232183,-0.811843,-0.811843,-0.811843,-0.811843,-0.811843,-0.811843,...,0.324236,0.256880,-0.811843,-0.811843,-0.811843,0.351979,-0.811843,-0.811843,-0.811843,-0.811843
1873,-0.505535,0.904556,0.200964,0.763998,-0.808936,-0.808936,-0.808936,-0.808936,-0.808936,-0.808936,...,2.124663,0.315673,-0.808936,-0.808936,-0.808936,1.654961,-0.808936,-0.808233,-0.808936,-0.808936
1157,1.559388,0.451159,0.194635,0.421205,-0.764535,-0.763405,-0.764535,-0.764535,-0.764535,-0.764535,...,0.494221,1.446051,-0.764535,-0.764535,-0.764535,0.390617,-0.764535,-0.764535,-0.764535,-0.764535


In [18]:
# Inspect the index of the unlabelled quantification table.
quantifications_xblank.index

RangeIndex(start=0, stop=281, step=1)

In [19]:
# The y axis uses the index of quantifications_xblank, not the compound-name
# labels assigned to zscores_xblank.index.
plot_zscore_heatmap(zscores=zscores_xblank, range=(-15.0, 15.0), x=quantifications_xblank.columns, y=quantifications_xblank.index)

In [20]:
# axis=0: one point per column of `zscores_xblank`, counting the rows whose
# |z| exceeds each cutoff from 4 to 9.
plot_cutoff_accumulation(zscores=zscores_xblank, cutoff_range=(4,10), axis=0, template="seaborn", jitter=1.0, marker_size=5)

In [21]:
# axis=1: one point per row of `zscores_xblank`, counting the columns whose
# |z| exceeds each cutoff from 4 to 9.
plot_cutoff_accumulation(zscores=zscores_xblank, cutoff_range=(4,10), axis=1, template="seaborn", jitter=1.0, marker_size=5)