In [1]:
from pathlib import Path
import flippr as fp
import polars as pl
import scipy as sp
import numpy as np


In [2]:

# Input folders for the two arms of the experiment (relative to the working directory).
lip = Path("LFQ/LiP")
trp = Path("LFQ/Control")

study = fp.Study(lip=lip, trp=trp)

# Show which sample groups were discovered under each folder.
print(study.samples)

# One entry per process registered on the study; each tuple is forwarded
# unchanged to `add_process`.
# NOTE(review): presumably (process key, LiP control, LiP treatment, replicates
# [, TrP control, TrP treatment, replicates]) -- confirm against the flippr docs.
# The first process passes no TrP arguments, unlike the other two -- verify that
# this is intentional.
for process_args in (
    (1, "Native", "Refolded_001_min", 3),
    (5, "Native", "Refolded_005_min", 3, "Native", "Refolded", 3),
    (120, "Native", "Refolded_120_min", 3, "Native", "Refolded", 3),
):
    study.add_process(*process_args)

results = study.run()

# Result tables of the process registered under key 5, at three levels.
five_min = results[5]
pep = five_min.peptide
mod_pep = five_min.modified_peptide
cut = five_min.cut_site


{'LiP': {'Refolded_005_min', 'Native', 'Refolded_120_min', 'Refolded_001_min'}, 'TrP': {'Refolded', 'Native'}}


In [22]:
s = "Peptides"


def count_rows_per_protein(frame: pl.DataFrame, condition: pl.Expr, label: str) -> pl.DataFrame:
    """Count, for every protein, the rows of ``frame`` that satisfy ``condition``.

    Parameters
    ----------
    frame
        Table with at least the columns "Protein ID" and "CV", plus whatever
        columns ``condition`` refers to.
    condition
        Boolean polars expression evaluated inside each protein group.
    label
        Name of the resulting count column.

    Returns
    -------
    pl.DataFrame
        One row per "Protein ID" (first-seen order) with the count in ``label``.
        Proteins without any matching row are kept with a count of 0.
    """
    # Group keys are passed positionally: newer polars releases interpret a
    # `by=` keyword as a *named* key expression and emit a column called "by".
    return frame.group_by("Protein ID", maintain_order=True).agg(
        pl.col("CV").filter(condition).len().alias(label)
    )


abs_log2_fc = pl.col("Log2 Normalized FC").abs()
neg_log10_p = pl.col("-Log10 P-value")
neg_log10_adj_p = pl.col("-Log10 Adj. P-value")

# "Valid": -log10(p) > 0, i.e. p < 1.
val = count_rows_per_protein(pep, neg_log10_p > 0, f"No. of Valid {s}")

# "Significant (P-value)": more than 2-fold change with p < 0.01, or more than
# 64-fold change (|log2 FC| > 6) with the relaxed cut-off -log10(p) > 1.8.
sig = count_rows_per_protein(
    pep,
    ((abs_log2_fc > 1) & (neg_log10_p > 2)) | ((abs_log2_fc > 6) & (neg_log10_p > 1.8)),
    f"No. of Significant {s} (P-value)",
)

# "Significant (Adj. P-value)": more than 2-fold change with -log10(adj. p) > 1.3
# (adjusted p below roughly 0.05).
sigsig = count_rows_per_protein(
    pep,
    (abs_log2_fc > 1) & (neg_log10_adj_p > 1.3),
    f"No. of Significant {s} (Adj. P-value)",
)


In [25]:
# Put the three per-protein counts side by side (inner joins on the protein key).
(
    val
    .join(sig, on="Protein ID")
    .join(sigsig, on="Protein ID")
)

Protein ID,No. of Valid Peptides,No. of Significant Peptides (P-value),No. of Significant Peptides (Adj. P-value)
str,u32,u32,u32
"""P00350""",135,30,16
"""P00363""",16,0,0
"""P00370""",26,4,4
"""P00448""",50,12,12
"""P00452""",59,6,3
"""P00509""",69,15,15
"""P00547""",2,0,0
"""P00561""",25,4,0
"""P00562""",8,0,0
"""P00579""",52,6,4


In [3]:
# Ion-level table of the process registered under key 5.
ions = results[5].ion

In [4]:
# Descriptive (non-statistical) columns carried along when ions are rolled up;
# the first value per group is taken for each of them.
FLIPPR_CUT_SITE_COLUMNS: list[str] = [
    # identity of the row
    "Protein ID", "Cut Site",
    # protein annotation
    "Gene", "Entry Name", "Protein Description",
    # alternative mappings
    "Mapped Genes", "Mapped Proteins",
    # peptide type flag
    "Half Tryptic",
]

In [5]:
meta = \
ions.group_by(by = "Protein ID", maintain_order=True).agg(
    pl.col(FLIPPR_CUT_SITE_COLUMNS).first()
)

In [6]:
data = \
ions.group_by(by = ["Protein ID", "Cut Site ID"], maintain_order=True).agg(
    pl.col(["P-value", "Adj. P-value", "CV", "FC"])
    .filter(
        pl.col("T-test").sign() == pl.col("T-test").sign().sum().sign()
    )
).with_columns(
    pl.when(pl.col("P-value").list.len() > 0)
    .then(pl.col("P-value"))
    .otherwise(pl.lit([1.0]))
    .alias("P-value"),

    pl.when(pl.col("Adj. P-value").list.len() > 0)
    .then(pl.col("Adj. P-value"))
    .otherwise(pl.lit([1.0]))
    .alias("Adj. P-value"),

    pl.when(pl.col("CV").list.len() > 0)
    .then(pl.col("CV"))
    .otherwise(pl.lit([0.0]))
    .alias("CV"),

    pl.when(pl.col("FC").list.len() > 0)
    .then(pl.col("FC"))
    .otherwise(pl.lit([0.0]))
    .alias("FC"),
).select(
    pl.col("Protein ID"),

    pl.col("P-value").map_elements(lambda x: sp.stats.combine_pvalues(x)[1])
    .alias("P-value"),

    pl.col("Adj. P-value").map_elements(lambda x: sp.stats.combine_pvalues(x)[1])
    .alias("Adj. P-value"),

    pl.col("CV").map_elements(lambda x: np.nanmax(x))
    .alias("CV"),

    pl.col("FC").map_elements(lambda x: np.nanmedian(x))
    .alias("FC")
)

In [7]:
meta.join(data, left_on = "Protein ID", right_on = "Protein ID")

Protein ID,Cut Site,Gene,Entry Name,Protein Description,Mapped Genes,Mapped Proteins,Half Tryptic,P-value,Adj. P-value,CV,FC
str,str,str,str,str,str,str,bool,f64,f64,f64,f64
"""P00350""","""T220""","""gnd""","""6PGD_ECOLI""","""6-phosphogluco…",,,true,0.000005,0.003443,0.248862,298.440571
"""P00350""","""T220""","""gnd""","""6PGD_ECOLI""","""6-phosphogluco…",,,true,0.000888,0.046487,0.036748,0.001673
"""P00350""","""T220""","""gnd""","""6PGD_ECOLI""","""6-phosphogluco…",,,true,0.000021,0.015424,0.081713,2.895886
"""P00350""","""T220""","""gnd""","""6PGD_ECOLI""","""6-phosphogluco…",,,true,0.006282,0.125755,0.141682,2.019546
"""P00350""","""T220""","""gnd""","""6PGD_ECOLI""","""6-phosphogluco…",,,true,0.000295,0.022081,0.036748,0.055145
"""P00350""","""T220""","""gnd""","""6PGD_ECOLI""","""6-phosphogluco…",,,true,0.003568,0.070705,0.036748,0.005559
"""P00350""","""T220""","""gnd""","""6PGD_ECOLI""","""6-phosphogluco…",,,true,0.004398,0.070705,0.066402,0.302598
"""P00350""","""T220""","""gnd""","""6PGD_ECOLI""","""6-phosphogluco…",,,true,0.001376,0.033619,0.13496,2.026846
"""P00350""","""T220""","""gnd""","""6PGD_ECOLI""","""6-phosphogluco…",,,true,0.002224,0.04306,0.507252,0.110709
"""P00350""","""T220""","""gnd""","""6PGD_ECOLI""","""6-phosphogluco…",,,true,0.006116,0.070705,0.116909,3.872074
