In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt

from statsmodels.stats.multitest import multipletests

In [2]:
def _neg_log10(pvals, floor):
    """Return ``-log10(pvals)`` with exact zeros replaced by ``floor``.

    Replacing zeros keeps the transform finite. NaN entries are left as NaN.
    """
    pvals = pvals.astype(float)
    return -np.log10(pvals.where(pvals != 0, floor))


def _bh_adjust(pvals):
    """Benjamini-Hochberg adjust a Series of p-values, keeping its index.

    Only the non-NaN p-values are handed to ``multipletests``; NaN entries are
    not counted as tests and simply stay NaN in the returned Series.
    """
    raw = pvals.to_numpy(dtype=float)
    adjusted = np.full(raw.shape, np.nan)
    observed = ~np.isnan(raw)
    if observed.any():  # also skips the call entirely for empty input
        _, adj, _, _ = multipletests(raw[observed], alpha=0.05, method='fdr_bh',
                                     is_sorted=False, returnsorted=False)
        adjusted[observed] = adj
    return pd.Series(adjusted, index=pvals.index)


def read_results(workdir):
    """Load the per-method result tables from ``workdir`` into one DataFrame.

    Files read (all tab-separated):
      * ``DPE_deqms_central.tsv`` -> ``pv_Rdeqms``, ``lfc_Rdeqms``
      * ``DPE_fedprot_app.csv``   -> ``pv_FedProt``, ``lfc_FedProt``
      * ``MA_CM.tsv``             -> ``pv_Fisher``, ``lfc_Fisher``
      * ``MA_REM.tsv``            -> ``pv_REM``, ``lfc_REM``
      * ``MA_Stouffer.tsv``       -> ``pv_Stouffer`` (``lfc_Stouffer`` reuses ``lfc_Fisher``)
      * ``MA_RankProd.tsv``       -> ``pv_RankProd``, ``lfc_RankProd``

    Parameters
    ----------
    workdir : str or path-like
        Directory that contains the six result files.

    Returns
    -------
    pandas.DataFrame
        One row per label shared by the tables (first column of the DPE /
        Stouffer / RankProd files, ``Symbol`` column of the MA_CM / MA_REM
        files). ``pv_*`` columns hold ``-log10`` of the adjusted p-value,
        ``lfc_*`` columns the log fold change. Rows with a NaN in any column
        are dropped.

    Notes
    -----
    Adjusted p-values that are exactly zero are replaced by the smallest
    positive normal float before the log transform. The Fisher and REM
    p-values are Benjamini-Hochberg adjusted here; the other tables are read
    with their adjusted values already present.
    """
    offset = np.finfo(float).tiny  # replace zero pvals with this value
    columns = {}

    # DEqMS
    rlimma = pd.read_csv(f"{workdir}/DPE_deqms_central.tsv", sep="\t", index_col=0)
    print(f"rlimma: {rlimma.shape}")
    columns["pv_Rdeqms"] = _neg_log10(rlimma["sca.adj.pval"], offset)
    columns["lfc_Rdeqms"] = rlimma["logFC"]

    # FedProt
    fedprot = pd.read_csv(f"{workdir}/DPE_fedprot_app.csv", sep="\t", index_col=0)
    print(f"fedprot: {fedprot.shape}")
    columns["pv_FedProt"] = _neg_log10(fedprot["sca.adj.pval"], offset)
    columns["lfc_FedProt"] = fedprot["logFC"]

    # Fisher
    ma_cm = pd.read_csv(f"{workdir}/MA_CM.tsv", sep="\t")
    ma_cm.index = ma_cm["Symbol"].values
    columns["lfc_Fisher"] = ma_cm["metafc"]
    columns["pv_Fisher"] = _neg_log10(_bh_adjust(ma_cm["metap"]), offset)

    # REM
    ma_rem = pd.read_csv(f"{workdir}/MA_REM.tsv", sep="\t")
    ma_rem.index = ma_rem["Symbol"].values
    columns["lfc_REM"] = ma_rem["randomSummary"]
    columns["pv_REM"] = _neg_log10(_bh_adjust(ma_rem["randomP"]), offset)

    # Stouffer
    stouffer = pd.read_csv(f"{workdir}/MA_Stouffer.tsv", sep="\t", index_col=0)
    columns["pv_Stouffer"] = _neg_log10(stouffer["FDR"], offset)
    columns["lfc_Stouffer"] = columns["lfc_Fisher"]  # take logFC from MetaVolcanoR

    # RankProd: use the smaller of the down-/up-regulation FDRs per row
    rankprod = pd.read_csv(f"{workdir}/MA_RankProd.tsv", sep="\t", index_col=0)
    rankprod_fdr = rankprod.loc[:, ["down_reg.FDR", "up_reg.FDR"]].min(axis=1)
    columns["pv_RankProd"] = _neg_log10(rankprod_fdr, offset)
    columns["lfc_RankProd"] = rankprod["avgL2FC"]

    # Series are aligned on their index; keep only rows complete in every method.
    merged = pd.DataFrame.from_dict(columns)
    return merged.dropna(axis=0)

In [3]:
# Root of the evaluation folder; each scenario keeps its tables in "<scenario>/results".
root_dir = "/home/yuliya/repos/cosybio/FedProt/evaluation/"

# Scenario label -> combined per-method results table.
dfs = {}
for label, scenario in (("Balanced", "balanced"), ("Imbalanced", "imbalanced")):
    workdir = root_dir + scenario + '/results'
    df = read_results(workdir)
    dfs[label] = df

# Quick look at the imbalanced scenario.
print(dfs["Imbalanced"].shape)
dfs["Imbalanced"].head(10)

rlimma: (2299, 11)
fedprot: (2299, 13)
rlimma: (2299, 11)
fedprot: (2295, 13)
(2266, 12)


Unnamed: 0,pv_Rdeqms,lfc_Rdeqms,pv_FedProt,lfc_FedProt,lfc_Fisher,pv_Fisher,lfc_REM,pv_REM,pv_Stouffer,lfc_Stouffer,pv_RankProd,lfc_RankProd
A5A614,7.602458,0.828114,7.605322,-0.828114,0.85672,11.268901,0.801288,11.02517,10.708241,0.85672,1.695739,0.85672
P00350,9.969867,-0.195364,9.970624,0.195364,-0.193568,12.321236,-0.209903,11.616745,10.51589,-0.193568,0.584653,-0.193568
P00363,0.832283,0.090561,0.83236,-0.090561,0.088998,0.371287,0.084068,0.917463,0.002092,0.088998,0.200449,0.088998
P00370,15.143569,-0.533457,15.144325,0.533457,-0.528191,15.147751,-0.549734,30.441422,15.583554,-0.528191,1.898354,-0.528191
P00393,1.448721,-0.076153,1.44874,0.076153,-0.074333,0.888182,-0.062923,0.706181,0.177077,-0.074333,0.29001,-0.074333
P00448,14.314083,0.700476,14.31484,-0.700476,0.687096,15.771816,0.705081,7.330966,14.852193,0.687096,1.332784,0.687096
P00452,1.976964,0.124394,1.976935,-0.124394,0.136452,4.730906,0.137776,1.009838,4.548215,0.136452,0.272679,0.136452
P00490,35.859301,-0.805037,35.860057,0.805037,-0.813447,36.544421,-0.790988,72.64086,38.361404,-0.813447,3.071998,-0.813447
P00509,4.613787,0.172203,4.614543,-0.172203,0.174626,6.904187,0.170772,3.173364,4.355585,0.174626,0.33333,0.174626
P00547,12.163459,0.346525,12.164216,-0.346525,0.33801,15.282149,0.343487,8.513249,14.747525,0.33801,0.61829,0.33801
