In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the two tab-separated result tables; the first column becomes the row
# index (those labels are intersected with the MLO term list further down).
client = pd.read_csv("result/client_go.txt", sep="\t", index_col=0)
fp = pd.read_csv("result/fp_cc_go.txt", sep="\t", index_col=0)

In [None]:
# One entry per non-blank line of the term list.
# Blank and whitespace-only lines are skipped while reading, so a file without
# a trailing newline no longer raises KeyError (the old `remove("")`), and no
# whitespace-only entry survives to break the later `x.split()[-1]`.
with open("data/mlo_go_PhaSepDB.txt", "r") as f:
    mlo_term = {line.rstrip("\n") for line in f if line.strip()}

In [None]:
# Distinct row labels of each result table, as sets for the intersection below.
client_term = {*client.index}
fp_term = {*fp.index}

In [None]:
# Keep the last whitespace-separated token of every MLO entry, then retain only
# the tokens that are row labels in BOTH result tables.
terms = {entry.split()[-1] for entry in mlo_term}.intersection(fp_term, client_term)

In [None]:
import math
def _parse_ratio(ratio):
    """Turn a ``"hits/total"`` string into a ``(hits, total)`` pair of floats."""
    parts = ratio.split("/")
    return float(parts[0]), float(parts[1])


def _odds(hits, misses):
    """Return ``hits / misses``, or NaN when there are no misses (odds undefined)."""
    if misses == 0:
        return math.nan
    return hits / misses


def _significance_label(p):
    """Map an adjusted p-value to ``"N.S."``, ``"*"``, ``"**"`` or ``"***"``."""
    # A missing p-value must never be reported as significant; without this
    # guard NaN fails every `>` comparison and falls through to "***".
    if math.isnan(p) or p > 0.05:
        return "N.S."
    if p > 0.01:
        return "*"
    if p > 0.001:
        return "**"
    return "***"


def process_df(df:pd.DataFrame) -> pd.DataFrame:
    """Add a log2 odds ratio and a significance label to an enrichment table.

    Reads three columns from ``df``:

    * ``ratio_in_study`` -- string of the form ``"hits/total"``
    * ``ratio_in_pop``   -- string of the form ``"hits/total"``
    * ``p_fdr_bh``       -- value convertible to ``float``

    For every row the study odds ``hits / (total - hits)`` are compared with
    the odds of the background, where the background counts are the population
    counts minus the study counts (this assumes the population includes the
    study set -- TODO confirm against the tool that produced the tables).

    Writes two columns:

    * ``odds_ratio``   -- ``log2(study odds / background odds)``; NaN whenever
      either odds value is zero, negative or undefined (zero denominator), so
      degenerate rows no longer raise ``ZeroDivisionError`` / ``ValueError``.
    * ``significance`` -- ``"N.S."`` (p > 0.05 or NaN), ``"*"`` (p > 0.01),
      ``"**"`` (p > 0.001) or ``"***"``.

    The frame is modified in place and the same object is returned.
    """
    log_odds = []
    labels = []
    # Iterate over values rather than index labels so duplicated labels in the
    # index cannot turn a scalar lookup into a Series.
    rows = zip(df["ratio_in_study"], df["ratio_in_pop"], df["p_fdr_bh"])
    for study_ratio, pop_ratio, p_value in rows:
        study_n, study_all = _parse_ratio(study_ratio)
        pop_n, pop_all = _parse_ratio(pop_ratio)

        # Background = population with the study set taken out.
        background_n = pop_n - study_n
        background_all = pop_all - study_all

        study_odds = _odds(study_n, study_all - study_n)
        background_odds = _odds(background_n, background_all - background_n)

        # NaN compares False here, so undefined odds also end up as NaN.
        if study_odds > 0 and background_odds > 0:
            log_odds.append(math.log2(study_odds / background_odds))
        else:
            log_odds.append(math.nan)
        labels.append(_significance_label(float(p_value)))

    # Whole-column assignment also creates the columns for an empty frame.
    df["odds_ratio"] = log_odds
    df["significance"] = labels
    return df

In [None]:
# `terms` is a set of strings, whose iteration order changes between kernel
# restarts (string hash randomisation); sorting makes the row order -- and thus
# the tie order of any later sort -- reproducible. Every element of `terms` is
# already a row label of `client`, so no further token splitting is needed.
# The explicit copy lets process_df add columns without touching `client`.
client_processed=process_df(client.loc[sorted(terms), :].copy())

In [None]:
# `terms` is a set of strings, whose iteration order changes between kernel
# restarts (string hash randomisation); sorting makes the row order
# reproducible. Every element of `terms` is already a row label of `fp`, so no
# further token splitting is needed. The explicit copy lets process_df add
# columns without touching `fp`.
fp_processed=process_df(fp.loc[sorted(terms), :].copy())

In [None]:
# Tag every row with the group it belongs to; the figure uses this column as
# the bar-plot hue.
fp_processed["class"] = "Predicted client"
client_processed["class"] = "Known client"

In [None]:
# Order the known-client rows by ascending adjusted p-value (smallest first).
client_processed_s = client_processed.sort_values(by="p_fdr_bh", ascending=True)

In [None]:
fig = plt.figure(figsize=(10,6))
ax = fig.add_subplot(111)
# Known-client rows are concatenated first; the annotation loop below relies on
# row n of client_processed_s being drawn at y-position n.
sns.barplot(data=pd.concat([client_processed_s,fp_processed]), y="name", x="odds_ratio", hue="class", zorder=2, palette=["orangered", "gold"], ax=ax)
ax.grid(zorder=1)
ax.axvline(c="black")
# NOTE(review): 20.5 and the three text anchors below are hard-coded; they
# presumably mark the boundary after the last significant known-client term --
# confirm they still match the data whenever the input tables change.
ax.axhline(20.5,c="black", lw=1.2, linestyle="--")
ax.text(1,20.3,"↑",fontsize=20)
ax.text(1.3,18,"Significant enrichment",fontsize=12)
ax.text(1.3,19.3,"for known clients",fontsize=12)
# Write each bar's significance label just beyond the bar end: to the left for
# negative values, to the right otherwise. Known-client labels sit at y=n,
# predicted-client labels at y=n+0.5.
for n, term in enumerate(client_processed_s.index):
    for frame, y in ((client_processed_s, n), (fp_processed, n + 0.5)):
        x = frame.at[term, "odds_ratio"]
        if pd.isna(x):
            # No bar is drawn for an undefined odds ratio, and a label at a
            # NaN position only makes matplotlib warn about non-finite
            # coordinates, so skip it.
            continue
        offset = -0.3 if x < 0 else 0.1
        ax.text(x + offset, y, frame.at[term, "significance"], fontsize=6.5)
ax.set_xlabel("Enrichment: $log_{2}$(odds ratio)", fontsize=13)
ax.set_ylabel("GO terms associated with membraneless organelle", fontsize=13)
ax.legend(title = None, loc="upper left")
fig.tight_layout()
fig.savefig("result/fig3.pdf")
plt.show()