In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import re
from matplotlib.pyplot import subplots, style, rc
from tqdm import tqdm
from venn import venn, pseudovenn
from collections import defaultdict
from itertools import count, islice
from functools import lru_cache

In [2]:
style.use(["seaborn-poster", "seaborn-whitegrid"])
rc("axes", linewidth=1, edgecolor="black")
%matplotlib inline

In [9]:
from pickle import load


def _unpickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # produced by a trusted step of this analysis.
    with open(path, mode="rb") as handle:
        return load(handle)


# One pickled object per input file; later cells read attributes such as
# ``cd_list`` from them (presumably one object per chromosome arm, going by
# the file names -- confirm against the notebook that wrote the pickles).
P = _unpickle("for_wilcoxon-p_arm.pkl")
Q = _unpickle("for_wilcoxon-q_arm.pkl")

In [7]:
from scipy.stats import wilcoxon

def wilcoxon_dropna(df, a, b):
    """Paired Wilcoxon signed-rank test between columns ``a`` and ``b`` of ``df``.

    Rows in which either of the two columns is missing are dropped before
    anything is counted or tested.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding both columns.
    a, b : column labels
        The two paired columns to compare.

    Returns
    -------
    tuple
        ``(yes, no, p)`` where ``yes`` is the number of remaining rows with
        ``a < b``, ``no`` the number with ``a > b``, and ``p`` the p-value
        reported by ``scipy.stats.wilcoxon``.  If a ``ValueError`` is raised
        along the way, ``(nan, nan, nan)`` is returned instead.
    """
    paired = df[[a, b]].dropna()
    left, right = paired[a], paired[b]
    try:
        # Vectorised counts, cast back to plain Python ints.
        n_less = int((left < right).sum())
        n_greater = int((left > right).sum())
        pvalue = wilcoxon(left, right)[1]
    except ValueError:
        return np.nan, np.nan, np.nan
    return n_less, n_greater, pvalue

In [22]:
from statsmodels.stats.multitest import multipletests

# The three subject/trio/outgroup tests all run on the same pooled table, so
# build it once instead of re-concatenating it for every comparison.
cd_pooled = pd.concat(P.cd_list + Q.cd_list)

# (row label, pooled P+Q table, first column, second column) for each test.
comparisons = [
    ("s2t_p", cd_pooled, "subject", "trio"),
    ("s2o_p", cd_pooled, "subject", "outgroup"),
    ("t2o_p", cd_pooled, "trio", "outgroup"),
    ("aff_p", pd.concat(P.aff_list + Q.aff_list), "father to son", "father to mother"),
    ("afm_p", pd.concat(P.afm_list + Q.afm_list), "mother to son", "mother to father"),
    ("cff_p", pd.concat(P.cff_list + Q.cff_list), "father to son", "father to mother"),
    ("cfm_p", pd.concat(P.cfm_list + Q.cfm_list), "mother to son", "mother to father"),
]

# One row per comparison; keep only the p-value (third element) of each result.
overall = pd.DataFrame(
    index=[label for label, _, _, _ in comparisons],
    columns=["p"],
    data=[
        wilcoxon_dropna(pooled, first, second)[2]
        for _, pooled, first, second in comparisons
    ],
)

# Bonferroni correction across all comparisons in the table; element [1] of
# the multipletests result holds the corrected p-values.
overall["p_adjusted"] = multipletests(overall["p"], method="bonferroni")[1]

In [23]:
# When True, p-values at or above the threshold are shown as "ns" instead of a number.
PRINT_NS = False

# p-values at or above this are printed as "ns" or with two decimals.
PVAL_NS_THRESHOLD = .05
# p-values below this are reported as a bound rather than a number.
PVAL_DISPLAY_FLOOR = 1e-300


def format_pval(p, print_ns=PRINT_NS):
    """Format a p-value for display.

    Parameters
    ----------
    p : float
        The p-value to format.
    print_ns : bool, optional
        If true, values at or above ``PVAL_NS_THRESHOLD`` are rendered as
        ``"ns"``; otherwise they are rendered with two decimals.  Defaults to
        the value ``PRINT_NS`` had when this cell was run.

    Returns
    -------
    str
        ``"ns"`` or a ``.2f`` string for values at or above the threshold,
        ``"<1.0e-300"`` for values below ``PVAL_DISPLAY_FLOOR``, and a ``.1e``
        string for everything else (NaN falls through to this last branch).
    """
    if p >= PVAL_NS_THRESHOLD:
        return "ns" if print_ns else format(p, ".2f")
    if p < PVAL_DISPLAY_FLOOR:
        return "<1.0e-300"
    return format(p, ".1e")

# Add a display-string column for the adjusted p-values, then show the table.
overall["p_adjusted_formatted"] = overall["p_adjusted"].map(format_pval)
overall

Unnamed: 0,p,p_adjusted,p_adjusted_formatted
s2t_p,6.043496999999999e-57,4.2304479999999995e-56,4.2e-56
s2o_p,1.088936e-107,7.622554e-107,7.599999999999999e-107
t2o_p,3.2050069999999997e-41,2.243505e-40,2.2e-40
aff_p,5.317692e-12,3.722384e-11,3.7e-11
afm_p,0.1608129,1.0,1.0
cff_p,0.004794396,0.03356077,0.034
cfm_p,0.03224968,0.2257478,0.23
