# Enrichment analyses

Inputs:
* 250217_Arivale_Indiv.csv  
* 250217_Arivale_Indiv_FEMALE.csv  
* 250217_Arivale_Indiv_MALE.csv  
* 250217_Arivale_Indiv_delage.csv  
* 250217_Arivale_Indiv_delage_FEMALE.csv  
* 250217_Arivale_Indiv_delage_MALE.csv  
* 250217_TwinsUK_Indiv.csv  
* 250217_TwinsUK_Indiv_FEMALE.csv  
* 250217_TwinsUK_Indiv_MALE.csv  
* Arivale_metabolomics_metadata.csv

Outputs: 
* Metabolite enrichment analysis results in Supplementary Files 1 & 4, Supplementary Tables 1 & 2  
* Table 3 data

'Save' lines are commented out  
'Healthy' and 'Unhealthy' are renamed as 'Bio_Young' and 'Bio_Old' in figures and data files

In [1]:
import pandas as pd
from IPython.display import display
# get the library
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as sp
import seaborn as sns
from matplotlib import pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.stats.multitest as smm

In [2]:
# get metabolite annotations
met_met = pd.read_csv('/notebooks/0. APOE-Multiomics/Data_Files/Arivale_metabolomics_metadata.csv')

# Enrichment of pre-adjusted p < 0.05

In [3]:
arivale_APOE_result_df = pd.read_csv('/notebooks/0. APOE-Multiomics/Data_Files/250217_Arivale_Indiv.csv')

In [4]:
TwinsUK_APOE_result_df = pd.read_csv('/notebooks/0. APOE-Multiomics/Data_Files/250217_TwinsUK_Indiv.csv')

# Arivale APOE pre-adjusted enrichment

In [5]:
arivale_APOE_result_df['E2_assoc'] = 'ns'
arivale_APOE_result_df['E4_assoc'] = 'ns'

for i in arivale_APOE_result_df.index:
    if (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]_p'][i] < 0.05) and (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]'][i] > 0):
        arivale_APOE_result_df['E2_assoc'][i] = 'Positive'
    if (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]_p'][i] < 0.05) and (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]'][i] < 0):
        arivale_APOE_result_df['E2_assoc'][i] = 'Negative'
        
    if (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]_p'][i] < 0.05) and (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]'][i] > 0):
        arivale_APOE_result_df['E4_assoc'][i] = 'Positive'
    if (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]_p'][i] < 0.05) and (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]'][i] < 0):
        arivale_APOE_result_df['E4_assoc'][i] = 'Negative'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_APOE_result_df['E2_assoc'][i] = 'Positive'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_APOE_result_df['E4_assoc'][i] = 'Positive'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_APOE_result_df['E4_assoc'][i] = 'Negative'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_APOE_re

In [6]:
annots = met_met[['SUB_PATHWAY','SUPER_PATHWAY','BIOCHEMICAL_NAME']]
annots.rename(columns={"BIOCHEMICAL_NAME": "col1"},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [7]:
arivale_APOE_result_df = arivale_APOE_result_df[['col1','E2_assoc','E4_assoc']]

In [8]:
arivale_enrich_df = arivale_APOE_result_df.merge(annots, how='left',on='col1')

In [9]:
arivale_enrich_df[(arivale_enrich_df.SUB_PATHWAY == 'Diacylglycerol')]

Unnamed: 0,col1,E2_assoc,E4_assoc,SUB_PATHWAY,SUPER_PATHWAY
0,oleoyl-linoleoyl-glycerol (18:1/18:2) [1],Positive,Positive,Diacylglycerol,Lipid
7,palmitoleoyl-arachidonoyl-glycerol (16:1/20:4)...,Positive,Positive,Diacylglycerol,Lipid
30,oleoyl-linoleoyl-glycerol (18:1/18:2) [2],Positive,Positive,Diacylglycerol,Lipid
64,palmitoyl-linoleoyl-glycerol (16:0/18:2) [1]*,Positive,Positive,Diacylglycerol,Lipid
72,palmitoyl-linoleoyl-glycerol (16:0/18:2) [2]*,Positive,ns,Diacylglycerol,Lipid
78,myristoyl-linoleoyl-glycerol (14:0/18:2) [1]*,Positive,ns,Diacylglycerol,Lipid
84,oleoyl-oleoyl-glycerol (18:1/18:1) [1]*,Positive,ns,Diacylglycerol,Lipid
153,oleoyl-oleoyl-glycerol (18:1/18:1) [2]*,Positive,ns,Diacylglycerol,Lipid
211,palmitoyl-oleoyl-glycerol (16:0/18:1) [2]*,Positive,ns,Diacylglycerol,Lipid
252,palmitoyl-oleoyl-glycerol (16:0/18:1) [1]*,Positive,ns,Diacylglycerol,Lipid


In [10]:
dict_all_analytes = {}

In [11]:
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    dict_all_analytes[i] = arivale_enrich_df[(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

In [12]:
# make dict for a each thing

In [13]:
E2_positive = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    E2_positive[i] = arivale_enrich_df[(arivale_enrich_df.E2_assoc == 'Positive')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E2_negative = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    E2_negative[i] = arivale_enrich_df[(arivale_enrich_df.E2_assoc == 'Negative')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E4_positive = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    E4_positive[i] = arivale_enrich_df[(arivale_enrich_df.E4_assoc == 'Positive')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E4_negative = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    E4_negative[i] = arivale_enrich_df[(arivale_enrich_df.E4_assoc == 'Negative')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()


In [14]:
def calc_enrichment_bckgset(test_set, measured_set_dict, thr=0.05):
    results = pd.DataFrame(columns = ['total_size','total_in_set', 'overall_test_size', 'test_in_set_aka_overlap','pvalue', 'bon_adj_pvalue', 'fdr_adj_pvalue'])
    measured_list = set([y for x in list(measured_set_dict.values()) for y in x])
    test_list = set([y for x in list(test_set.values()) for y in x])
    
    N = len(measured_list)
    n = len(test_list)
    for set_name in measured_set_dict.keys():
        results.loc[set_name, 'total_size'] = N
        K = len(measured_set_dict[set_name])
        results.loc[set_name, 'total_in_set'] = K
        results.loc[set_name, 'overall_test_size'] = n
        k = len(test_set[set_name])
        results.loc[set_name, 'test_in_set_aka_overlap'] = k
        pvalue =  sp.hypergeom.sf(k - 1,N,K,n)
        results.loc[set_name, 'pvalue'] = pvalue
    multitester= smm.multipletests(results.pvalue.values.tolist(), alpha=thr, method='fdr_bh', is_sorted=False, returnsorted=False)
    results['fdr_adj_pvalue'] = multitester[1]
    results['bon_adj_pvalue'] = [x*len(results) for x in results.pvalue.values.tolist()]
    results.sort_values(by='pvalue',ascending=True,inplace=True)
    return results

In [15]:
E2_pos_results = calc_enrichment_bckgset(E2_positive, dict_all_analytes, thr=0.05)

In [16]:
E2_neg_results = calc_enrichment_bckgset(E2_negative, dict_all_analytes, thr=0.05)

In [17]:
E4_pos_results = calc_enrichment_bckgset(E4_positive, dict_all_analytes, thr=0.05)

In [18]:
E4_neg_results = calc_enrichment_bckgset(E4_negative, dict_all_analytes, thr=0.05)

In [19]:
E2_pos_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Diacylglycerol,729,13,52,13,0.0,2.279082e-14,2.279082e-14
Plasmalogen,729,21,52,6,0.002334,1.983831e-01,7.502838e-02
Long Chain Fatty Acid,729,15,52,5,0.002648,2.250852e-01,7.502838e-02
Monoacylglycerol,729,4,52,2,0.027303,2.320716e+00,5.801789e-01
"Glycolysis, Gluconeogenesis, and Pyruvate Metabolism",729,6,52,2,0.062216,5.288385e+00,1.000000e+00
...,...,...,...,...,...,...,...
Ascorbate and Aldarate Metabolism,729,3,52,0,1.0,8.500000e+01,1.000000e+00
"Pyrimidine Metabolism, Uracil containing",729,9,52,0,1.0,8.500000e+01,1.000000e+00
"Glycine, Serine and Threonine Metabolism",729,9,52,0,1.0,8.500000e+01,1.000000e+00
Dipeptide,729,5,52,0,1.0,8.500000e+01,1.000000e+00


In [20]:
E2_neg_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Sphingolipid Metabolism,729,44,19,9,0.0,0.000023,0.000023
Sphingomyelins,729,1,19,1,0.026063,2.215364,0.738455
Ceramide PEs,729,1,19,1,0.026063,2.215364,0.738455
Vitamin B6 Metabolism,729,2,19,1,0.051482,4.375952,1.000000
"Fatty Acid, Amino",729,3,19,1,0.076271,6.483045,1.000000
...,...,...,...,...,...,...,...
Fatty Acid Metabolism(Acyl Carnitine),729,32,19,0,1.0,85.000000,1.000000
"Purine Metabolism, Adenine containing",729,5,19,0,1.0,85.000000,1.000000
Chemical,729,13,19,0,1.0,85.000000,1.000000
Lysine Metabolism,729,11,19,0,1.0,85.000000,1.000000


In [21]:
E4_pos_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Lysolipid,729,46,36,8,0.001071,0.091033,0.091033
Diacylglycerol,729,13,36,4,0.002613,0.222071,0.111036
Monoacylglycerol,729,4,36,2,0.013372,1.136610,0.378870
Polyamine Metabolism,729,7,36,2,0.042597,3.620721,0.905180
Phospholipid Metabolism,729,73,36,7,0.058385,4.962700,0.992540
...,...,...,...,...,...,...,...
Partially Characterized Molecules,729,5,36,0,1.0,85.000000,1.000000
"Purine Metabolism, Guanine containing",729,2,36,0,1.0,85.000000,1.000000
Urea cycle; Arginine and Proline Metabolism,729,17,36,0,1.0,85.000000,1.000000
Ceramides,729,7,36,0,1.0,85.000000,1.000000


In [22]:
E4_neg_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Plasmalogen,729,21,13,3,0.004904,0.416820,0.41682
Androgenic Steroids,729,2,13,1,0.035371,3.006565,1.00000
"Methionine, Cysteine, SAM and Taurine Metabolism",729,18,13,2,0.038252,3.251435,1.00000
Creatine Metabolism,729,3,13,1,0.052621,4.472747,1.00000
"Glycolysis, Gluconeogenesis, and Pyruvate Metabolism",729,6,13,1,0.102675,8.727354,1.00000
...,...,...,...,...,...,...,...
Ascorbate and Aldarate Metabolism,729,3,13,0,1.0,85.000000,1.00000
"Pyrimidine Metabolism, Uracil containing",729,9,13,0,1.0,85.000000,1.00000
"Glycine, Serine and Threonine Metabolism",729,9,13,0,1.0,85.000000,1.00000
Partially Characterized Molecules,729,5,13,0,1.0,85.000000,1.00000


# TwinsUK pre-adjusted enrichment

In [23]:
TwinsUK_APOE_result_df['E2_assoc'] = 'ns'
TwinsUK_APOE_result_df['E4_assoc'] = 'ns'

for i in TwinsUK_APOE_result_df.index:
    if (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]_p'][i] < 0.05) and (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]'][i] > 0):
        TwinsUK_APOE_result_df['E2_assoc'][i] = 'Positive'
    if (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]_p'][i] < 0.05) and (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]'][i] < 0):
        TwinsUK_APOE_result_df['E2_assoc'][i] = 'Negative'
        
    if (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]_p'][i] < 0.05) and (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]'][i] > 0):
        TwinsUK_APOE_result_df['E4_assoc'][i] = 'Positive'
    if (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]_p'][i] < 0.05) and (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]'][i] < 0):
        TwinsUK_APOE_result_df['E4_assoc'][i] = 'Negative'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TwinsUK_APOE_result_df['E4_assoc'][i] = 'Positive'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TwinsUK_APOE_result_df['E4_assoc'][i] = 'Negative'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TwinsUK_APOE_result_df['E2_assoc'][i] = 'Negative'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TwinsUK_APOE_re

In [24]:
annots = met_met[['SUB_PATHWAY','SUPER_PATHWAY','BIOCHEMICAL_NAME']]
annots.rename(columns={"BIOCHEMICAL_NAME": "col1"},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [25]:
TwinsUK_APOE_result_df = TwinsUK_APOE_result_df[['col1','E2_assoc','E4_assoc']]

In [26]:
TwinsUK_enrich_df = TwinsUK_APOE_result_df.merge(annots, how='left',on='col1')

In [27]:
TwinsUK_enrich_df[(TwinsUK_enrich_df.SUB_PATHWAY == 'Diacylglycerol')]

Unnamed: 0,col1,E2_assoc,E4_assoc,SUB_PATHWAY,SUPER_PATHWAY
39,oleoyl-linoleoyl-glycerol (18:1/18:2) [2],Positive,Positive,Diacylglycerol,Lipid
56,oleoyl-linoleoyl-glycerol (18:1/18:2) [1],Positive,Positive,Diacylglycerol,Lipid
71,palmitoyl-linoleoyl-glycerol (16:0/18:2) [1]*,Positive,Positive,Diacylglycerol,Lipid
98,palmitoyl-linoleoyl-glycerol (16:0/18:2) [2]*,Positive,ns,Diacylglycerol,Lipid


In [28]:
dict_all_analytes = {}

In [29]:
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    dict_all_analytes[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

In [30]:
# make dict for a each thing

In [31]:
E2_positive = {}
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    E2_positive[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.E2_assoc == 'Positive')&(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E2_negative = {}
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    E2_negative[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.E2_assoc == 'Negative')&(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E4_positive = {}
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    E4_positive[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.E4_assoc == 'Positive')&(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E4_negative = {}
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    E4_negative[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.E4_assoc == 'Negative')&(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()


In [32]:
def calc_enrichment_bckgset(test_set, measured_set_dict, thr=0.05):
    results = pd.DataFrame(columns = ['total_size','total_in_set', 'overall_test_size', 'test_in_set_aka_overlap','pvalue', 'bon_adj_pvalue', 'fdr_adj_pvalue'])
    measured_list = set([y for x in list(measured_set_dict.values()) for y in x])
    test_list = set([y for x in list(test_set.values()) for y in x])
    
    N = len(measured_list)
    n = len(test_list)
    for set_name in measured_set_dict.keys():
        results.loc[set_name, 'total_size'] = N
        K = len(measured_set_dict[set_name])
        results.loc[set_name, 'total_in_set'] = K
        results.loc[set_name, 'overall_test_size'] = n
        k = len(test_set[set_name])
        results.loc[set_name, 'test_in_set_aka_overlap'] = k
        pvalue =  sp.hypergeom.sf(k - 1,N,K,n)
        results.loc[set_name, 'pvalue'] = pvalue
    multitester= smm.multipletests(results.pvalue.values.tolist(), alpha=thr, method='fdr_bh', is_sorted=False, returnsorted=False)
    results['fdr_adj_pvalue'] = multitester[1]
    results['bon_adj_pvalue'] = [x*len(results) for x in results.pvalue.values.tolist()]
    results.sort_values(by='pvalue',ascending=True,inplace=True)
    return results

In [33]:
E2_pos_results = calc_enrichment_bckgset(E2_positive, dict_all_analytes, thr=0.05)

In [34]:
E2_neg_results = calc_enrichment_bckgset(E2_negative, dict_all_analytes, thr=0.05)

In [35]:
E4_pos_results = calc_enrichment_bckgset(E4_positive, dict_all_analytes, thr=0.05)

In [36]:
E4_neg_results = calc_enrichment_bckgset(E4_negative, dict_all_analytes, thr=0.05)

In [37]:
E2_pos_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Diacylglycerol,523,4,34,4,0.000015,0.001099,0.001099
Steroid,523,22,34,5,0.010159,0.741583,0.370791
Phenylalanine and Tyrosine Metabolism,523,22,34,4,0.047159,3.442642,1.000000
Phospholipid Metabolism,523,44,34,6,0.055545,4.054775,1.000000
Carnitine Metabolism,523,2,34,1,0.125909,9.191380,1.000000
...,...,...,...,...,...,...,...
Nicotinate and Nicotinamide Metabolism,523,5,34,0,1.0,73.000000,1.000000
Bacterial/Fungal,523,1,34,0,1.0,73.000000,1.000000
"Glycine, Serine and Threonine Metabolism",523,7,34,0,1.0,73.000000,1.000000
Lysoplasmalogen,523,6,34,0,1.0,73.000000,1.000000


In [38]:
E2_neg_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Sphingolipid Metabolism,523,19,15,6,0.000004,0.000295,0.000295
Sterol,523,2,15,1,0.056592,4.131228,1.000000
Ascorbate and Aldarate Metabolism,523,3,15,1,0.083754,6.114014,1.000000
Phenylalanine and Tyrosine Metabolism,523,22,15,2,0.127368,9.297882,1.000000
Chemical,523,6,15,1,0.160923,11.747394,1.000000
...,...,...,...,...,...,...,...
Nicotinate and Nicotinamide Metabolism,523,5,15,0,1.0,73.000000,1.000000
Bacterial/Fungal,523,1,15,0,1.0,73.000000,1.000000
Urea cycle; Arginine and Proline Metabolism,523,13,15,0,1.0,73.000000,1.000000
Ketone Bodies,523,1,15,0,1.0,73.000000,1.000000


In [39]:
E4_pos_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Lysolipid,523,44,39,18,0.0,1.615608e-09,1.615608e-09
Monoacylglycerol,523,8,39,6,0.000003,2.157680e-04,1.078840e-04
Diacylglycerol,523,4,39,3,0.001462,1.067251e-01,3.557502e-02
Lyso-phospho-ether,523,2,39,1,0.143711,1.049091e+01,1.000000e+00
Sterol,523,2,39,1,0.143711,1.049091e+01,1.000000e+00
...,...,...,...,...,...,...,...
Urea cycle; Arginine and Proline Metabolism,523,13,39,0,1.0,7.300000e+01,1.000000e+00
"Glycine, Serine and Threonine Metabolism",523,7,39,0,1.0,7.300000e+01,1.000000e+00
"Purine Metabolism, (Hypo)Xanthine/Inosine containing",523,5,39,0,1.0,7.300000e+01,1.000000e+00
"Fatty Acid, Amino",523,2,39,0,1.0,7.300000e+01,1.000000e+00


In [40]:
E4_neg_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Xanthine Metabolism,523,12,15,4,0.000191,0.013953,0.013953
Ketone Bodies,523,1,15,1,0.028681,2.093690,1.000000
Glycerolipid Metabolism,523,2,15,1,0.056592,4.131228,1.000000
Phenylalanine and Tyrosine Metabolism,523,22,15,2,0.127368,9.297882,1.000000
Alanine and Aspartate Metabolism,523,5,15,1,0.135901,9.920775,1.000000
...,...,...,...,...,...,...,...
Urea cycle; Arginine and Proline Metabolism,523,13,15,0,1.0,73.000000,1.000000
"Glycine, Serine and Threonine Metabolism",523,7,15,0,1.0,73.000000,1.000000
"Purine Metabolism, (Hypo)Xanthine/Inosine containing",523,5,15,0,1.0,73.000000,1.000000
"Fatty Acid, Amino",523,2,15,0,1.0,73.000000,1.000000


# Arivale delta age status pre-adjusted

In [41]:
arivale_delta_age_result_df = pd.read_csv('/notebooks/0. APOE-Multiomics/Data_Files/250217_Arivale_Indiv_delage.csv')

In [42]:
arivale_delta_age_result_df['bioyoung_assoc'] = 'ns'
arivale_delta_age_result_df['bioold_assoc'] = 'ns'

for i in arivale_delta_age_result_df.index:
    if (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Healthy]_p'][i] < 0.05) and (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Healthy]'][i] > 0):
        arivale_delta_age_result_df['bioyoung_assoc'][i] = 'Positive'
    if (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Healthy]_p'][i] < 0.05) and (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Healthy]'][i] < 0):
        arivale_delta_age_result_df['bioyoung_assoc'][i] = 'Negative'
        
    if (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Unhealthy]_p'][i] < 0.05) and (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Unhealthy]'][i] > 0):
        arivale_delta_age_result_df['bioold_assoc'][i] = 'Positive'
    if (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Unhealthy]_p'][i] < 0.05) and (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Unhealthy]'][i] < 0):
        arivale_delta_age_result_df['bioold_assoc'][i] = 'Negative'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_delta_age_result_df['bioold_assoc'][i] = 'Negative'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_delta_age_result_df['bioyoung_assoc'][i] = 'Negative'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_delta_age_result_df['bioold_assoc'][i] = 'Positive'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-ve

In [43]:
annots = met_met[['SUB_PATHWAY','SUPER_PATHWAY','BIOCHEMICAL_NAME']]
annots.rename(columns={"BIOCHEMICAL_NAME": "col1"},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [44]:
arivale_delta_age_result_df = arivale_delta_age_result_df[['col1','bioyoung_assoc','bioold_assoc']]

In [45]:
arivale_enrich_df = arivale_delta_age_result_df.merge(annots, how='left',on='col1')

In [46]:
dict_all_analytes = {}

In [47]:
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    dict_all_analytes[i] = arivale_enrich_df[(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

In [48]:
# make dict for a each thing

In [49]:
bioyoung_positive = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    bioyoung_positive[i] = arivale_enrich_df[(arivale_enrich_df.bioyoung_assoc == 'Positive')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

bioyoung_negative = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    bioyoung_negative[i] = arivale_enrich_df[(arivale_enrich_df.bioyoung_assoc == 'Negative')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

bioold_positive = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    bioold_positive[i] = arivale_enrich_df[(arivale_enrich_df.bioold_assoc == 'Positive')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

bioold_negative = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    bioold_negative[i] = arivale_enrich_df[(arivale_enrich_df.bioold_assoc == 'Negative')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()


In [50]:
bioyoung_pos_results = calc_enrichment_bckgset(bioyoung_positive, dict_all_analytes, thr=0.05)

In [51]:
bioyoung_neg_results = calc_enrichment_bckgset(bioyoung_negative, dict_all_analytes, thr=0.05)

In [52]:
bioold_pos_results = calc_enrichment_bckgset(bioold_positive, dict_all_analytes, thr=0.05)

In [53]:
bioold_neg_results = calc_enrichment_bckgset(bioold_negative, dict_all_analytes, thr=0.05)

In [54]:
bioyoung_pos_results.head(n=10)

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Long Chain Fatty Acid,729,15,51,7,2.3e-05,0.001936,0.001936
Steroid,729,26,51,6,0.006789,0.577024,0.288512
Fatty Acid Metabolism(Acyl Carnitine),729,32,51,6,0.019256,1.636783,0.545594
Endocannabinoid,729,4,51,2,0.026303,2.235724,0.558931
"Purine Metabolism, (Hypo)Xanthine/Inosine containing",729,6,51,2,0.06005,5.104221,0.991084
Phosphatidylserine (PS),729,1,51,1,0.069959,5.946502,0.991084
"Methionine, Cysteine, SAM and Taurine Metabolism",729,18,51,3,0.125027,10.627288,1.0
"Pyrimidine Metabolism, Thymine containing",729,2,51,1,0.135113,11.484591,1.0
Sphingolipid Metabolism,729,44,51,5,0.186583,15.859517,1.0
Ascorbate and Aldarate Metabolism,729,3,51,1,0.195786,16.641793,1.0


In [55]:
bioyoung_neg_results.head(n=10)

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Plasmalogen,729,21,88,12,1e-06,4.7e-05,4.7e-05
Urea cycle; Arginine and Proline Metabolism,729,17,88,6,0.010741,0.912989,0.30655
"Leucine, Isoleucine and Valine Metabolism",729,28,88,8,0.013247,1.126007,0.30655
Carnitine Metabolism,729,2,88,2,0.014426,1.226202,0.30655
Fatty Acid Metabolism (Acyl Choline),729,3,88,2,0.039865,3.3885,0.649662
Histidine Metabolism,729,12,88,4,0.045859,3.897975,0.649662
Chemical,729,13,88,4,0.060274,5.123328,0.731904
Gamma-glutamyl Amino Acid,729,14,88,4,0.076817,6.529404,0.816176
Dipeptide,729,5,88,2,0.113033,9.607833,0.855053
Partially Characterized Molecules,729,5,88,2,0.113033,9.607833,0.855053


In [56]:
bioold_pos_results.head(n=10)

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Plasmalogen,729,21,158,17,0.0,5.640329e-07,5.640329e-07
"Leucine, Isoleucine and Valine Metabolism",729,28,158,15,0.000159,0.01350345,0.006751724
Polyamine Metabolism,729,7,158,6,0.000552,0.04688595,0.01562865
Urea cycle; Arginine and Proline Metabolism,729,17,158,8,0.016569,1.408355,0.3520888
Histidine Metabolism,729,12,158,6,0.027291,2.319732,0.4639465
Phospholipid Metabolism,729,73,158,22,0.047927,4.073792,0.6789653
Phenylalanine and Tyrosine Metabolism,729,21,158,8,0.062542,5.316081,0.7533783
Partially Characterized Molecules,729,5,158,3,0.070906,6.027026,0.7533783
"Fructose, Mannose and Galactose Metabolism",729,3,158,2,0.120164,10.2139,1.0
"Methionine, Cysteine, SAM and Taurine Metabolism",729,18,158,6,0.174629,14.84346,1.0


In [57]:
bioold_neg_results.head(n=10)

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Steroid,729,26,31,4,0.020753,1.763995,0.952232
Tryptophan Metabolism,729,15,31,3,0.022405,1.904465,0.952232
"Fatty Acid, Dicarboxylate",729,22,31,3,0.061953,5.265975,1.0
Androgenic Steroids,729,2,31,1,0.083296,7.08013,1.0
Carnitine Metabolism,729,2,31,1,0.083296,7.08013,1.0
Glutathione Metabolism,729,3,31,1,0.122385,10.402711,1.0
Creatine Metabolism,729,3,31,1,0.122385,10.402711,1.0
Nicotinate and Nicotinamide Metabolism,729,5,31,1,0.195782,16.641473,1.0
"Glycolysis, Gluconeogenesis, and Pyruvate Metabolism",729,6,31,1,0.230217,19.568427,1.0
Glutamate Metabolism,729,7,31,1,0.263223,22.37393,1.0


# Arivale APOE male, pre-adjusted

In [58]:
arivale_APOE_result_df = pd.read_csv('/notebooks/0. APOE-Multiomics/Data_Files/250217_Arivale_Indiv_MALE.csv')

In [59]:
arivale_APOE_result_df['E2_assoc'] = 'ns'
arivale_APOE_result_df['E4_assoc'] = 'ns'

for i in arivale_APOE_result_df.index:
    if (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]_p'][i] < 0.05) and (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]'][i] > 0):
        arivale_APOE_result_df['E2_assoc'][i] = 'Positive'
    if (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]_p'][i] < 0.05) and (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]'][i] < 0):
        arivale_APOE_result_df['E2_assoc'][i] = 'Negative'
        
    if (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]_p'][i] < 0.05) and (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]'][i] > 0):
        arivale_APOE_result_df['E4_assoc'][i] = 'Positive'
    if (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]_p'][i] < 0.05) and (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]'][i] < 0):
        arivale_APOE_result_df['E4_assoc'][i] = 'Negative'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_APOE_result_df['E4_assoc'][i] = 'Positive'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_APOE_result_df['E2_assoc'][i] = 'Positive'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_APOE_result_df['E4_assoc'][i] = 'Negative'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_APOE_re

In [60]:
annots = met_met[['SUB_PATHWAY','SUPER_PATHWAY','BIOCHEMICAL_NAME']]
annots.rename(columns={"BIOCHEMICAL_NAME": "col1"},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [61]:
arivale_APOE_result_df = arivale_APOE_result_df[['col1','E2_assoc','E4_assoc']]

In [62]:
arivale_enrich_df = arivale_APOE_result_df.merge(annots, how='left',on='col1')

In [63]:
arivale_enrich_df[(arivale_enrich_df.SUB_PATHWAY == 'Diacylglycerol')]

Unnamed: 0,col1,E2_assoc,E4_assoc,SUB_PATHWAY,SUPER_PATHWAY
9,oleoyl-linoleoyl-glycerol (18:1/18:2) [1],ns,Positive,Diacylglycerol,Lipid
15,oleoyl-linoleoyl-glycerol (18:1/18:2) [2],ns,Positive,Diacylglycerol,Lipid
43,palmitoyl-linoleoyl-glycerol (16:0/18:2) [1]*,ns,Positive,Diacylglycerol,Lipid
75,myristoyl-linoleoyl-glycerol (14:0/18:2) [1]*,ns,ns,Diacylglycerol,Lipid
80,palmitoleoyl-arachidonoyl-glycerol (16:1/20:4)...,Positive,ns,Diacylglycerol,Lipid
99,oleoyl-oleoyl-glycerol (18:1/18:1) [2]*,ns,ns,Diacylglycerol,Lipid
108,oleoyl-oleoyl-glycerol (18:1/18:1) [1]*,Positive,ns,Diacylglycerol,Lipid
118,stearoyl-arachidonoyl-glycerol (18:0/20:4) [1]*,Positive,ns,Diacylglycerol,Lipid
166,palmitoyl-docosahexaenoyl-glycerol (16:0/22:6)...,Positive,ns,Diacylglycerol,Lipid
206,palmitoyl-linoleoyl-glycerol (16:0/18:2) [2]*,ns,ns,Diacylglycerol,Lipid


In [64]:
dict_all_analytes = {}

In [65]:
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    dict_all_analytes[i] = arivale_enrich_df[(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

In [66]:
# make dict for a each thing

In [67]:
E2_positive = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    E2_positive[i] = arivale_enrich_df[(arivale_enrich_df.E2_assoc == 'Positive')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E2_negative = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    E2_negative[i] = arivale_enrich_df[(arivale_enrich_df.E2_assoc == 'Negative')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E4_positive = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    E4_positive[i] = arivale_enrich_df[(arivale_enrich_df.E4_assoc == 'Positive')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E4_negative = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    E4_negative[i] = arivale_enrich_df[(arivale_enrich_df.E4_assoc == 'Negative')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()


In [68]:
E2_pos_results = calc_enrichment_bckgset(E2_positive, dict_all_analytes, thr=0.05)

In [69]:
E2_neg_results = calc_enrichment_bckgset(E2_negative, dict_all_analytes, thr=0.05)

In [70]:
E4_pos_results = calc_enrichment_bckgset(E4_positive, dict_all_analytes, thr=0.05)

In [71]:
E4_neg_results = calc_enrichment_bckgset(E4_negative, dict_all_analytes, thr=0.05)

In [72]:
E2_pos_results.head(n=10)

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Diacylglycerol,729,13,36,6,1.3e-05,0.001079,0.001079
"Leucine, Isoleucine and Valine Metabolism",729,28,36,6,0.001615,0.137248,0.068624
Mevalonate Metabolism,729,1,36,1,0.049383,4.197531,1.0
Sphingolipid Metabolism,729,44,36,5,0.058721,4.991306,1.0
Plasmalogen,729,21,36,3,0.079706,6.775,1.0
Phospholipid Metabolism,729,73,36,6,0.140798,11.967813,1.0
Fatty Acid Metabolism (also BCAA Metabolism),729,3,36,1,0.141137,11.996617,1.0
Gamma-glutamyl Amino Acid,729,14,36,2,0.148724,12.641561,1.0
Glutamate Metabolism,729,7,36,1,0.299536,25.460528,1.0
Ceramides,729,7,36,1,0.299536,25.460528,1.0


In [73]:
E2_neg_results.head(n=10)

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Pantothenate and CoA Metabolism,729,1,11,1,0.015089,1.282579,0.503008
Bacterial/Fungal,729,1,11,1,0.015089,1.282579,0.503008
Phospholipid Metabolism,729,73,11,4,0.017753,1.509023,0.503008
"Fatty Acid, Amino",729,3,11,1,0.044648,3.795101,0.75902
Ascorbate and Aldarate Metabolism,729,3,11,1,0.044648,3.795101,0.75902
"Glycolysis, Gluconeogenesis, and Pyruvate Metabolism",729,6,11,1,0.087477,7.435532,1.0
Food Component/Plant,729,30,11,1,0.372193,31.636433,1.0
Lysolipid,729,46,11,1,0.514269,43.712847,1.0
,729,0,11,0,1.0,85.0,1.0
Long Chain Fatty Acid,729,15,11,0,1.0,85.0,1.0


In [74]:
E4_pos_results.head(n=10)

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Phospholipid Metabolism,729,73,23,14,0.0,1.027875e-07,1.027875e-07
Diacylglycerol,729,13,23,3,0.006395,0.5435414,0.2717707
"Fatty Acid Metabolism (Acyl Carnitine, Monounsaturated)",729,1,23,1,0.03155,2.681756,0.8939186
Tocopherol Metabolism,729,3,23,1,0.091817,7.804482,1.0
Secondary Bile Acid Metabolism,729,14,23,1,0.364242,30.96057,1.0
Phenylalanine and Tyrosine Metabolism,729,21,23,1,0.494797,42.05778,1.0
"Leucine, Isoleucine and Valine Metabolism",729,28,23,1,0.599473,50.95522,1.0
Food Component/Plant,729,30,23,1,0.625343,53.15415,1.0
Hemoglobin and Porphyrin Metabolism,729,4,23,0,1.0,85.0,1.0
Carnitine Metabolism,729,2,23,0,1.0,85.0,1.0


In [75]:
E4_neg_results.head(n=10)

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Androgenic Steroids,729,2,4,1,0.010951,0.930863,0.696708
Creatine Metabolism,729,3,4,1,0.016393,1.393416,0.696708
Chemical,729,13,4,1,0.069585,5.914695,1.0
"Methionine, Cysteine, SAM and Taurine Metabolism",729,18,4,1,0.095356,8.105296,1.0
,729,0,4,0,1.0,85.0,1.0
"Glycolysis, Gluconeogenesis, and Pyruvate Metabolism",729,6,4,0,1.0,85.0,1.0
Long Chain Fatty Acid,729,15,4,0,1.0,85.0,1.0
Phosphatidylserine (PS),729,1,4,0,1.0,85.0,1.0
Fatty Acid Metabolism(Acyl Carnitine),729,32,4,0,1.0,85.0,1.0
"Fructose, Mannose and Galactose Metabolism",729,3,4,0,1.0,85.0,1.0


# Arivale APOE female, pre-adjusted

In [76]:
arivale_APOE_result_df = pd.read_csv('/notebooks/0. APOE-Multiomics/Data_Files/250217_Arivale_Indiv_FEMALE.csv')

In [77]:
arivale_APOE_result_df['E2_assoc'] = 'ns'
arivale_APOE_result_df['E4_assoc'] = 'ns'

for i in arivale_APOE_result_df.index:
    if (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]_p'][i] < 0.05) and (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]'][i] > 0):
        arivale_APOE_result_df['E2_assoc'][i] = 'Positive'
    if (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]_p'][i] < 0.05) and (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]'][i] < 0):
        arivale_APOE_result_df['E2_assoc'][i] = 'Negative'
        
    if (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]_p'][i] < 0.05) and (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]'][i] > 0):
        arivale_APOE_result_df['E4_assoc'][i] = 'Positive'
    if (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]_p'][i] < 0.05) and (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]'][i] < 0):
        arivale_APOE_result_df['E4_assoc'][i] = 'Negative'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_APOE_result_df['E2_assoc'][i] = 'Negative'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_APOE_result_df['E4_assoc'][i] = 'Positive'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_APOE_result_df['E4_assoc'][i] = 'Negative'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_APOE_re

In [78]:
annots = met_met[['SUB_PATHWAY','SUPER_PATHWAY','BIOCHEMICAL_NAME']]
annots.rename(columns={"BIOCHEMICAL_NAME": "col1"},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [79]:
arivale_APOE_result_df = arivale_APOE_result_df[['col1','E2_assoc','E4_assoc']]

In [80]:
arivale_enrich_df = arivale_APOE_result_df.merge(annots, how='left',on='col1')

In [81]:
arivale_enrich_df[(arivale_enrich_df.SUB_PATHWAY == 'Diacylglycerol')]

Unnamed: 0,col1,E2_assoc,E4_assoc,SUB_PATHWAY,SUPER_PATHWAY
540,myristoyl-linoleoyl-glycerol (14:0/18:2) [1]*,Positive,ns,Diacylglycerol,Lipid
542,oleoyl-linoleoyl-glycerol (18:1/18:2) [1],Positive,ns,Diacylglycerol,Lipid
547,palmitoyl-linoleoyl-glycerol (16:0/18:2) [2]*,Positive,ns,Diacylglycerol,Lipid
560,oleoyl-linoleoyl-glycerol (18:1/18:2) [2],Positive,ns,Diacylglycerol,Lipid
578,palmitoyl-oleoyl-glycerol (16:0/18:1) [1]*,Positive,ns,Diacylglycerol,Lipid
590,palmitoyl-oleoyl-glycerol (16:0/18:1) [2]*,Positive,ns,Diacylglycerol,Lipid
598,stearoyl-arachidonoyl-glycerol (18:0/20:4) [2]*,Positive,ns,Diacylglycerol,Lipid
606,oleoyl-oleoyl-glycerol (18:1/18:1) [1]*,Positive,ns,Diacylglycerol,Lipid
607,oleoyl-oleoyl-glycerol (18:1/18:1) [2]*,Positive,ns,Diacylglycerol,Lipid
624,palmitoyl-linoleoyl-glycerol (16:0/18:2) [1]*,Positive,ns,Diacylglycerol,Lipid


In [82]:
dict_all_analytes = {}

In [83]:
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    dict_all_analytes[i] = arivale_enrich_df[(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

In [84]:
# make dict for a each thing

In [85]:
E2_positive = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    E2_positive[i] = arivale_enrich_df[(arivale_enrich_df.E2_assoc == 'Positive')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E2_negative = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    E2_negative[i] = arivale_enrich_df[(arivale_enrich_df.E2_assoc == 'Negative')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E4_positive = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    E4_positive[i] = arivale_enrich_df[(arivale_enrich_df.E4_assoc == 'Positive')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E4_negative = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    E4_negative[i] = arivale_enrich_df[(arivale_enrich_df.E4_assoc == 'Negative')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()


In [86]:
E2_pos_results = calc_enrichment_bckgset(E2_positive, dict_all_analytes, thr=0.05)

In [87]:
E2_neg_results = calc_enrichment_bckgset(E2_negative, dict_all_analytes, thr=0.05)

In [88]:
E4_pos_results = calc_enrichment_bckgset(E4_positive, dict_all_analytes, thr=0.05)

In [89]:
E4_neg_results = calc_enrichment_bckgset(E4_negative, dict_all_analytes, thr=0.05)

In [90]:
E2_pos_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Diacylglycerol,729,13,37,13,0.0,1.278580e-16,1.278580e-16
Steroid,729,26,37,5,0.007704,6.548601e-01,3.274300e-01
Monoacylglycerol,729,4,37,2,0.014109,1.199294e+00,3.997647e-01
Primary Bile Acid Metabolism,729,8,37,2,0.057875,4.919335e+00,1.000000e+00
Benzoate Metabolism,729,13,37,2,0.137557,1.169232e+01,1.000000e+00
...,...,...,...,...,...,...,...
"Fatty Acid, Amino",729,3,37,0,1.0,8.500000e+01,1.000000e+00
"Leucine, Isoleucine and Valine Metabolism",729,28,37,0,1.0,8.500000e+01,1.000000e+00
Polyunsaturated Fatty Acid (n3 and n6),729,13,37,0,1.0,8.500000e+01,1.000000e+00
"Fructose, Mannose and Galactose Metabolism",729,3,37,0,1.0,8.500000e+01,1.000000e+00


In [91]:
E2_neg_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Sphingolipid Metabolism,729,44,25,15,0.0,7.488917e-12,7.488917e-12
Ceramide PEs,729,1,25,1,0.034294,2.914952e+00,9.716507e-01
Sphingomyelins,729,1,25,1,0.034294,2.914952e+00,9.716507e-01
Tocopherol Metabolism,729,3,25,1,0.099525,8.459604e+00,1.000000e+00
Sterol,729,5,25,1,0.160514,1.364373e+01,1.000000e+00
...,...,...,...,...,...,...,...
Aminosugar Metabolism,729,4,25,0,1.0,8.500000e+01,1.000000e+00
"Fructose, Mannose and Galactose Metabolism",729,3,25,0,1.0,8.500000e+01,1.000000e+00
Guanidino and Acetamido Metabolism,729,1,25,0,1.0,8.500000e+01,1.000000e+00
"Purine Metabolism, Guanine containing",729,2,25,0,1.0,8.500000e+01,1.000000e+00


In [92]:
E4_pos_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Lysolipid,729,46,25,15,0.0,1.621732e-11,1.621732e-11
Tryptophan Metabolism,729,15,25,2,0.090242,7.670601e+00,1.000000e+00
Ascorbate and Aldarate Metabolism,729,3,25,1,0.099525,8.459604e+00,1.000000e+00
"Methionine, Cysteine, SAM and Taurine Metabolism",729,18,25,2,0.123553,1.050202e+01,1.000000e+00
Monoacylglycerol,729,4,25,1,0.130533,1.109529e+01,1.000000e+00
...,...,...,...,...,...,...,...
Secondary Bile Acid Metabolism,729,14,25,0,1.0,8.500000e+01,1.000000e+00
Polyunsaturated Fatty Acid (n3 and n6),729,13,25,0,1.0,8.500000e+01,1.000000e+00
Aminosugar Metabolism,729,4,25,0,1.0,8.500000e+01,1.000000e+00
Gamma-glutamyl Amino Acid,729,14,25,0,1.0,8.500000e+01,1.000000e+00


In [93]:
E4_neg_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Chemical,729,13,7,2,0.005868,0.498770,0.49877
"Glycolysis, Gluconeogenesis, and Pyruvate Metabolism",729,6,7,1,0.056437,4.797139,1.00000
"Glycine, Serine and Threonine Metabolism",729,9,7,1,0.083616,7.107368,1.00000
Plasmalogen,729,21,7,1,0.185733,15.787314,1.00000
Steroid,729,26,7,1,0.225308,19.151181,1.00000
...,...,...,...,...,...,...,...
Guanidino and Acetamido Metabolism,729,1,7,0,1.0,85.000000,1.00000
Fatty Acid Metabolism (also BCAA Metabolism),729,3,7,0,1.0,85.000000,1.00000
Long Chain Fatty Acid,729,15,7,0,1.0,85.000000,1.00000
Glycogen Metabolism,729,2,7,0,1.0,85.000000,1.00000


# Arivale male delta age, pre-adjusted

In [94]:
arivale_delta_age_result_df = pd.read_csv('/notebooks/0. APOE-Multiomics/Data_Files/250217_Arivale_Indiv_delage_MALE.csv')

In [95]:
arivale_delta_age_result_df['bioyoung_assoc'] = 'ns'
arivale_delta_age_result_df['bioold_assoc'] = 'ns'

for i in arivale_delta_age_result_df.index:
    if (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Healthy]_p'][i] < 0.05) and (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Healthy]'][i] > 0):
        arivale_delta_age_result_df['bioyoung_assoc'][i] = 'Positive'
    if (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Healthy]_p'][i] < 0.05) and (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Healthy]'][i] < 0):
        arivale_delta_age_result_df['bioyoung_assoc'][i] = 'Negative'
        
    if (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Unhealthy]_p'][i] < 0.05) and (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Unhealthy]'][i] > 0):
        arivale_delta_age_result_df['bioold_assoc'][i] = 'Positive'
    if (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Unhealthy]_p'][i] < 0.05) and (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Unhealthy]'][i] < 0):
        arivale_delta_age_result_df['bioold_assoc'][i] = 'Negative'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_delta_age_result_df['bioold_assoc'][i] = 'Negative'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_delta_age_result_df['bioyoung_assoc'][i] = 'Negative'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_delta_age_result_df['bioold_assoc'][i] = 'Positive'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-ve

In [96]:
annots = met_met[['SUB_PATHWAY','SUPER_PATHWAY','BIOCHEMICAL_NAME']]
annots.rename(columns={"BIOCHEMICAL_NAME": "col1"},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [97]:
arivale_delta_age_result_df = arivale_delta_age_result_df[['col1','bioyoung_assoc','bioold_assoc']]

In [98]:
arivale_enrich_df = arivale_delta_age_result_df.merge(annots, how='left',on='col1')

In [99]:
dict_all_analytes = {}

In [100]:
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    dict_all_analytes[i] = arivale_enrich_df[(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

In [101]:
# make dict for a each thing

In [102]:
bioyoung_positive = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    bioyoung_positive[i] = arivale_enrich_df[(arivale_enrich_df.bioyoung_assoc == 'Positive')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

bioyoung_negative = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    bioyoung_negative[i] = arivale_enrich_df[(arivale_enrich_df.bioyoung_assoc == 'Negative')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

bioold_positive = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    bioold_positive[i] = arivale_enrich_df[(arivale_enrich_df.bioold_assoc == 'Positive')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

bioold_negative = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    bioold_negative[i] = arivale_enrich_df[(arivale_enrich_df.bioold_assoc == 'Negative')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()


In [103]:
bioyoung_pos_results = calc_enrichment_bckgset(bioyoung_positive, dict_all_analytes, thr=0.05)

In [104]:
bioyoung_neg_results = calc_enrichment_bckgset(bioyoung_negative, dict_all_analytes, thr=0.05)

In [105]:
bioold_pos_results = calc_enrichment_bckgset(bioold_positive, dict_all_analytes, thr=0.05)

In [106]:
bioold_neg_results = calc_enrichment_bckgset(bioold_negative, dict_all_analytes, thr=0.05)

In [107]:
bioyoung_pos_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Long Chain Fatty Acid,729,15,77,13,0.0,5.822243e-10,5.822243e-10
Diacylglycerol,729,13,77,11,0.0,5.175997e-08,2.587999e-08
Polyunsaturated Fatty Acid (n3 and n6),729,13,77,10,0.0,1.862148e-06,6.207160e-07
Fatty Acid Metabolism(Acyl Carnitine),729,32,77,10,0.000912,7.748399e-02,1.937100e-02
Endocannabinoid,729,4,77,2,0.057408,4.879645e+00,8.132742e-01
...,...,...,...,...,...,...,...
Secondary Bile Acid Metabolism,729,14,77,0,1.0,8.500000e+01,1.000000e+00
Histidine Metabolism,729,12,77,0,1.0,8.500000e+01,1.000000e+00
Aminosugar Metabolism,729,4,77,0,1.0,8.500000e+01,1.000000e+00
Ceramides,729,7,77,0,1.0,8.500000e+01,1.000000e+00


In [108]:
bioyoung_neg_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Dipeptide,729,5,17,2,0.004917,0.417923,0.417923
"Pyrimidine Metabolism, Uracil containing",729,9,17,2,0.016749,1.423699,0.660722
Dipeptide Derivative,729,1,17,1,0.02332,1.982167,0.660722
Carnitine Metabolism,729,2,17,1,0.046127,3.920771,0.958858
Fatty Acid Metabolism (also BCAA Metabolism),729,3,17,1,0.068432,5.816709,0.958858
...,...,...,...,...,...,...,...
Histidine Metabolism,729,12,17,0,1.0,85.000000,1.000000
Aminosugar Metabolism,729,4,17,0,1.0,85.000000,1.000000
Oxidative Phosphorylation,729,1,17,0,1.0,85.000000,1.000000
Sterol,729,5,17,0,1.0,85.000000,1.000000


In [109]:
bioold_pos_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Phospholipid Metabolism,729,73,63,18,0.00001,0.000845,0.000845
Polyamine Metabolism,729,7,63,5,0.000076,0.006457,0.003229
Nicotinate and Nicotinamide Metabolism,729,5,63,2,0.061997,5.269768,1.000000
Histidine Metabolism,729,12,63,3,0.077024,6.547040,1.000000
Phenylalanine Metabolism,729,1,63,1,0.08642,7.345679,1.000000
...,...,...,...,...,...,...,...
Lysine Metabolism,729,11,63,0,1.0,85.000000,1.000000
TCA Cycle,729,7,63,0,1.0,85.000000,1.000000
Dipeptide,729,5,63,0,1.0,85.000000,1.000000
Endocannabinoid,729,4,63,0,1.0,85.000000,1.000000


In [110]:
bioold_neg_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
"Fatty Acid, Dicarboxylate",729,22,41,5,0.005684,0.483147,0.318763
Tryptophan Metabolism,729,15,41,4,0.0075,0.637525,0.318763
Oxidative Phosphorylation,729,1,41,1,0.056241,4.780521,1.000000
Dipeptide Derivative,729,1,41,1,0.056241,4.780521,1.000000
"Methionine, Cysteine, SAM and Taurine Metabolism",729,18,41,3,0.07476,6.354621,1.000000
...,...,...,...,...,...,...,...
Glycogen Metabolism,729,2,41,0,1.0,85.000000,1.000000
Fatty Acid Metabolism(Acyl Glycine),729,2,41,0,1.0,85.000000,1.000000
Lysine Metabolism,729,11,41,0,1.0,85.000000,1.000000
"Fructose, Mannose and Galactose Metabolism",729,3,41,0,1.0,85.000000,1.000000


# Arivale female delta age, pre-adjusted

In [111]:
arivale_delta_age_result_df = pd.read_csv('/notebooks/0. APOE-Multiomics/Data_Files/250217_Arivale_Indiv_delage_FEMALE.csv')

In [112]:
arivale_delta_age_result_df['bioyoung_assoc'] = 'ns'
arivale_delta_age_result_df['bioold_assoc'] = 'ns'

for i in arivale_delta_age_result_df.index:
    if (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Healthy]_p'][i] < 0.05) and (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Healthy]'][i] > 0):
        arivale_delta_age_result_df['bioyoung_assoc'][i] = 'Positive'
    if (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Healthy]_p'][i] < 0.05) and (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Healthy]'][i] < 0):
        arivale_delta_age_result_df['bioyoung_assoc'][i] = 'Negative'
        
    if (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Unhealthy]_p'][i] < 0.05) and (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Unhealthy]'][i] > 0):
        arivale_delta_age_result_df['bioold_assoc'][i] = 'Positive'
    if (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Unhealthy]_p'][i] < 0.05) and (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Unhealthy]'][i] < 0):
        arivale_delta_age_result_df['bioold_assoc'][i] = 'Negative'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_delta_age_result_df['bioyoung_assoc'][i] = 'Negative'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_delta_age_result_df['bioold_assoc'][i] = 'Positive'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_delta_age_result_df['bioold_assoc'][i] = 'Negative'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-ve

In [113]:
annots = met_met[['SUB_PATHWAY','SUPER_PATHWAY','BIOCHEMICAL_NAME']]
annots.rename(columns={"BIOCHEMICAL_NAME": "col1"},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [114]:
arivale_delta_age_result_df = arivale_delta_age_result_df[['col1','bioyoung_assoc','bioold_assoc']]

In [115]:
arivale_enrich_df = arivale_delta_age_result_df.merge(annots, how='left',on='col1')

In [116]:
dict_all_analytes = {}

In [117]:
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    dict_all_analytes[i] = arivale_enrich_df[(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

In [118]:
# make dict for a each thing

In [119]:
bioyoung_positive = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    bioyoung_positive[i] = arivale_enrich_df[(arivale_enrich_df.bioyoung_assoc == 'Positive')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

bioyoung_negative = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    bioyoung_negative[i] = arivale_enrich_df[(arivale_enrich_df.bioyoung_assoc == 'Negative')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

bioold_positive = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    bioold_positive[i] = arivale_enrich_df[(arivale_enrich_df.bioold_assoc == 'Positive')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

bioold_negative = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    bioold_negative[i] = arivale_enrich_df[(arivale_enrich_df.bioold_assoc == 'Negative')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()


In [120]:
bioyoung_pos_results = calc_enrichment_bckgset(bioyoung_positive, dict_all_analytes, thr=0.05)

In [121]:
bioyoung_neg_results = calc_enrichment_bckgset(bioyoung_negative, dict_all_analytes, thr=0.05)

In [122]:
bioold_pos_results = calc_enrichment_bckgset(bioold_positive, dict_all_analytes, thr=0.05)

In [123]:
bioold_neg_results = calc_enrichment_bckgset(bioold_negative, dict_all_analytes, thr=0.05)

In [124]:
bioyoung_pos_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Steroid,729,26,31,6,0.000453,0.038536,0.038536
Sphingolipid Metabolism,729,44,31,5,0.0332,2.821971,0.903635
"Methionine, Cysteine, SAM and Taurine Metabolism",729,18,31,3,0.036839,3.131318,0.903635
Phosphatidylserine (PS),729,1,31,1,0.042524,3.614540,0.903635
Glutathione Metabolism,729,3,31,1,0.122385,10.402711,1.000000
...,...,...,...,...,...,...,...
Mevalonate Metabolism,729,1,31,0,1.0,85.000000,1.000000
Xanthine Metabolism,729,11,31,0,1.0,85.000000,1.000000
"Pyrimidine Metabolism, Uracil containing",729,9,31,0,1.0,85.000000,1.000000
"Purine Metabolism, Adenine containing",729,5,31,0,1.0,85.000000,1.000000


In [125]:
bioyoung_neg_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Plasmalogen,729,21,90,9,0.000378,0.032133,0.032133
Urea cycle; Arginine and Proline Metabolism,729,17,90,7,0.002395,0.203542,0.101771
Vitamin B6 Metabolism,729,2,90,2,0.015093,1.282899,0.321750
Dipeptide,729,5,90,3,0.015141,1.287000,0.321750
Monoacylglycerol,729,4,90,2,0.076599,6.510910,1.000000
...,...,...,...,...,...,...,...
Pantothenate and CoA Metabolism,729,1,90,0,1.0,85.000000,1.000000
Secondary Bile Acid Metabolism,729,14,90,0,1.0,85.000000,1.000000
Androgenic Steroids,729,2,90,0,1.0,85.000000,1.000000
Diacylglycerol,729,13,90,0,1.0,85.000000,1.000000


In [126]:
bioold_pos_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Plasmalogen,729,21,153,20,0.0,1.431667e-11,1.431667e-11
Partially Characterized Molecules,729,5,153,4,0.007857,6.678109e-01,3.339054e-01
Urea cycle; Arginine and Proline Metabolism,729,17,153,7,0.045713,3.885590e+00,1.000000e+00
Food Component/Plant,729,30,153,10,0.076036,6.463102e+00,1.000000e+00
Histidine Metabolism,729,12,153,5,0.084683,7.198074e+00,1.000000e+00
...,...,...,...,...,...,...,...
Vitamin A Metabolism,729,1,153,0,1.0,8.500000e+01,1.000000e+00
Fatty Acid Metabolism (Acyl Choline),729,3,153,0,1.0,8.500000e+01,1.000000e+00
Primary Bile Acid Metabolism,729,8,153,0,1.0,8.500000e+01,1.000000e+00
"Fatty Acid, Branched",729,2,153,0,1.0,8.500000e+01,1.000000e+00


In [127]:
bioold_neg_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Diacylglycerol,729,13,21,4,0.000309,0.026276,0.026276
Steroid,729,26,21,4,0.005047,0.429019,0.214510
Creatine Metabolism,729,3,21,1,0.084066,7.145633,1.000000
"Glycolysis, Gluconeogenesis, and Pyruvate Metabolism",729,6,21,1,0.161375,13.716849,1.000000
TCA Cycle,729,7,21,1,0.185733,15.787314,1.000000
...,...,...,...,...,...,...,...
Mevalonate Metabolism,729,1,21,0,1.0,85.000000,1.000000
"Pyrimidine Metabolism, Uracil containing",729,9,21,0,1.0,85.000000,1.000000
Inositol Metabolism,729,1,21,0,1.0,85.000000,1.000000
Aminosugar Metabolism,729,4,21,0,1.0,85.000000,1.000000


# TwinsUK APOE male, pre-adjusted

In [128]:
TwinsUK_APOE_result_df = pd.read_csv('/notebooks/0. APOE-Multiomics/Data_Files/250217_TwinsUK_Indiv_MALE.csv')

In [129]:
TwinsUK_APOE_result_df['E2_assoc'] = 'ns'
TwinsUK_APOE_result_df['E4_assoc'] = 'ns'

for i in TwinsUK_APOE_result_df.index:
    if (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]_p'][i] < 0.05) and (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]'][i] > 0):
        TwinsUK_APOE_result_df['E2_assoc'][i] = 'Positive'
    if (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]_p'][i] < 0.05) and (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]'][i] < 0):
        TwinsUK_APOE_result_df['E2_assoc'][i] = 'Negative'
        
    if (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]_p'][i] < 0.05) and (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]'][i] > 0):
        TwinsUK_APOE_result_df['E4_assoc'][i] = 'Positive'
    if (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]_p'][i] < 0.05) and (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]'][i] < 0):
        TwinsUK_APOE_result_df['E4_assoc'][i] = 'Negative'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TwinsUK_APOE_result_df['E4_assoc'][i] = 'Negative'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TwinsUK_APOE_result_df['E2_assoc'][i] = 'Negative'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TwinsUK_APOE_result_df['E4_assoc'][i] = 'Positive'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TwinsUK_APOE_re

In [130]:
annots = met_met[['SUB_PATHWAY','SUPER_PATHWAY','BIOCHEMICAL_NAME']]
annots.rename(columns={"BIOCHEMICAL_NAME": "col1"},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [131]:
TwinsUK_APOE_result_df = TwinsUK_APOE_result_df[['col1','E2_assoc','E4_assoc']]

In [132]:
TwinsUK_enrich_df = TwinsUK_APOE_result_df.merge(annots, how='left',on='col1')

In [133]:
dict_all_analytes = {}

In [134]:
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    dict_all_analytes[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

In [135]:
# make dict for a each thing

In [136]:
E2_positive = {}
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    E2_positive[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.E2_assoc == 'Positive')&(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E2_negative = {}
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    E2_negative[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.E2_assoc == 'Negative')&(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E4_positive = {}
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    E4_positive[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.E4_assoc == 'Positive')&(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E4_negative = {}
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    E4_negative[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.E4_assoc == 'Negative')&(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()


In [137]:
E2_pos_results = calc_enrichment_bckgset(E2_positive, dict_all_analytes, thr=0.05)

In [138]:
E2_neg_results = calc_enrichment_bckgset(E2_negative, dict_all_analytes, thr=0.05)

In [139]:
E4_pos_results = calc_enrichment_bckgset(E4_positive, dict_all_analytes, thr=0.05)

In [140]:
E4_neg_results = calc_enrichment_bckgset(E4_negative, dict_all_analytes, thr=0.05)

In [141]:
E2_pos_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Drug,523,1,8,1,0.015296,1.116635,1.0
Fatty Acid Metabolism(Acyl Glycine),523,2,8,1,0.030388,2.218296,1.0
Aminosugar Metabolism,523,3,8,1,0.045276,3.305155,1.0
"Glycine, Serine and Threonine Metabolism",523,7,8,1,0.102849,7.507967,1.0
Lysolipid,523,44,8,2,0.14,10.220004,1.0
...,...,...,...,...,...,...,...
Glycerolipid Metabolism,523,2,8,0,1.0,73.000000,1.0
Dipeptide,523,6,8,0,1.0,73.000000,1.0
"Leucine, Isoleucine and Valine Metabolism",523,24,8,0,1.0,73.000000,1.0
"Fatty Acid, Amino",523,2,8,0,1.0,73.000000,1.0


In [142]:
E2_neg_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Steroid,523,22,16,3,0.025374,1.852288,0.781243
Dipeptide Derivative,523,1,16,1,0.030593,2.233270,0.781243
"Leucine, Isoleucine and Valine Metabolism",523,24,16,3,0.032106,2.343730,0.781243
Tryptophan Metabolism,523,12,16,2,0.048462,3.537709,0.884427
Hemoglobin and Porphyrin Metabolism,523,3,16,1,0.089165,6.509010,1.000000
...,...,...,...,...,...,...,...
Chemical,523,6,16,0,1.0,73.000000,1.000000
Glutathione Metabolism,523,1,16,0,1.0,73.000000,1.000000
Nicotinate and Nicotinamide Metabolism,523,5,16,0,1.0,73.000000,1.000000
Lysoplasmalogen,523,6,16,0,1.0,73.000000,1.000000


In [143]:
E4_pos_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Tocopherol Metabolism,523,3,13,2,0.00169,0.123379,0.121643
Primary Bile Acid Metabolism,523,4,13,2,0.003333,0.243285,0.121643
"Glycolysis, Gluconeogenesis, and Pyruvate Metabolism",523,5,13,2,0.005476,0.399772,0.133257
Mevalonate Metabolism,523,1,13,1,0.024857,1.814532,0.453633
Aminosugar Metabolism,523,3,13,1,0.072868,5.319335,1.000000
...,...,...,...,...,...,...,...
Glutathione Metabolism,523,1,13,0,1.0,73.000000,1.000000
Nicotinate and Nicotinamide Metabolism,523,5,13,0,1.0,73.000000,1.000000
Hemoglobin and Porphyrin Metabolism,523,3,13,0,1.0,73.000000,1.000000
Lysoplasmalogen,523,6,13,0,1.0,73.000000,1.000000


In [144]:
E4_neg_results.head(n=10)

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Long Chain Fatty Acid,523,13,41,12,0.0,9.225279e-12,9.225279e-12
Polyunsaturated Fatty Acid (n3 and n6),523,11,41,7,3e-06,0.0002093261,0.000104663
"Fatty Acid, Monohydroxy",523,10,41,4,0.004856,0.3544826,0.1181609
Medium Chain Fatty Acid,523,6,41,3,0.007597,0.554615,0.1386538
"Fatty Acid, Dicarboxylate",523,9,41,3,0.026991,1.970372,0.3940745
"Fatty Acid, Branched",523,1,41,1,0.078394,5.722753,0.8175362
Pantothenate and CoA Metabolism,523,1,41,1,0.078394,5.722753,0.8175362
Glycerolipid Metabolism,523,2,41,1,0.150781,11.00698,1.0
Fatty Acid Metabolism(Acyl Glycine),523,2,41,1,0.150781,11.00698,1.0
Urea cycle; Arginine and Proline Metabolism,523,13,41,2,0.271063,19.78762,1.0


# TwinsUK APOE female, pre-adjusted

In [145]:
TwinsUK_APOE_result_df = pd.read_csv('/notebooks/0. APOE-Multiomics/Data_Files/250217_TwinsUK_Indiv_FEMALE.csv')

In [146]:
TwinsUK_APOE_result_df['E2_assoc'] = 'ns'
TwinsUK_APOE_result_df['E4_assoc'] = 'ns'

for i in TwinsUK_APOE_result_df.index:
    if (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]_p'][i] < 0.05) and (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]'][i] > 0):
        TwinsUK_APOE_result_df['E2_assoc'][i] = 'Positive'
    if (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]_p'][i] < 0.05) and (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]'][i] < 0):
        TwinsUK_APOE_result_df['E2_assoc'][i] = 'Negative'
        
    if (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]_p'][i] < 0.05) and (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]'][i] > 0):
        TwinsUK_APOE_result_df['E4_assoc'][i] = 'Positive'
    if (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]_p'][i] < 0.05) and (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]'][i] < 0):
        TwinsUK_APOE_result_df['E4_assoc'][i] = 'Negative'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TwinsUK_APOE_result_df['E2_assoc'][i] = 'Negative'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TwinsUK_APOE_result_df['E4_assoc'][i] = 'Positive'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TwinsUK_APOE_result_df['E2_assoc'][i] = 'Positive'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TwinsUK_APOE_re

In [147]:
annots = met_met[['SUB_PATHWAY','SUPER_PATHWAY','BIOCHEMICAL_NAME']]
annots.rename(columns={"BIOCHEMICAL_NAME": "col1"},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [148]:
TwinsUK_APOE_result_df = TwinsUK_APOE_result_df[['col1','E2_assoc','E4_assoc']]

In [149]:
TwinsUK_enrich_df = TwinsUK_APOE_result_df.merge(annots, how='left',on='col1')

In [150]:
dict_all_analytes = {}

In [151]:
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    dict_all_analytes[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

In [152]:
# make dict for a each thing

In [153]:
E2_positive = {}
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    E2_positive[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.E2_assoc == 'Positive')&(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E2_negative = {}
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    E2_negative[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.E2_assoc == 'Negative')&(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E4_positive = {}
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    E4_positive[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.E4_assoc == 'Positive')&(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E4_negative = {}
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    E4_negative[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.E4_assoc == 'Negative')&(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()


In [154]:
E2_pos_results = calc_enrichment_bckgset(E2_positive, dict_all_analytes, thr=0.05)

In [155]:
E2_neg_results = calc_enrichment_bckgset(E2_negative, dict_all_analytes, thr=0.05)

In [156]:
E4_pos_results = calc_enrichment_bckgset(E4_positive, dict_all_analytes, thr=0.05)

In [157]:
E4_neg_results = calc_enrichment_bckgset(E4_negative, dict_all_analytes, thr=0.05)

In [158]:
E2_pos_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Diacylglycerol,523,4,32,4,0.000012,0.000852,0.000852
Steroid,523,22,32,5,0.00778,0.567953,0.283976
Phenylalanine and Tyrosine Metabolism,523,22,32,4,0.038698,2.824970,0.941657
Carnitine Metabolism,523,2,32,1,0.118737,8.667824,1.000000
"Fatty Acid, Monohydroxy",523,10,32,2,0.120048,8.763485,1.000000
...,...,...,...,...,...,...,...
TCA Cycle,523,7,32,0,1.0,73.000000,1.000000
"Glycine, Serine and Threonine Metabolism",523,7,32,0,1.0,73.000000,1.000000
"Purine Metabolism, (Hypo)Xanthine/Inosine containing",523,5,32,0,1.0,73.000000,1.000000
Vitamin A Metabolism,523,1,32,0,1.0,73.000000,1.000000


In [159]:
E2_neg_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Sphingolipid Metabolism,523,19,18,8,0.0,0.000002,0.000002
Bacterial/Fungal,523,1,18,1,0.034417,2.512428,1.000000
Sterol,523,2,18,1,0.067713,4.943034,1.000000
Ascorbate and Aldarate Metabolism,523,3,18,1,0.099922,7.294331,1.000000
Chemical,523,6,18,1,0.190362,13.896414,1.000000
...,...,...,...,...,...,...,...
"Pyrimidine Metabolism, Cytidine containing",523,1,18,0,1.0,73.000000,1.000000
,523,0,18,0,1.0,73.000000,1.000000
TCA Cycle,523,7,18,0,1.0,73.000000,1.000000
"Fatty Acid, Monohydroxy",523,10,18,0,1.0,73.000000,1.000000


In [160]:
E4_pos_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Lysolipid,523,44,45,19,0.0,2.623496e-09,2.623496e-09
Monoacylglycerol,523,8,45,7,0.0,1.215559e-05,6.077793e-06
Sphingolipid Metabolism,523,19,45,5,0.017556,1.281580e+00,4.271934e-01
Diacylglycerol,523,4,45,2,0.038872,2.837650e+00,7.094124e-01
Sterol,523,2,45,1,0.164832,1.203270e+01,1.000000e+00
...,...,...,...,...,...,...,...
Plasmalogen,523,15,45,0,1.0,7.300000e+01,1.000000e+00
TCA Cycle,523,7,45,0,1.0,7.300000e+01,1.000000e+00
"Glycine, Serine and Threonine Metabolism",523,7,45,0,1.0,7.300000e+01,1.000000e+00
Gamma-glutamyl Amino Acid,523,13,45,0,1.0,7.300000e+01,1.000000e+00


In [161]:
E4_neg_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Xanthine Metabolism,523,12,16,5,0.00001,0.000696,0.000696
Ketone Bodies,523,1,16,1,0.030593,2.233270,1.000000
Phenylalanine and Tyrosine Metabolism,523,22,16,2,0.141939,10.361539,1.000000
Steroid,523,22,16,2,0.141939,10.361539,1.000000
Alanine and Aspartate Metabolism,523,5,16,1,0.144406,10.541634,1.000000
...,...,...,...,...,...,...,...
"Pyrimidine Metabolism, Cytidine containing",523,1,16,0,1.0,73.000000,1.000000
Plasmalogen,523,15,16,0,1.0,73.000000,1.000000
TCA Cycle,523,7,16,0,1.0,73.000000,1.000000
Primary Bile Acid Metabolism,523,4,16,0,1.0,73.000000,1.000000


---

# Enrichment of pFDR < 0.1

In [162]:
arivale_APOE_result_df = pd.read_csv('/notebooks/0. APOE-Multiomics/Data_Files/250217_Arivale_Indiv.csv')

In [163]:
TwinsUK_APOE_result_df = pd.read_csv('/notebooks/0. APOE-Multiomics/Data_Files/250217_TwinsUK_Indiv.csv')

# ARIVALE

In [164]:
arivale_APOE_result_df['E2_assoc'] = 'ns'
arivale_APOE_result_df['E4_assoc'] = 'ns'

for i in arivale_APOE_result_df.index:
    if (arivale_APOE_result_df['E2_pval_adj'][i] < 0.1) and (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]'][i] > 0):
        arivale_APOE_result_df['E2_assoc'][i] = 'Positive'
    if (arivale_APOE_result_df['E2_pval_adj'][i] < 0.1) and (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]'][i] < 0):
        arivale_APOE_result_df['E2_assoc'][i] = 'Negative'
        
    if (arivale_APOE_result_df['E4_pval_adj'][i] < 0.1) and (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]'][i] > 0):
        arivale_APOE_result_df['E4_assoc'][i] = 'Positive'
    if (arivale_APOE_result_df['E4_pval_adj'][i] < 0.1) and (arivale_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]'][i] < 0):
        arivale_APOE_result_df['E4_assoc'][i] = 'Negative'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_APOE_result_df['E2_assoc'][i] = 'Positive'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_APOE_result_df['E2_assoc'][i] = 'Negative'


In [165]:
annots = met_met[['SUB_PATHWAY','SUPER_PATHWAY','BIOCHEMICAL_NAME']]
annots.rename(columns={"BIOCHEMICAL_NAME": "col1"},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [166]:
arivale_APOE_result_df = arivale_APOE_result_df[['col1','E2_assoc','E4_assoc']]

In [167]:
arivale_enrich_df = arivale_APOE_result_df.merge(annots, how='left',on='col1')

In [168]:
dict_all_analytes = {}

In [169]:
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    dict_all_analytes[i] = arivale_enrich_df[(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

In [170]:
# make dict for a each thing

In [171]:
E2_positive = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    E2_positive[i] = arivale_enrich_df[(arivale_enrich_df.E2_assoc == 'Positive')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E2_negative = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    E2_negative[i] = arivale_enrich_df[(arivale_enrich_df.E2_assoc == 'Negative')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E4_positive = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    E4_positive[i] = arivale_enrich_df[(arivale_enrich_df.E4_assoc == 'Positive')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E4_negative = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    E4_negative[i] = arivale_enrich_df[(arivale_enrich_df.E4_assoc == 'Negative')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()


In [172]:
E2_pos_results = calc_enrichment_bckgset(E2_positive, dict_all_analytes, thr=0.05)

In [173]:
E2_neg_results = calc_enrichment_bckgset(E2_negative, dict_all_analytes, thr=0.05)

In [174]:
E4_pos_results = calc_enrichment_bckgset(E4_positive, dict_all_analytes, thr=0.05)

In [175]:
E4_neg_results = calc_enrichment_bckgset(E4_negative, dict_all_analytes, thr=0.05)

In [176]:
E2_pos_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Diacylglycerol,729,13,10,8,0.0,2.554316e-12,2.554316e-12
Monoacylglycerol,729,4,10,1,0.05386,4.578069e+00,1.000000e+00
Phospholipid Metabolism,729,73,10,1,0.654258,5.561192e+01,1.000000e+00
Sphingomyelins,729,1,10,0,1.0,8.500000e+01,1.000000e+00
Vitamin B6 Metabolism,729,2,10,0,1.0,8.500000e+01,1.000000e+00
...,...,...,...,...,...,...,...
Fatty Acid Metabolism(Acyl Carnitine),729,32,10,0,1.0,8.500000e+01,1.000000e+00
"Purine Metabolism, Adenine containing",729,5,10,0,1.0,8.500000e+01,1.000000e+00
Chemical,729,13,10,0,1.0,8.500000e+01,1.000000e+00
Creatine Metabolism,729,3,10,0,1.0,8.500000e+01,1.000000e+00


In [177]:
E2_neg_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Sphingolipid Metabolism,729,44,3,2,0.010283,0.874068,0.874068
Food Component/Plant,729,30,3,1,0.118602,10.081170,1.000000
Diacylglycerol,729,13,3,0,1.0,85.000000,1.000000
Sphingomyelins,729,1,3,0,1.0,85.000000,1.000000
Vitamin B6 Metabolism,729,2,3,0,1.0,85.000000,1.000000
...,...,...,...,...,...,...,...
Fatty Acid Metabolism(Acyl Carnitine),729,32,3,0,1.0,85.000000,1.000000
"Purine Metabolism, Adenine containing",729,5,3,0,1.0,85.000000,1.000000
Chemical,729,13,3,0,1.0,85.000000,1.000000
Creatine Metabolism,729,3,3,0,1.0,85.000000,1.000000


In [178]:
E4_pos_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Diacylglycerol,729,13,0,0,1.0,85.0,1.0
Vitamin B6 Metabolism,729,2,0,0,1.0,85.0,1.0
Corticosteroids,729,1,0,0,1.0,85.0,1.0
Primary Bile Acid Metabolism,729,8,0,0,1.0,85.0,1.0
Guanidino and Acetamido Metabolism,729,1,0,0,1.0,85.0,1.0
...,...,...,...,...,...,...,...
Chemical,729,13,0,0,1.0,85.0,1.0
Endocannabinoid,729,4,0,0,1.0,85.0,1.0
Creatine Metabolism,729,3,0,0,1.0,85.0,1.0
"Pyrimidine Metabolism, Uracil containing",729,9,0,0,1.0,85.0,1.0


In [179]:
E4_neg_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Diacylglycerol,729,13,0,0,1.0,85.0,1.0
Vitamin B6 Metabolism,729,2,0,0,1.0,85.0,1.0
Corticosteroids,729,1,0,0,1.0,85.0,1.0
Primary Bile Acid Metabolism,729,8,0,0,1.0,85.0,1.0
Guanidino and Acetamido Metabolism,729,1,0,0,1.0,85.0,1.0
...,...,...,...,...,...,...,...
Chemical,729,13,0,0,1.0,85.0,1.0
Endocannabinoid,729,4,0,0,1.0,85.0,1.0
Creatine Metabolism,729,3,0,0,1.0,85.0,1.0
"Pyrimidine Metabolism, Uracil containing",729,9,0,0,1.0,85.0,1.0


# TwinsUK

In [180]:
TwinsUK_APOE_result_df['E2_assoc'] = 'ns'
TwinsUK_APOE_result_df['E4_assoc'] = 'ns'

for i in TwinsUK_APOE_result_df.index:
    if (TwinsUK_APOE_result_df['E2_pval_adj'][i] < 0.1) and (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]'][i] > 0):
        TwinsUK_APOE_result_df['E2_assoc'][i] = 'Positive'
    if (TwinsUK_APOE_result_df['E2_pval_adj'][i] < 0.1) and (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E2]'][i] < 0):
        TwinsUK_APOE_result_df['E2_assoc'][i] = 'Negative'
        
    if (TwinsUK_APOE_result_df['E4_pval_adj'][i] < 0.1) and (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]'][i] > 0):
        TwinsUK_APOE_result_df['E4_assoc'][i] = 'Positive'
    if (TwinsUK_APOE_result_df['E4_pval_adj'][i] < 0.1) and (TwinsUK_APOE_result_df['C(APOE_Status, Treatment(reference=1))[T.E4]'][i] < 0):
        TwinsUK_APOE_result_df['E4_assoc'][i] = 'Negative'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TwinsUK_APOE_result_df['E2_assoc'][i] = 'Negative'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TwinsUK_APOE_result_df['E2_assoc'][i] = 'Positive'


In [181]:
annots = met_met[['SUB_PATHWAY','SUPER_PATHWAY','BIOCHEMICAL_NAME']]
annots.rename(columns={"BIOCHEMICAL_NAME": "col1"},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [182]:
TwinsUK_APOE_result_df = TwinsUK_APOE_result_df[['col1','E2_assoc','E4_assoc']]

In [183]:
TwinsUK_enrich_df = TwinsUK_APOE_result_df.merge(annots, how='left',on='col1')

In [184]:
dict_all_analytes = {}

In [185]:
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    dict_all_analytes[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

In [186]:
# make dict for a each thing

In [187]:
E2_positive = {}
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    E2_positive[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.E2_assoc == 'Positive')&(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E2_negative = {}
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    E2_negative[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.E2_assoc == 'Negative')&(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E4_positive = {}
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    E4_positive[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.E4_assoc == 'Positive')&(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

E4_negative = {}
for i in TwinsUK_enrich_df.SUB_PATHWAY.unique():
    E4_negative[i] = TwinsUK_enrich_df[(TwinsUK_enrich_df.E4_assoc == 'Negative')&(TwinsUK_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()


In [188]:
E2_pos_results = calc_enrichment_bckgset(E2_positive, dict_all_analytes, thr=0.05)

In [189]:
E2_neg_results = calc_enrichment_bckgset(E2_negative, dict_all_analytes, thr=0.05)

In [190]:
E4_pos_results = calc_enrichment_bckgset(E4_positive, dict_all_analytes, thr=0.05)

In [191]:
E4_neg_results = calc_enrichment_bckgset(E4_negative, dict_all_analytes, thr=0.05)

In [192]:
E2_pos_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Lysolipid,523,44,0,0,1.0,73.0,1.0
Secondary Bile Acid Metabolism,523,8,0,0,1.0,73.0,1.0
Pentose Metabolism,523,4,0,0,1.0,73.0,1.0
Plasmalogen,523,15,0,0,1.0,73.0,1.0
Oxidative Phosphorylation,523,1,0,0,1.0,73.0,1.0
...,...,...,...,...,...,...,...
Lyso-phospho-ether,523,2,0,0,1.0,73.0,1.0
Ketone Bodies,523,1,0,0,1.0,73.0,1.0
Food Component/Plant,523,18,0,0,1.0,73.0,1.0
Nicotinate and Nicotinamide Metabolism,523,5,0,0,1.0,73.0,1.0


In [193]:
E2_neg_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Sterol,523,2,2,1,0.007641,0.557783,0.557783
Sphingolipid Metabolism,523,19,2,1,0.071405,5.212567,1.000000
Lysolipid,523,44,2,0,1.0,73.000000,1.000000
Fibrinogen Cleavage Peptide,523,1,2,0,1.0,73.000000,1.000000
Secondary Bile Acid Metabolism,523,8,2,0,1.0,73.000000,1.000000
...,...,...,...,...,...,...,...
Lysoplasmalogen,523,6,2,0,1.0,73.000000,1.000000
Benzoate Metabolism,523,13,2,0,1.0,73.000000,1.000000
Lyso-phospho-ether,523,2,2,0,1.0,73.000000,1.000000
Polyunsaturated Fatty Acid (n3 and n6),523,11,2,0,1.0,73.000000,1.000000


In [194]:
E4_pos_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Lysolipid,523,44,0,0,1.0,73.0,1.0
Secondary Bile Acid Metabolism,523,8,0,0,1.0,73.0,1.0
Pentose Metabolism,523,4,0,0,1.0,73.0,1.0
Plasmalogen,523,15,0,0,1.0,73.0,1.0
Oxidative Phosphorylation,523,1,0,0,1.0,73.0,1.0
...,...,...,...,...,...,...,...
Lyso-phospho-ether,523,2,0,0,1.0,73.0,1.0
Ketone Bodies,523,1,0,0,1.0,73.0,1.0
Food Component/Plant,523,18,0,0,1.0,73.0,1.0
Nicotinate and Nicotinamide Metabolism,523,5,0,0,1.0,73.0,1.0


In [195]:
E4_neg_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Lysolipid,523,44,0,0,1.0,73.0,1.0
Secondary Bile Acid Metabolism,523,8,0,0,1.0,73.0,1.0
Pentose Metabolism,523,4,0,0,1.0,73.0,1.0
Plasmalogen,523,15,0,0,1.0,73.0,1.0
Oxidative Phosphorylation,523,1,0,0,1.0,73.0,1.0
...,...,...,...,...,...,...,...
Lyso-phospho-ether,523,2,0,0,1.0,73.0,1.0
Ketone Bodies,523,1,0,0,1.0,73.0,1.0
Food Component/Plant,523,18,0,0,1.0,73.0,1.0
Nicotinate and Nicotinamide Metabolism,523,5,0,0,1.0,73.0,1.0


# Arivale delta age

In [196]:
arivale_delta_age_result_df = pd.read_csv('/notebooks/0. APOE-Multiomics/Data_Files/250217_Arivale_Indiv_delage.csv')

In [197]:
arivale_delta_age_result_df['bioyoung_assoc'] = 'ns'
arivale_delta_age_result_df['bioold_assoc'] = 'ns'

for i in arivale_delta_age_result_df.index:
    if (arivale_delta_age_result_df['Health_pval_adj'][i] < 0.1) and (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Healthy]'][i] > 0):
        arivale_delta_age_result_df['bioyoung_assoc'][i] = 'Positive'
    if (arivale_delta_age_result_df['Health_pval_adj'][i] < 0.1) and (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Healthy]'][i] < 0):
        arivale_delta_age_result_df['bioyoung_assoc'][i] = 'Negative'
        
    if (arivale_delta_age_result_df['Unhealth_pval_adj'][i] < 0.1) and (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Unhealthy]'][i] > 0):
        arivale_delta_age_result_df['bioold_assoc'][i] = 'Positive'
    if (arivale_delta_age_result_df['Unhealth_pval_adj'][i] < 0.1) and (arivale_delta_age_result_df['C(Model_Health_Indiv, Treatment(reference=1))[T.Unhealthy]'][i] < 0):
        arivale_delta_age_result_df['bioold_assoc'][i] = 'Negative'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_delta_age_result_df['bioold_assoc'][i] = 'Negative'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_delta_age_result_df['bioyoung_assoc'][i] = 'Negative'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_delta_age_result_df['bioold_assoc'][i] = 'Positive'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-ve

In [198]:
annots = met_met[['SUB_PATHWAY','SUPER_PATHWAY','BIOCHEMICAL_NAME']]
annots.rename(columns={"BIOCHEMICAL_NAME": "col1"},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [199]:
arivale_delta_age_result_df = arivale_delta_age_result_df[['col1','bioyoung_assoc','bioold_assoc']]

In [200]:
arivale_enrich_df = arivale_delta_age_result_df.merge(annots, how='left',on='col1')

In [201]:
dict_all_analytes = {}

In [202]:
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    dict_all_analytes[i] = arivale_enrich_df[(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

In [203]:
# make dict for a each thing

In [204]:
bioyoung_positive = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    bioyoung_positive[i] = arivale_enrich_df[(arivale_enrich_df.bioyoung_assoc == 'Positive')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

bioyoung_negative = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    bioyoung_negative[i] = arivale_enrich_df[(arivale_enrich_df.bioyoung_assoc == 'Negative')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

bioold_positive = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    bioold_positive[i] = arivale_enrich_df[(arivale_enrich_df.bioold_assoc == 'Positive')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()

bioold_negative = {}
for i in arivale_enrich_df.SUB_PATHWAY.unique():
    bioold_negative[i] = arivale_enrich_df[(arivale_enrich_df.bioold_assoc == 'Negative')&(arivale_enrich_df.SUB_PATHWAY == i)]['col1'].to_list()


In [205]:
bioyoung_pos_results = calc_enrichment_bckgset(bioyoung_positive, dict_all_analytes, thr=0.05)

In [206]:
bioyoung_neg_results = calc_enrichment_bckgset(bioyoung_negative, dict_all_analytes, thr=0.05)

In [207]:
bioold_pos_results = calc_enrichment_bckgset(bioold_positive, dict_all_analytes, thr=0.05)

In [208]:
bioold_neg_results = calc_enrichment_bckgset(bioold_negative, dict_all_analytes, thr=0.05)

In [209]:
bioyoung_pos_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Steroid,729,26,24,5,0.001036,0.088053,0.088053
Endocannabinoid,729,4,24,2,0.005992,0.509286,0.254643
Phosphatidylserine (PS),729,1,24,1,0.032922,2.798354,0.932785
Sphingolipid Metabolism,729,44,24,4,0.050531,4.295167,1.000000
Tryptophan Metabolism,729,15,24,2,0.084012,7.141002,1.000000
...,...,...,...,...,...,...,...
Partially Characterized Molecules,729,5,24,0,1.0,85.000000,1.000000
Tocopherol Metabolism,729,3,24,0,1.0,85.000000,1.000000
Aminosugar Metabolism,729,4,24,0,1.0,85.000000,1.000000
Chemical,729,13,24,0,1.0,85.000000,1.000000


In [210]:
bioyoung_neg_results.head(n=10)

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Plasmalogen,729,21,21,7,1e-06,4.3e-05,4.3e-05
Dipeptide,729,5,21,2,0.007508,0.63817,0.319085
Ceramide PEs,729,1,21,1,0.028807,2.44856,0.816187
Chemical,729,13,21,2,0.050938,4.329738,0.96597
Vitamin B6 Metabolism,729,2,21,1,0.056822,4.829851,0.96597
Urea cycle; Arginine and Proline Metabolism,729,17,21,2,0.08288,7.044795,1.0
"Fatty Acid, Amino",729,3,21,1,0.084066,7.145633,1.0
Sterol,729,5,21,1,0.136323,11.587481,1.0
"Glycine, Serine and Threonine Metabolism",729,9,21,1,0.232443,19.757695,1.0
Histidine Metabolism,729,12,21,1,0.297753,25.309002,1.0


In [211]:
bioyoung_neg_results.head(n=10)

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Plasmalogen,729,21,21,7,1e-06,4.3e-05,4.3e-05
Dipeptide,729,5,21,2,0.007508,0.63817,0.319085
Ceramide PEs,729,1,21,1,0.028807,2.44856,0.816187
Chemical,729,13,21,2,0.050938,4.329738,0.96597
Vitamin B6 Metabolism,729,2,21,1,0.056822,4.829851,0.96597
Urea cycle; Arginine and Proline Metabolism,729,17,21,2,0.08288,7.044795,1.0
"Fatty Acid, Amino",729,3,21,1,0.084066,7.145633,1.0
Sterol,729,5,21,1,0.136323,11.587481,1.0
"Glycine, Serine and Threonine Metabolism",729,9,21,1,0.232443,19.757695,1.0
Histidine Metabolism,729,12,21,1,0.297753,25.309002,1.0


In [212]:
bioold_pos_results.head(n=10)

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Plasmalogen,729,21,99,15,0.0,8.519822e-08,8.519822e-08
Polyamine Metabolism,729,7,99,5,0.000706,0.06001494,0.03000747
Histidine Metabolism,729,12,99,5,0.015112,1.284511,0.4281705
Phenylalanine and Tyrosine Metabolism,729,21,99,6,0.052801,4.48808,0.9237434
Phospholipid Metabolism,729,73,99,15,0.054338,4.618717,0.9237434
"Leucine, Isoleucine and Valine Metabolism",729,28,99,7,0.071979,6.118234,1.0
"Methionine, Cysteine, SAM and Taurine Metabolism",729,18,99,5,0.083839,7.126276,1.0
Mevalonate Metabolism,729,1,99,1,0.135802,11.54321,1.0
Partially Characterized Molecules,729,5,99,2,0.138697,11.78923,1.0
Nicotinate and Nicotinamide Metabolism,729,5,99,2,0.138697,11.78923,1.0


In [213]:
bioold_neg_results

Unnamed: 0,total_size,total_in_set,overall_test_size,test_in_set_aka_overlap,pvalue,bon_adj_pvalue,fdr_adj_pvalue
Steroid,729,26,18,3,0.023037,1.958160,1.0
Tryptophan Metabolism,729,15,18,2,0.050009,4.250749,1.0
Glutathione Metabolism,729,3,18,1,0.072357,6.150346,1.0
Creatine Metabolism,729,3,18,1,0.072357,6.150346,1.0
"Fatty Acid, Dicarboxylate",729,22,18,2,0.09936,8.445567,1.0
...,...,...,...,...,...,...,...
Partially Characterized Molecules,729,5,18,0,1.0,85.000000,1.0
Tocopherol Metabolism,729,3,18,0,1.0,85.000000,1.0
Ascorbate and Aldarate Metabolism,729,3,18,0,1.0,85.000000,1.0
Mevalonate Metabolism,729,1,18,0,1.0,85.000000,1.0


---

# Indiv GLM overlapping table, Table 3

In [214]:
arivale_df_full = pd.read_csv('/notebooks/0. APOE-Multiomics/Data_Files/250217_Arivale_Indiv.csv')

In [215]:
twinsuk_df_full = pd.read_csv('/notebooks/0. APOE-Multiomics/Data_Files/250217_TwinsUK_Indiv.csv')

In [216]:
arivale_df = arivale_df_full[['col1','n','C(APOE_Status, Treatment(reference=1))[T.E2]',
       'C(APOE_Status, Treatment(reference=1))[T.E4]', 'C(APOE_Status, Treatment(reference=1))[T.E2]_p',
       'C(APOE_Status, Treatment(reference=1))[T.E4]_p', 'E2_pval_adj', 'E4_pval_adj']]

In [217]:
arivale_df = arivale_df.rename(columns={'C(APOE_Status, Treatment(reference=1))[T.E2]':'Arivale E2',
                           'C(APOE_Status, Treatment(reference=1))[T.E4]':'Arivale E4',
                           'E2_pval_adj':'Arivale E2 pFDR','E4_pval_adj':'Arivale E4 pFDR'})

In [218]:
arivale_df['Arivale E2'] = arivale_df['Arivale E2'].round(3)
arivale_df['Arivale E4'] = arivale_df['Arivale E4'].round(3)

In [219]:
for i in arivale_df.index:
    if arivale_df['Arivale E2 pFDR'][i]<=0.1:
        arivale_df['Arivale E2'][i] = '*'+str(arivale_df['Arivale E2'][i])+'*'
    elif arivale_df['C(APOE_Status, Treatment(reference=1))[T.E2]_p'][i]>0.05:
        arivale_df['Arivale E2'][i] = 'ns'
    
    if arivale_df['Arivale E4 pFDR'][i]<=0.1:
        arivale_df['Arivale E4'][i] = '*'+str(arivale_df['Arivale E4'][i])+'*'
    elif arivale_df['C(APOE_Status, Treatment(reference=1))[T.E4]_p'][i]>0.05:
        arivale_df['Arivale E4'][i] = 'ns'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_df['Arivale E2'][i] = '*'+str(arivale_df['Arivale E2'][i])+'*'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arivale_df['Arivale E4'][i] = 'ns'


In [220]:
arivale_df = arivale_df[['col1', 'Arivale E2', 'Arivale E4']]

In [221]:
twinsuk_df = twinsuk_df_full[['col1','n','C(APOE_Status, Treatment(reference=1))[T.E2]',
       'C(APOE_Status, Treatment(reference=1))[T.E4]', 'C(APOE_Status, Treatment(reference=1))[T.E2]_p',
       'C(APOE_Status, Treatment(reference=1))[T.E4]_p', 'E2_pval_adj', 'E4_pval_adj']]

In [222]:
twinsuk_df = twinsuk_df.rename(columns={'C(APOE_Status, Treatment(reference=1))[T.E2]':'TwinsUK E2',
                           'C(APOE_Status, Treatment(reference=1))[T.E4]':'TwinsUK E4',
                           'E2_pval_adj':'TwinsUK E2 pFDR','E4_pval_adj':'TwinsUK E4 pFDR'})

In [223]:
twinsuk_df['TwinsUK E2'] = twinsuk_df['TwinsUK E2'].round(3)
twinsuk_df['TwinsUK E4'] = twinsuk_df['TwinsUK E4'].round(3)

In [224]:
for i in twinsuk_df.index:
    
    if twinsuk_df['TwinsUK E2 pFDR'][i]<=0.1:
        twinsuk_df['TwinsUK E2'][i] = "*"+str(twinsuk_df['TwinsUK E2'][i])+"*"
    elif twinsuk_df['C(APOE_Status, Treatment(reference=1))[T.E2]_p'][i]>0.05:
        twinsuk_df['TwinsUK E2'][i] = 'ns'
        
    if twinsuk_df['TwinsUK E4 pFDR'][i]<=0.1:
        twinsuk_df['TwinsUK E4'][i] = "*"+str(twinsuk_df['TwinsUK E4'][i])+"*"
    elif twinsuk_df['C(APOE_Status, Treatment(reference=1))[T.E4]_p'][i]>0.05:
        twinsuk_df['TwinsUK E4'][i] = 'ns'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  twinsuk_df['TwinsUK E2'][i] = 'ns'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  twinsuk_df['TwinsUK E4'][i] = 'ns'


In [225]:
twinsuk_df = twinsuk_df[['col1', 'TwinsUK E2', 'TwinsUK E4']]

In [226]:
comp_df = arivale_df.merge(twinsuk_df,on='col1',how='outer')

In [227]:
comp_df.fillna('Not in Dataset', inplace=True)

In [228]:
for i in comp_df.index:
    drop_counter = 0
    if (comp_df['Arivale E2'][i] == 'ns')|(comp_df['Arivale E2'][i] == 'Not in Dataset'):
        drop_counter +=1
    if (comp_df['Arivale E4'][i] == 'ns')|(comp_df['Arivale E4'][i] == 'Not in Dataset'):
        drop_counter +=1
    if (comp_df['TwinsUK E2'][i] == 'ns')|(comp_df['TwinsUK E2'][i] == 'Not in Dataset'):
        drop_counter +=1
    if (comp_df['TwinsUK E4'][i] == 'ns')|(comp_df['TwinsUK E4'][i] == 'Not in Dataset'):
        drop_counter +=1
    if drop_counter >= 3:
        comp_df.drop(index=i,inplace=True)

In [229]:
comp_df['col1'] = comp_df['col1'].astype(str)
comp_df.sort_values(by='col1',ascending = True)

Unnamed: 0,col1,Arivale E2,Arivale E4,TwinsUK E2,TwinsUK E4
65,1-(1-enyl-stearoyl)-2-arachidonoyl-GPC (P-18:0...,0.17,-0.116,ns,ns
12,1-(1-enyl-stearoyl)-2-dihomo-linolenoyl-GPE (P...,0.19,-0.126,Not in Dataset,Not in Dataset
193,1-(1-enyl-stearoyl)-2-oleoyl-GPE (P-18:0/18:1),0.163,ns,0.21,ns
47,1-docosapentaenoyl-GPC (22:5n3)*,ns,0.108,ns,0.148
8,1-linoleoylglycerol (18:2),*0.23*,0.15,ns,0.184
62,1-oleoylglycerol (18:1),0.202,0.112,ns,0.15
138,1-stearoyl-2-arachidonoyl-GPE (18:0/20:4),0.166,ns,0.156,ns
35,1-stearoyl-2-linoleoyl-GPE (18:0/18:2)*,ns,0.134,ns,0.128
10,1-stearoyl-GPE (18:0),ns,0.137,ns,0.16
61,1-stearyl-2-arachidonoyl-GPC (O-18:0/20:4)*,*0.232*,-0.11,Not in Dataset,Not in Dataset


In [230]:
# later sorted to unidentified metabs on bottom