# TwinsUK inter-omic interaction analysis

Inputs: Arivale_metabolomics_metadata.csv; E1199_15122022_2_TwinsUK_Interaction_Analysis.csv  
Outputs: Data organized into Supplementary File 5

Lines that save results to disk (`to_csv`) are commented out.  

In [None]:
# Load packages
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import scipy
import sys
import math
from datetime import datetime
import logging
logging.basicConfig(level=logging.INFO)

logger = logging.getLogger('analytics')
logger.setLevel(logging.INFO)

# Set up logging
from analytics.util.analytics_logger import GetAnalyticsLogger
import logging
logger = GetAnalyticsLogger()
logger.setLevel(logging.INFO)

In [None]:
# Load the TwinsUK interaction-analysis table, then discard the 'Unnamed: 0' column.
df_analysis_5SD_valid = pd.read_csv(
    '/notebooks/0. APOE-Multiomics/Data_Files/E1199_15122022_2_TwinsUK_Interaction_Analysis.csv'
)
df_analysis_5SD_valid = df_analysis_5SD_valid.drop('Unnamed: 0', axis=1)

In [None]:
# Chem and metabolite column names are picked by position in the table:
# the chems are the slice [-17:-5] plus 'IGF-1', the metabolites the slice [10:-24].
chem_list = [*df_analysis_5SD_valid.columns[-17:-5], 'IGF-1']
metab_list = list(df_analysis_5SD_valid.columns[10:-24])

# Correlation network

In [None]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.sandbox.stats.multicomp
from statsmodels.genmod.families import family, links
import itertools

In [None]:
# All analyte names: chems first, then metabolites.
analytes = [*chem_list, *metab_list]

In [None]:
# Every unordered pair of analytes.
pairs = [pair for pair in itertools.combinations(analytes, 2)]

In [None]:
# Preview the first five pairs.
pairs[:5]

In [None]:
# Rows with Sex == 'F', then only the E3 and E2 APOE_Status groups.
df_F = df_analysis_5SD_valid.loc[df_analysis_5SD_valid['Sex'] == 'F']
df_FE2 = df_F.loc[df_F['APOE_Status'].isin(['E3', 'E2'])]

In [None]:
# Rows with Sex == 'M', then only the E3 and E2 APOE_Status groups.
df_M = df_analysis_5SD_valid.loc[df_analysis_5SD_valid['Sex'] == 'M']
df_ME2 = df_M.loc[df_M['APOE_Status'].isin(['E3', 'E2'])]

In [None]:
# Rows with Sex == 'F', then only the E3 and E4 APOE_Status groups.
df_F = df_analysis_5SD_valid.loc[df_analysis_5SD_valid['Sex'] == 'F']
df_FE4 = df_F.loc[df_F['APOE_Status'].isin(['E3', 'E4'])]

In [None]:
# Rows with Sex == 'M', then only the E3 and E4 APOE_Status groups.
df_M = df_analysis_5SD_valid.loc[df_analysis_5SD_valid['Sex'] == 'M']
df_ME4 = df_M.loc[df_M['APOE_Status'].isin(['E3', 'E4'])]

In [None]:
# Rows with Sex == 'M', then only the Bio_Young and BA_equals_CA Model_Health groups.
df_M = df_analysis_5SD_valid.loc[df_analysis_5SD_valid['Sex'] == 'M']
df_Mhealth = df_M.loc[df_M['Model_Health'].isin(['Bio_Young', 'BA_equals_CA'])]

In [None]:
# Rows with Sex == 'F', then only the Bio_Young and BA_equals_CA Model_Health groups.
df_F = df_analysis_5SD_valid.loc[df_analysis_5SD_valid['Sex'] == 'F']
df_Fhealth = df_F.loc[df_F['Model_Health'].isin(['Bio_Young', 'BA_equals_CA'])]

In [None]:
# Rows with Sex == 'M', then only the Bio_Old and BA_equals_CA Model_Health groups.
df_M = df_analysis_5SD_valid.loc[df_analysis_5SD_valid['Sex'] == 'M']
df_Munhealth = df_M.loc[df_M['Model_Health'].isin(['Bio_Old', 'BA_equals_CA'])]

In [None]:
# Rows with Sex == 'F', then only the Bio_Old and BA_equals_CA Model_Health groups.
df_F = df_analysis_5SD_valid.loc[df_analysis_5SD_valid['Sex'] == 'F']
df_Funhealth = df_F.loc[df_F['Model_Health'].isin(['Bio_Old', 'BA_equals_CA'])]

In [None]:
# #TwinsUK Chem list: Arivale Chem List
# ['Urate' : 'URIC ACID',
#  'HbA1c' : 'GLYCOHEMOGLOBIN A1C',
#  'Creatinine' : 'CREATININE ENZ, SER',
#  'Triglycerides' : 'TRIGLYCERIDES',
#  'Urea' : 'UREA NITROGEN',
#  'hs_CRP' : 'CRP HIGH SENSITIVITY',
#  'Glucose' : 'GLUCOSE',
#  'Haemoglobin' : 'HEMOGLOBIN',
#  'eGFR' : 'GFR, MDRD',
#  'Tot-cholesterol' : 'CHOLESTEROL, TOTAL',
#  'HDL' : 'HDL CHOL DIRECT',
#  'LDL' : 'LDL-CHOL CALCULATION',
#  'IGF-1' : nan]

# Select specific metabolites to test based on sub-pathway

In [None]:
# Load the Arivale metabolomics metadata, discard the 'Unnamed: 0' column,
# and store CHEMICAL_ID as strings.
metam = pd.read_csv(
    '/notebooks/0. APOE-Multiomics/Data_Files/Arivale_metabolomics_metadata.csv'
).drop('Unnamed: 0', axis=1)
metam = metam.assign(CHEMICAL_ID=metam['CHEMICAL_ID'].astype(str))

In [None]:
# Accumulator for metabolite names (BIOCHEMICAL_NAME) selected by sub-pathway in the next cell.
metabs_to_add = []

In [None]:
# Append the BIOCHEMICAL_NAME of every metabolite in each selected sub-pathway,
# one sub-pathway at a time, in the order listed.
for sub_pathway in (
    'TCA Cycle',
    'Glycolysis, Gluconeogenesis, and Pyruvate Metabolism',
    'Fructose, Mannose and Galactose Metabolism',
    'Pentose Metabolism',
    'Oxidative Phosphorylation',
    'Phospholipid Metabolism',
    'Sphingolipid Metabolism',
    'Leucine, Isoleucine and Valine Metabolism',
    'Diacylglycerol',
):
    in_pathway = metam['SUB_PATHWAY'] == sub_pathway
    metabs_to_add.extend(metam.loc[in_pathway, 'BIOCHEMICAL_NAME'].to_list())

In [None]:
# Pair each selected metabolite with five clinical chemistries; for every
# metabolite the chems appear in the order given below.
# The HbA1c pairing is disabled.
valid_test = [
    (chem, metab)
    for metab in metabs_to_add
    for chem in ('Glucose', 'Triglycerides', 'HDL', 'LDL', 'Tot-cholesterol')
]

# Select specific metabolites to test based on Arivale results

In [None]:
# No extra (chem, metabolite) pairs for the female E2 run; this list is merged with valid_test before that run.
FE2_valid_list = []

In [None]:
# Extra (chem, metabolite) pairs for the male E2 run; this list is merged with valid_test before that run.
ME2_valid_list = [('Triglycerides', 'ribitol'),
 ('HbA1c', 'phenol sulfate'),
 ('Triglycerides', 'ceramide (d18:1/20:0, d16:1/22:0, d20:1/18:0)*'),
 ('Glucose', 'phenol sulfate')]

In [None]:
# No extra (chem, metabolite) pairs for the female E4 run; this list is merged with valid_test before that run.
FE4_valid_list = []

In [None]:
# No extra (chem, metabolite) pairs for the male E4 run; this list is merged with valid_test before that run.
ME4_valid_list = []

In [None]:
# Empty pair list. NOTE(review): presumably for the female Bio_Young run (df_Fhealth) — confirm where it is used.
Fhealth_valid_list = []

In [None]:
# Empty pair list. NOTE(review): presumably for the male Bio_Young run (df_Mhealth) — confirm where it is used.
Mhealth_valid_list = []

In [None]:
# (chem, metabolite) pairs, chem first.
# NOTE(review): presumably for the female Bio_Old run (df_Funhealth) — confirm where it is used.
Funhealth_valid_list = [('HbA1c', 'alpha-ketobutyrate'),
 ('HbA1c', 'gluconate'),
 ('HbA1c', '1-(1-enyl-palmitoyl)-2-myristoyl-GPC (P-16:0/14:0)*'),
 ('HbA1c', 'X - 19438'),
 ('HbA1c', 'taurine'),
 ('HbA1c', '2-hydroxybutyrate/2-hydroxyisobutyrate'),
 ('HbA1c', 'X - 24295'),
 ('HbA1c', 'alpha-ketoglutarate'),
 ('HbA1c', '4-methyl-2-oxopentanoate'),
 ('HbA1c', 'fructose'),
 ('Glucose', 'gluconate'),
 ('Glucose', '1-(1-enyl-palmitoyl)-2-myristoyl-GPC (P-16:0/14:0)*'),
 ('HbA1c', 'N-acetylleucine'),
 ('HbA1c', '3-methyl-2-oxovalerate'),
 ('HbA1c', 'pyruvate'),
 ('HbA1c', 'margarate (17:0)'),
 ('Glucose', 'alpha-ketobutyrate'),
 ('HbA1c', '3-hydroxy-2-ethylpropionate'),
 ('HbA1c', 'mannose'),
 ('Urea', 'linolenoylcarnitine (C18:3)*'),
 ('Glucose', 'X - 16087')]

In [None]:
# (chem, metabolite) pairs, chem first.
# NOTE(review): presumably for the male Bio_Old run (df_Munhealth) — confirm where it is used.
Munhealth_valid_list = [('Glucose', 'N-acetylvaline'),
 ('Glucose', 'linoleoyl-linoleoyl-glycerol (18:2/18:2) [1]*'),
 ('Glucose', 'mannose'),
 ('HbA1c', 'N-acetylvaline'),
 ('Glucose', 'glutamate'),
 ('HbA1c', 'linoleoyl-linoleoyl-glycerol (18:2/18:2) [1]*'),
 ('HbA1c', 'aspartate'),
 ('Glucose', 'palmitoleoyl-linoleoyl-glycerol (16:1/18:2) [1]*'),
 ('HbA1c', 'pyruvate'),
 ('Glucose', 'pyruvate'),
 ('Glucose', 'leucine'),
 ('Glucose', 'aspartate'),
 ('Glucose', 'palmitoyl-linoleoyl-glycerol (16:0/18:2) [2]*'),
 ('Haemoglobin', 'N-acetylthreonine'),
 ('HbA1c', 'glutamate'),
 ('Haemoglobin', 'gamma-glutamylglycine'),
 ('HbA1c', '1-carboxyethylvaline'),
 ('HbA1c', 'linoleoyl-arachidonoyl-glycerol (18:2/20:4) [1]*'),
 ('Glucose', 'oleoyl-linoleoyl-glycerol (18:1/18:2) [1]'),
 ('HbA1c', 'isoleucine'),
 ('Glucose', 'cortolone glucuronide (1)'),
 ('HbA1c', 'leucine'),
 ('Glucose', 'butyrylcarnitine (C4)'),
 ('Glucose', 'phenol sulfate'),
 ('Glucose', 'linoleoyl-arachidonoyl-glycerol (18:2/20:4) [1]*'),
 ('HbA1c', '1-carboxyethylleucine'),
 ('HbA1c', 'phenol sulfate'),
 ('HbA1c', 'X - 24337'),
 ('HbA1c', 'X - 16087'),
 ('HbA1c', 'gamma-glutamylisoleucine*'),
 ('Glucose', 'creatine'),
 ('Glucose', '1-(1-enyl-palmitoyl)-2-oleoyl-GPC (P-16:0/18:1)*'),
 ('HbA1c', 'palmitoyl-linoleoyl-glycerol (16:0/18:2) [2]*'),
 ('Glucose', 'X - 23641'),
 ('HbA1c', 'lactate'),
 ('Glucose', '1-stearoyl-2-dihomo-linolenoyl-GPE (18:0/20:3n3 or 6)*'),
 ('Glucose', 'hydroxyasparagine**'),
 ('HbA1c', 'palmitoyl-linoleoyl-glycerol (16:0/18:2) [1]*'),
 ('Glucose', 'X - 16087'),
 ('Glucose', 'gamma-glutamylleucine'),
 ('Glucose', 'oleoyl-oleoyl-glycerol (18:1/18:1)  [1]*'),
 ('Glucose', 'oleoyl-linoleoyl-glycerol (18:1/18:2) [2]'),
 ('Glucose', 'valine'),
 ('HbA1c', 'mannose'),
 ('HbA1c', 'glutamine conjugate of C6H10O2 (1)*'),
 ('Glucose', 'sphingomyelin (d18:1/22:1, d18:2/22:0, d16:1/24:1)*'),
 ('Glucose', '1-oleoylglycerol (18:1)'),
 ('HbA1c', 'N-acetylisoleucine'),
 ('Haemoglobin', 'X - 12026'),
 ('HbA1c', 'oleoyl-linoleoyl-glycerol (18:1/18:2) [1]'),
 ('Glucose', 'linoleoyl-linolenoyl-glycerol (18:2/18:3) [2]*'),
 ('Glucose', 'hydroquinone sulfate'),
 ('HbA1c', 'N-acetylaspartate (NAA)'),
 ('Glucose', 'palmitoyl-oleoyl-glycerol (16:0/18:1) [1]*'),
 ('Glucose', 'choline'),
 ('Glucose', 'linoleoyl ethanolamide'),
 ('Glucose', '1-carboxyethylvaline'),
 ('Glucose', '1-linoleoyl-GPC (18:2)'),
 ('HbA1c', 'X - 12101'),
 ('HbA1c', 'palmitoleoyl-linoleoyl-glycerol (16:1/18:2) [1]*'),
 ('HbA1c', 'N2,N2-dimethylguanosine'),
 ('Glucose', 'palmitoyl-linoleoyl-glycerol (16:0/18:2) [1]*')]

In [None]:
# (chem, metabolite) pairs, chem first.
# NOTE(review): presumably for an E2-allele model; no use of this list is visible nearby — confirm.
E2_allele_valid_list = [('HbA1c', 'malate'),
 ('HbA1c', 'fumarate'),
 ('Glucose', 'aconitate [cis or trans]'),
 ('Glucose', '3-hydroxy-2-ethylpropionate'),
 ('Tot-cholesterol', 'arabonate/xylonate')]

In [None]:
# Empty pair list. NOTE(review): presumably for an E4-allele model; no use of this list is visible nearby — confirm.
E4_allele_valid_list = []

In [None]:
# (chem, metabolite) pairs, chem first.
# NOTE(review): presumably for a continuous delta-age model; no use of this list is visible nearby — confirm.
# NOTE(review): 'Insulin' is not among the TwinsUK chems named in the mapping comment earlier in this
# notebook — confirm it is a column of the TwinsUK table before passing this list to the GLM runner.
delta_age_continuous_valid_list = [('Insulin', 'eicosenoate (20:1)'),
 ('Tot-cholesterol', 'alpha-hydroxyisocaproate'),
 ('Tot-cholesterol', 'sphingomyelin (d18:2/23:1)*'),
 ('Tot-cholesterol', 'leucine'),
 ('Tot-cholesterol', '1-palmitoyl-GPE (16:0)'),
 ('Glucose', 'palmitate (16:0)'),
 ('Glucose', 'oleoyl-arachidonoyl-glycerol (18:1/20:4) [2]*'),
 ('Glucose', '1-carboxyethylleucine'),
 ('Glucose', '3,5-dichloro-2,6-dihydroxybenzoic acid'),
 ('Glucose', 'X - 23639'),
 ('Glucose', 'palmitoyl-oleoyl-glycerol (16:0/18:1) [2]*'),
 ('Glucose', 'beta-cryptoxanthin'),
 ('Glucose', '1-stearoyl-2-docosapentaenoyl-GPC (18:0/22:5n6)*'),
 ('Glucose', '1-eicosenoyl-GPC (20:1)*'),
 ('Glucose', 'valine'),
 ('Glucose', 'arginine'),
 ('Glucose', '1-oleoyl-GPC (18:1)'),
 ('Glucose', 'X - 24337'),
 ('Glucose', 'dihomo-linoleate (20:2n6)'),
 ('Glucose', 'glutamine conjugate of C6H10O2 (1)*'),
 ('Glucose', '1-(1-enyl-palmitoyl)-GPC (P-16:0)*'),
 ('Glucose', 'glutamine'),
 ('Glucose', 'behenoyl dihydrosphingomyelin (d18:0/22:0)*'),
 ('Glucose', 'aspartate'),
 ('Glucose', '1-linoleoyl-2-docosahexaenoyl-GPC (18:2/22:6)*'),
 ('Glucose', 'S-methylcysteine sulfoxide'),
 ('Glucose', '1,5-anhydroglucitol (1,5-AG)'),
 ('Glucose', 'S-methylcysteine'),
 ('Glucose', 'oxalate (ethanedioate)'),
 ('Glucose', 'hexanoylglutamine'),
 ('Glucose', '(S)-3-hydroxybutyrylcarnitine'),
 ('Glucose', 'tyrosine'),
 ('Glucose', 'alpha-ketoglutarate'),
 ('Glucose', '1-stearoyl-2-docosapentaenoyl-GPE (18:0/22:5n6)*'),
 ('Glucose', 'palmitoyl-linoleoyl-glycerol (16:0/18:2) [2]*'),
 ('Glucose', '1-margaroyl-GPC (17:0)'),
 ('Glucose', 'adrenate (22:4n6)'),
 ('Glucose', '3-hydroxyisobutyrate'),
 ('Glucose', '2-hydroxybutyrate/2-hydroxyisobutyrate'),
 ('Glucose', 'gamma-glutamylisoleucine*'),
 ('Glucose', 'palmitoyl-linoleoyl-glycerol (16:0/18:2) [1]*'),
 ('Glucose', '3-methyl-2-oxovalerate'),
 ('Glucose', 'linoleoyl-arachidonoyl-glycerol (18:2/20:4) [1]*'),
 ('Glucose', '1-palmityl-2-oleoyl-GPC (O-16:0/18:1)*'),
 ('Glucose', 'aconitate [cis or trans]'),
 ('Glucose', 'glycodeoxycholate'),
 ('Glucose', 'gamma-glutamylmethionine'),
 ('Glucose', 'linoleoyl-linoleoyl-glycerol (18:2/18:2) [1]*'),
 ('Glucose', 'leucine'),
 ('Glucose', 'X - 19438'),
 ('Glucose', 'lactate'),
 ('Glucose', 'gluconate'),
 ('Glucose', 'fructose'),
 ('Glucose', 'pyruvate'),
 ('Glucose', '1-carboxyethylphenylalanine'),
 ('Glucose', 'tartronate (hydroxymalonate)'),
 ('Glucose', 'stearate (18:0)'),
 ('Glucose', 'oleate/vaccenate (18:1)'),
 ('Glucose', '1,2-dilinoleoyl-GPC (18:2/18:2)'),
 ('Glucose', '1-(1-enyl-palmitoyl)-2-oleoyl-GPC (P-16:0/18:1)*'),
 ('Glucose', 'ribonate'),
 ('Glucose', 'gamma-glutamylcitrulline*'),
 ('Glucose', 'X - 24295'),
 ('Glucose', 'X - 16087'),
 ('Glucose', 'eicosenoate (20:1)'),
 ('Glucose', 'hydroxypalmitoyl sphingomyelin (d18:1/16:0(OH))**'),
 ('Glucose', 'metabolonic lactone sulfate'),
 ('Glucose', 'ribitol'),
 ('Glucose', 'X - 14056'),
 ('Glucose', 'alpha-ketobutyrate'),
 ('Glucose', 'N-acetylleucine'),
 ('Glucose', 'taurine'),
 ('Glucose', 'N-stearoyl-sphinganine (d18:0/18:0)*'),
 ('Glucose', 'docosapentaenoate (n6 DPA; 22:5n6)'),
 ('Glucose', '1-dihomo-linoleoyl-GPC (20:2)*'),
 ('Glucose', '5-hydroxylysine'),
 ('Glucose', 'hydantoin-5-propionate'),
 ('Glucose', 'X - 21829'),
 ('Glucose', '2-hydroxydecanoate'),
 ('Glucose', '3-methyl-2-oxobutyrate'),
 ('Glucose', 'asparagine'),
 ('Glucose', '4-methyl-2-oxopentanoate'),
 ('Glucose', 'glutamate'),
 ('Glucose', 'palmitoyl-oleoyl-glycerol (16:0/18:1) [1]*'),
 ('Glucose', 'N-acetylaspartate (NAA)'),
 ('Glucose', 'mannose'),
 ('Glucose', '3-hydroxy-2-ethylpropionate'),
 ('Glucose', '1-linoleoyl-GPC (18:2)'),
 ('Glucose', 'margarate (17:0)'),
 ('Glucose', '1-carboxyethylvaline'),
 ('Glucose', 'palmitoyl-arachidonoyl-glycerol (16:0/20:4) [2]*'),
 ('Glucose', 'gamma-glutamylglutamine'),
 ('Glucose', 'isoleucine'),
 ('Glucose', 'oleoyl-arachidonoyl-glycerol (18:1/20:4) [1]*'),
 ('Glucose', 'docosadienoate (22:2n6)'),
 ('Triglycerides', 'indolepropionate'),
 ('Triglycerides', '(16 or 17)-methylstearate (a19:0 or i19:0)'),
 ('HbA1c', 'deoxycholic acid 12-sulfate*'),
 ('HbA1c', 'palmitate (16:0)'),
 ('HbA1c', '1-carboxyethylleucine'),
 ('HbA1c', '1-arachidonoyl-GPC (20:4n6)*'),
 ('HbA1c', 'X - 23639'),
 ('HbA1c', 'glucose'),
 ('HbA1c', 'beta-cryptoxanthin'),
 ('HbA1c', 'sphingomyelin (d18:1/22:1, d18:2/22:0, d16:1/24:1)*'),
 ('HbA1c', 'glycodeoxycholate 3-sulfate'),
 ('HbA1c', '1-eicosenoyl-GPC (20:1)*'),
 ('HbA1c', 'valine'),
 ('HbA1c', '1-docosapentaenoyl-GPC (22:5n3)*'),
 ('HbA1c', '1-oleoyl-GPC (18:1)'),
 ('HbA1c', 'arachidonoylcholine'),
 ('HbA1c', 'X - 24337'),
 ('HbA1c', 'dihomo-linoleate (20:2n6)'),
 ('HbA1c', 'linoleoylcholine*'),
 ('HbA1c', 'glutamine conjugate of C6H10O2 (1)*'),
 ('HbA1c', 'sphingomyelin (d18:1/24:1, d18:2/24:0)*'),
 ('HbA1c', '1-(1-enyl-palmitoyl)-GPC (P-16:0)*'),
 ('HbA1c', 'glutamine'),
 ('HbA1c', '(R)-3-hydroxybutyrylcarnitine'),
 ('HbA1c', 'glycerate'),
 ('HbA1c', 'aspartate'),
 ('HbA1c', '1-linoleoyl-2-docosahexaenoyl-GPC (18:2/22:6)*'),
 ('HbA1c', '1,5-anhydroglucitol (1,5-AG)'),
 ('HbA1c', 'oxalate (ethanedioate)'),
 ('HbA1c', 'hexanoylglutamine'),
 ('HbA1c', '(S)-3-hydroxybutyrylcarnitine'),
 ('HbA1c', 'alpha-ketoglutarate'),
 ('HbA1c', 'palmitoyl-linoleoyl-glycerol (16:0/18:2) [2]*'),
 ('HbA1c', '10-nonadecenoate (19:1n9)'),
 ('HbA1c', 'adrenate (22:4n6)'),
 ('HbA1c', '3-hydroxyisobutyrate'),
 ('HbA1c', '2-hydroxybutyrate/2-hydroxyisobutyrate'),
 ('HbA1c', 'glutamine conjugate of C7H12O2*'),
 ('HbA1c', 'X - 11315'),
 ('HbA1c', 'gamma-glutamylisoleucine*'),
 ('HbA1c', 'palmitoyl-linoleoyl-glycerol (16:0/18:2) [1]*'),
 ('HbA1c', '4-allylphenol sulfate'),
 ('HbA1c', '3-methyl-2-oxovalerate'),
 ('HbA1c', 'linoleoyl-arachidonoyl-glycerol (18:2/20:4) [1]*'),
 ('HbA1c', 'aconitate [cis or trans]'),
 ('HbA1c', 'glycodeoxycholate'),
 ('HbA1c', 'gamma-glutamylleucine'),
 ('HbA1c', 'linoleate (18:2n6)'),
 ('HbA1c', 'linoleoyl-linoleoyl-glycerol (18:2/18:2) [1]*'),
 ('HbA1c', 'leucine'),
 ('HbA1c', 'X - 19438'),
 ('HbA1c', 'lactate'),
 ('HbA1c', 'gluconate'),
 ('HbA1c', 'fructose'),
 ('HbA1c', 'pyruvate'),
 ('HbA1c', '1-carboxyethylphenylalanine'),
 ('HbA1c', 'tartronate (hydroxymalonate)'),
 ('HbA1c', 'stearate (18:0)'),
 ('HbA1c', 'oleate/vaccenate (18:1)'),
 ('HbA1c', '1-(1-enyl-palmitoyl)-2-linoleoyl-GPC (P-16:0/18:2)*'),
 ('HbA1c', '1-(1-enyl-palmitoyl)-2-oleoyl-GPC (P-16:0/18:1)*'),
 ('HbA1c', 'palmitoylcholine'),
 ('HbA1c', 'maleate'),
 ('HbA1c', '3-hydroxyoctanoate'),
 ('HbA1c', 'X - 24295'),
 ('HbA1c', 'eicosenoate (20:1)'),
 ('HbA1c', 'metabolonic lactone sulfate'),
 ('HbA1c', 'X - 14056'),
 ('HbA1c', 'alpha-ketobutyrate'),
 ('HbA1c', '3-indoleglyoxylic acid'),
 ('HbA1c', 'N-acetylleucine'),
 ('HbA1c', '6-oxopiperidine-2-carboxylate'),
 ('HbA1c', 'taurine'),
 ('HbA1c', 'gamma-CEHC'),
 ('HbA1c', 'docosapentaenoate (n6 DPA; 22:5n6)'),
 ('HbA1c', 'X - 21339'),
 ('HbA1c', 'beta-hydroxyisovalerate'),
 ('HbA1c', 'gamma-glutamylvaline'),
 ('HbA1c', '1-dihomo-linoleoyl-GPC (20:2)*'),
 ('HbA1c', 'N-acetylglutamate'),
 ('HbA1c', 'N-acetylvaline'),
 ('HbA1c', 'X - 21829'),
 ('HbA1c', 'N-delta-acetylornithine'),
 ('HbA1c', '3-methyl-2-oxobutyrate'),
 ('HbA1c', 'glutamate'),
 ('HbA1c', 'N-acetylaspartate (NAA)'),
 ('HbA1c', 'mannose'),
 ('HbA1c', '3-hydroxy-2-ethylpropionate'),
 ('HbA1c', 'sphingomyelin (d18:2/24:1, d18:1/24:2)*'),
 ('HbA1c', '1-linoleoyl-GPC (18:2)'),
 ('HbA1c', 'X - 16935'),
 ('HbA1c', 'margarate (17:0)'),
 ('HbA1c', '1-carboxyethylvaline'),
 ('HbA1c', '3beta-hydroxy-5-cholestenoate'),
 ('HbA1c', 'palmitoyl-arachidonoyl-glycerol (16:0/20:4) [2]*'),
 ('HbA1c', 'gamma-glutamylglutamine'),
 ('HbA1c', 'isoleucine'),
 ('HbA1c', 'docosadienoate (22:2n6)'),
 ('HbA1c', 'proline'),
 ('Haemoglobin', '2,3-dihydroxy-5-methylthio-4-pentenoate (DMTPA)*'),
 ('Haemoglobin', 'X - 12007'),
 ('Haemoglobin', 'pregnenetriol disulfate*'),
 ('Haemoglobin', '1-methyl-5-imidazoleacetate'),
 ('Haemoglobin', 'creatine'),
 ('Haemoglobin', 'pyroglutamine*'),
 ('LDL', 'valine'),
 ('eGFR', '1-ribosyl-imidazoleacetate*'),
 ('HDL', '1-(1-enyl-palmitoyl)-2-palmitoleoyl-GPC (P-16:0/16:1)*'),
 ('HDL', '1-palmityl-2-linoleoyl-GPC (O-16:0/18:2)*'),
 ('HDL', 'palmitoyl-linoleoyl-glycerol (16:0/18:2) [2]*'),
 ('HDL', '1-palmityl-2-oleoyl-GPC (O-16:0/18:1)*'),
 ('HDL', '1-(1-enyl-palmitoyl)-2-myristoyl-GPC (P-16:0/14:0)*'),
 ('HDL', '1-(1-enyl-palmitoyl)-2-oleoyl-GPC (P-16:0/18:1)*'),
 ('Urate', 'valine'),
 ('Urate', '1-stearoyl-2-linoleoyl-GPI (18:0/18:2)'),
 ('Urate', '1-palmitoyl-2-oleoyl-GPI (16:0/18:1)*'),
 ('Creatinine', '1-methyl-4-imidazoleacetate'),
 ('Creatinine', '1-ribosyl-imidazoleacetate*'),
 ('Creatinine', 'X - 17351'),
 ('Creatinine', 'X - 21821')]

# Run GLMs

### E2

In [None]:
# this is edited slightly for each group (E2, E4, bio young, bio old)
def run_interaction_analysis(screened_pairs, dat, analytes, chems, metabs, max_run=None):
    """Fit one interaction GLM per pair and FDR-adjust the E2 interaction p-values.

    For every ``(col1, col2)`` pair whose ``col2`` is in ``metabs`` the model
    ``analyte1 ~ analyte2*C(APOE_Status, Treatment(reference=1)) + Age + BMI + Statin_User``
    is fitted, with ``col1`` as ``analyte1`` and ``col2`` as ``analyte2``, on
    one row per ``PublicID`` (first occurrence kept) after dropping rows where
    either analyte is missing. The family is Gaussian/identity unless the skew
    of ``analyte1`` exceeds 1.5 in absolute value, in which case Gamma/log is
    used and zeros are replaced by half the smallest positive value.

    Parameters
    ----------
    screened_pairs : sized iterable of (str, str)
        Column-name pairs to test (response first, predictor second).
    dat : pandas.DataFrame
        Must contain 'PublicID', 'Age', 'Sex', 'APOE_Status', 'Model_Health',
        'Statin_User' and 'BMI'; pairs naming a column that is absent are
        reported and skipped.
    analytes, chems
        Not used; kept so existing calls keep working.
    metabs : iterable of str
        Names accepted as the second member of a pair.
    max_run : int, optional
        Stop after this many pairs have been attempted.

    Returns
    -------
    pandas.DataFrame
        One row per accepted model with 'col1', 'col2', 'n', 'converged', the
        coefficients, their p-values (suffix '_p') and 'pval_adj'
        (Benjamini-Hochberg adjusted interaction p-value), sorted by
        'pval_adj'. If no model was accepted, an empty frame with the columns
        'col1', 'col2', 'n', 'converged', the interaction p-value column and
        'pval_adj' is returned.
    """
    # Group-specific settings: the formula and the p-value column that is
    # FDR-corrected are what differ between the per-group copies.
    # Other variants used in this notebook:
    #   'analyte1 ~ analyte2*C(APOE_Status) + Age + BMI + Statin_User'   -> '...C(APOE_Status)[T.E4]_p'
    #   'analyte1 ~ analyte2*C(Model_Health) + Age + BMI + Statin_User'  -> '...C(Model_Health)[T.Bio_Old]_p' / '[T.Bio_Young]_p'
    ols_model = 'analyte1 ~ analyte2*C(APOE_Status, Treatment(reference=1)) + Age + BMI + Statin_User' # + MetBatch'
    interaction_p = 'analyte2:C(APOE_Status, Treatment(reference=1))[T.E2]_p'
    # col1, col2, n, converged + 7 coefficients + 7 p-values
    expected_fields = 18

    print('Running {} pairs'.format(len(screened_pairs)))

    known_metabs = set(metabs)  # O(1) membership tests inside the loop
    count = 0
    skipped = 0
    results = []
    coef_names = None  # coefficient names of the most recently accepted model

    start_time = datetime.now()
    for (col1, col2) in screened_pairs:

        if col2 not in known_metabs:
            print('{} not in TwinsUK metabs'.format(col2))
            skipped += 1
            continue

        # A pair naming a column that is not in the table would raise KeyError
        # below and abort the whole run; report it and move on instead.
        if col1 not in dat.columns or col2 not in dat.columns:
            print('Failed analytes {} {}: column missing from data'.format(col1, col2))
            skipped += 1
            continue

        # Default is gaussian
        family_type = family.Gaussian()
        family_type.link = links.identity()

        sub = dat[['PublicID', col1, col2, 'Age', 'Sex', 'APOE_Status', 'Model_Health', 'Statin_User', 'BMI']].copy() #, 'MetBatch']].copy()
        sub.dropna(subset=[col1, col2], inplace=True)
        sub.drop_duplicates(subset=['PublicID'], keep='first', inplace=True)
        sub.rename(columns={col1: 'analyte1', col2: 'analyte2'}, inplace=True)

        if abs(sub['analyte1'].skew()) > 1.5:
            # Set any zero values to 1/2 the smallest value
            sub.loc[sub['analyte1'] == 0, 'analyte1'] = (sub.loc[sub['analyte1'] > 0, 'analyte1'].min() / 2.0)

            family_type = family.Gamma()
            family_type.link = links.log()

        try:
            fitted_model = smf.glm(ols_model, data=sub, family=family_type, missing='drop').fit(maxiter=2000)
            result_to_append = (col1, col2, len(fitted_model.fittedvalues), fitted_model.converged, *fitted_model.params, *fitted_model.pvalues)
            if len(result_to_append) == expected_fields:
                # make sure that each coefficient is represented in the model, avoid an error at the end
                coef_names = list(fitted_model.params.index)
                results.append(result_to_append)
            else:
                print('Failed analytes {} {}: not all coefficients were represented'.format(col1, col2))
                skipped += 1

        except Exception as e:
            # Best effort: one failing fit must not stop the remaining pairs.
            print('Failed analytes {} {} with error {}'.format(col1, col2, str(e)))
            skipped += 1

        count += 1
        if (max_run is not None) and (count >= max_run):
            break

        if (count % 1000) == 0:
            elapsed_time = datetime.now() - start_time
            print('Finished {} in {:.3f} seconds (skipped {})'.format(count, elapsed_time.total_seconds(), skipped))

    elapsed_time = datetime.now() - start_time
    print('Complete! Yay! Finished {} in {:.3f} seconds (skipped {})'.format(count, elapsed_time.total_seconds(), skipped))

    if not results:
        # No model was accepted, so there are no coefficient names to build
        # the full set of columns from.
        return pd.DataFrame(columns=['col1', 'col2', 'n', 'converged', interaction_p, 'pval_adj'])

    df = pd.DataFrame(results, columns=['col1', 'col2', 'n', 'converged', *coef_names, *[str(x)+'_p' for x in coef_names]])

    df.sort_values([interaction_p], ascending=True, inplace=True)
    np.seterr(all='warn')
    has_p = df[interaction_p].notnull()
    if has_p.any():
        (_, adj_pval, _, _) = statsmodels.sandbox.stats.multicomp.multipletests(df.loc[has_p, interaction_p], alpha=0.05, method='fdr_bh')
        df.loc[has_p, 'pval_adj'] = adj_pval
    else:
        df['pval_adj'] = np.nan
    df.sort_values(['pval_adj'], ascending=True, inplace=True)

    return df

### E2 female

In [None]:
import sys

# Send all following print() output to the FE2 log file.
# If stdout already points at one of this notebook's log files (an earlier
# redirect or a re-run of this cell), close it first so the handle is not
# leaked and its buffered text reaches disk.
if str(getattr(sys.stdout, "name", "")).startswith("240806_log_"):
    sys.stdout.close()
# buffering=1 flushes after every line, so the log stays current even though
# the file is never explicitly closed.
sys.stdout = open("240806_log_FE2.txt", "a", buffering=1)

In [None]:
# Marker line: if stdout was redirected by the preceding cell, this text goes to the log file.
print('log open')

In [None]:
# De-duplicate while keeping first-seen order. list(set(...)) changes order
# from one Python session to the next (string hashes are randomised), which
# changes the order of fits, log lines and tied rows.
temp_valid_list = list(dict.fromkeys(valid_test + FE2_valid_list))

In [None]:
# Run the GLMs for the female E2 subset.
interact_glm_FE2_compare = run_interaction_analysis(
    screened_pairs=temp_valid_list,
    dat=df_FE2,
    analytes=analytes,
    chems=chem_list,
    metabs=metab_list,
)

In [None]:
# Preview the first five result rows.
interact_glm_FE2_compare.head(5)

In [None]:
# interact_glm_FE2_compare.to_csv('/notebooks/Final_Paper/240423_Investigate_Redo_Interactions/Validation_Folder/output/240806_TwinsUK_FE2_5SD_valid_interact.csv')

In [None]:
# Keep only the fitted rows whose (col1, col2) pair is listed in FE2_valid_list.
temp1 = interact_glm_FE2_compare.copy()
temp2 = FE2_valid_list.copy()

# Match on the pair itself. Testing col1 and col2 separately would also keep
# combinations whose two names each occur in the list but never together.
listed_pairs = set(temp2)
is_listed = pd.Series(
    [pair in listed_pairs for pair in zip(temp1['col1'], temp1['col2'])],
    index=temp1.index,
    dtype=bool,
)
temp1 = temp1[is_listed]

# Columns are picked by position: 0-2 are col1, col2 and n, and 18 is the last
# column of a 19-column result (pval_adj).
# NOTE(review): positions 8 and 15 depend on the coefficient order of the
# fitted model — confirm they are the interaction coefficient and its p-value.
s = [0,1,2,8,15,18]

temp1.iloc[:, s]

### E2 male

In [None]:
import sys

# Send all following print() output to the ME2 log file.
# If stdout already points at one of this notebook's log files (an earlier
# redirect or a re-run of this cell), close it first so the handle is not
# leaked and its buffered text reaches disk.
if str(getattr(sys.stdout, "name", "")).startswith("240806_log_"):
    sys.stdout.close()
# buffering=1 flushes after every line, so the log stays current even though
# the file is never explicitly closed.
sys.stdout = open("240806_log_ME2.txt", "a", buffering=1)

In [None]:
# De-duplicate while keeping first-seen order. list(set(...)) changes order
# from one Python session to the next (string hashes are randomised), which
# changes the order of fits, log lines and tied rows.
temp_valid_list = list(dict.fromkeys(valid_test + ME2_valid_list))

In [None]:
# Run the GLMs for the male E2 subset.
interact_glm_ME2_compare = run_interaction_analysis(
    screened_pairs=temp_valid_list,
    dat=df_ME2,
    analytes=analytes,
    chems=chem_list,
    metabs=metab_list,
)

In [None]:
# interact_glm_ME2_compare.to_csv('/notebooks/Final_Paper/240423_Investigate_Redo_Interactions/Validation_Folder/output/240806_TwinsUK_ME2_5SD_valid_interact.csv')

### E4

In [None]:
# this is edited slightly for each group (E2, E4, bio young, bio old)
def run_interaction_analysis(screened_pairs, dat, analytes, chems, metabs, max_run=None):
    """Fit one interaction GLM per pair and FDR-adjust the E4 interaction p-values.

    For every ``(col1, col2)`` pair whose ``col2`` is in ``metabs`` the model
    ``analyte1 ~ analyte2*C(APOE_Status) + Age + BMI + Statin_User`` is fitted,
    with ``col1`` as ``analyte1`` and ``col2`` as ``analyte2``, on one row per
    ``PublicID`` (first occurrence kept) after dropping rows where either
    analyte is missing. The family is Gaussian/identity unless the skew of
    ``analyte1`` exceeds 1.5 in absolute value, in which case Gamma/log is
    used and zeros are replaced by half the smallest positive value.

    Parameters
    ----------
    screened_pairs : sized iterable of (str, str)
        Column-name pairs to test (response first, predictor second).
    dat : pandas.DataFrame
        Must contain 'PublicID', 'Age', 'Sex', 'APOE_Status', 'Model_Health',
        'Statin_User' and 'BMI'; pairs naming a column that is absent are
        reported and skipped.
    analytes, chems
        Not used; kept so existing calls keep working.
    metabs : iterable of str
        Names accepted as the second member of a pair.
    max_run : int, optional
        Stop after this many pairs have been attempted.

    Returns
    -------
    pandas.DataFrame
        One row per accepted model with 'col1', 'col2', 'n', 'converged', the
        coefficients, their p-values (suffix '_p') and 'pval_adj'
        (Benjamini-Hochberg adjusted interaction p-value), sorted by
        'pval_adj'. If no model was accepted, an empty frame with the columns
        'col1', 'col2', 'n', 'converged', the interaction p-value column and
        'pval_adj' is returned.
    """
    # Group-specific settings: the formula and the p-value column that is
    # FDR-corrected are what differ between the per-group copies.
    # Other variants used in this notebook:
    #   'analyte1 ~ analyte2*C(APOE_Status, Treatment(reference=1)) + Age + BMI + Statin_User'
    #       -> 'analyte2:C(APOE_Status, Treatment(reference=1))[T.E2]_p'
    #   'analyte1 ~ analyte2*C(Model_Health) + Age + BMI + Statin_User'
    #       -> 'analyte2:C(Model_Health)[T.Bio_Old]_p' / '[T.Bio_Young]_p'
    ols_model = 'analyte1 ~ analyte2*C(APOE_Status) + Age + BMI + Statin_User'
    interaction_p = 'analyte2:C(APOE_Status)[T.E4]_p'
    # col1, col2, n, converged + 7 coefficients + 7 p-values
    expected_fields = 18

    print('Running {} pairs'.format(len(screened_pairs)))

    known_metabs = set(metabs)  # O(1) membership tests inside the loop
    count = 0
    skipped = 0
    results = []
    coef_names = None  # coefficient names of the most recently accepted model

    start_time = datetime.now()
    for (col1, col2) in screened_pairs:

        if col2 not in known_metabs:
            print('{} not in TwinsUK metabs'.format(col2))
            skipped += 1
            continue

        # A pair naming a column that is not in the table would raise KeyError
        # below and abort the whole run; report it and move on instead.
        if col1 not in dat.columns or col2 not in dat.columns:
            print('Failed analytes {} {}: column missing from data'.format(col1, col2))
            skipped += 1
            continue

        # Default is gaussian
        family_type = family.Gaussian()
        family_type.link = links.identity()

        sub = dat[['PublicID', col1, col2, 'Age', 'Sex', 'APOE_Status', 'Model_Health', 'Statin_User', 'BMI']].copy() #, 'MetBatch']].copy()
        sub.dropna(subset=[col1, col2], inplace=True)
        sub.drop_duplicates(subset=['PublicID'], keep='first', inplace=True)
        sub.rename(columns={col1: 'analyte1', col2: 'analyte2'}, inplace=True)

        if abs(sub['analyte1'].skew()) > 1.5:
            # Set any zero values to 1/2 the smallest value
            sub.loc[sub['analyte1'] == 0, 'analyte1'] = (sub.loc[sub['analyte1'] > 0, 'analyte1'].min() / 2.0)

            family_type = family.Gamma()
            family_type.link = links.log()

        try:
            fitted_model = smf.glm(ols_model, data=sub, family=family_type, missing='drop').fit(maxiter=2000)
            result_to_append = (col1, col2, len(fitted_model.fittedvalues), fitted_model.converged, *fitted_model.params, *fitted_model.pvalues)
            if len(result_to_append) == expected_fields:
                # make sure that each coefficient is represented in the model, avoid an error at the end
                coef_names = list(fitted_model.params.index)
                results.append(result_to_append)
            else:
                print('Failed analytes {} {}: not all coefficients were represented'.format(col1, col2))
                skipped += 1

        except Exception as e:
            # Best effort: one failing fit must not stop the remaining pairs.
            print('Failed analytes {} {} with error {}'.format(col1, col2, str(e)))
            skipped += 1

        count += 1
        if (max_run is not None) and (count >= max_run):
            break

        if (count % 1000) == 0:
            elapsed_time = datetime.now() - start_time
            print('Finished {} in {:.3f} seconds (skipped {})'.format(count, elapsed_time.total_seconds(), skipped))

    elapsed_time = datetime.now() - start_time
    print('Complete! Yay! Finished {} in {:.3f} seconds (skipped {})'.format(count, elapsed_time.total_seconds(), skipped))

    if not results:
        # No model was accepted, so there are no coefficient names to build
        # the full set of columns from.
        return pd.DataFrame(columns=['col1', 'col2', 'n', 'converged', interaction_p, 'pval_adj'])

    df = pd.DataFrame(results, columns=['col1', 'col2', 'n', 'converged', *coef_names, *[str(x)+'_p' for x in coef_names]])

    df.sort_values([interaction_p], ascending=True, inplace=True)
    np.seterr(all='warn')
    has_p = df[interaction_p].notnull()
    if has_p.any():
        (_, adj_pval, _, _) = statsmodels.sandbox.stats.multicomp.multipletests(df.loc[has_p, interaction_p], alpha=0.05, method='fdr_bh')
        df.loc[has_p, 'pval_adj'] = adj_pval
    else:
        df['pval_adj'] = np.nan
    df.sort_values(['pval_adj'], ascending=True, inplace=True)

    return df

### E4 female

In [None]:
import sys

# Send all following print() output to the FE4 log file.
# If stdout already points at one of this notebook's log files (an earlier
# redirect or a re-run of this cell), close it first so the handle is not
# leaked and its buffered text reaches disk.
if str(getattr(sys.stdout, "name", "")).startswith("240806_log_"):
    sys.stdout.close()
# buffering=1 flushes after every line, so the log stays current even though
# the file is never explicitly closed.
sys.stdout = open("240806_log_FE4.txt", "a", buffering=1)

In [None]:
# De-duplicate while keeping first-seen order. list(set(...)) changes order
# from one Python session to the next (string hashes are randomised), which
# changes the order of fits, log lines and tied rows.
temp_valid_list = list(dict.fromkeys(valid_test + FE4_valid_list))

In [None]:
# Run the GLMs for the female E4 subset.
interact_glm_FE4_compare = run_interaction_analysis(
    screened_pairs=temp_valid_list,
    dat=df_FE4,
    analytes=analytes,
    chems=chem_list,
    metabs=metab_list,
)

In [None]:
# interact_glm_FE4_compare.to_csv('/notebooks/Final_Paper/240423_Investigate_Redo_Interactions/Validation_Folder/output/240806_TwinsUK_FE4_5SD_valid_interact.csv')

### E4 male

In [None]:
import sys

# Send all following print() output to the ME4 log file.
# If stdout already points at one of this notebook's log files (an earlier
# redirect or a re-run of this cell), close it first so the handle is not
# leaked and its buffered text reaches disk.
if str(getattr(sys.stdout, "name", "")).startswith("240806_log_"):
    sys.stdout.close()
# buffering=1 flushes after every line, so the log stays current even though
# the file is never explicitly closed.
sys.stdout = open("240806_log_ME4.txt", "a", buffering=1)

In [None]:
# De-duplicate while keeping first-seen order. list(set(...)) changes order
# from one Python session to the next (string hashes are randomised), which
# changes the order of fits, log lines and tied rows.
temp_valid_list = list(dict.fromkeys(valid_test + ME4_valid_list))

In [None]:
# Run the GLMs for the male E4 subset.
interact_glm_ME4_compare = run_interaction_analysis(
    screened_pairs=temp_valid_list,
    dat=df_ME4,
    analytes=analytes,
    chems=chem_list,
    metabs=metab_list,
)

In [None]:
# interact_glm_ME4_compare.to_csv('/notebooks/Final_Paper/240423_Investigate_Redo_Interactions/Validation_Folder/output/240806_TwinsUK_ME4_5SD_valid_interact.csv')

### Bio Young

In [None]:
# this is edited slightly for each group (E2, E4, bio young, bio old)
def run_interaction_analysis(screened_pairs, dat, analytes, chems, metabs, max_run=None):
    """Fit one interaction GLM per analyte pair and FDR-adjust the interaction p-values.

    This is the "Bio_Young" variant: it fits
    ``analyte1 ~ analyte2*C(Model_Health) + Age + BMI + Statin_User`` and
    adjusts the p-value of the ``analyte2:C(Model_Health)[T.Bio_Young]`` term.

    For each ``(col1, col2)`` pair, ``col1`` is the response (``analyte1``) and
    ``col2`` the predictor (``analyte2``). Rows missing either analyte are
    dropped and only the first row per ``PublicID`` is kept. A Gaussian family
    with identity link is used unless the response's skewness lies outside
    +/-1.5, in which case a Gamma family with log link is used.

    Parameters
    ----------
    screened_pairs : sized iterable of (str, str)
        Column-name pairs to test.
    dat : pandas.DataFrame
        Must contain ``PublicID``, ``Age``, ``Sex``, ``APOE_Status``,
        ``Model_Health``, ``Statin_User``, ``BMI`` and every column named in
        ``screened_pairs`` that passes the ``metabs`` filter.
    analytes, chems
        Not used by this function; kept so existing calls keep working.
    metabs : collection of str
        Pairs whose second member is not in this collection are skipped.
    max_run : int or None
        If given, stop after this many pairs have been attempted.

    Returns
    -------
    pandas.DataFrame
        One row per completely fitted pair with ``col1``, ``col2``, ``n``
        (number of fitted values), ``converged``, the model coefficients,
        their p-values (``<term>_p``) and ``pval_adj`` (Benjamini-Hochberg
        FDR), sorted by ``pval_adj``. If no pair could be fitted, an empty
        frame with the fixed columns is returned.

    Raises
    ------
    KeyError
        If the fitted models do not contain the ``[T.Bio_Young]`` interaction
        term, or ``dat`` lacks a required column.
    """
    formula = 'analyte1 ~ analyte2*C(Model_Health) + Age + BMI + Statin_User'
    interaction_p_col = 'analyte2:C(Model_Health)[T.Bio_Young]_p'
    base_columns = ['col1', 'col2', 'n', 'converged']
    # 4 leading fields plus one coefficient and one p-value for each of the
    # 7 expected model terms; anything else means a term was dropped or added.
    expected_len = 18

    print('Running {} pairs'.format(len(screened_pairs)))

    count = 0
    skipped = 0
    results = []
    # Most recent complete fit; its term names label the result columns.
    last_good_fit = None

    start_time = datetime.now()
    for (col1, col2) in screened_pairs:

        if col2 not in metabs:
            print('{} not in TwinsUK metabs'.format(col2))
            skipped += 1
            continue

        sub = dat[['PublicID', col1, col2, 'Age', 'Sex', 'APOE_Status', 'Model_Health', 'Statin_User', 'BMI']].copy()
        sub.dropna(subset=[col1, col2], inplace=True)
        sub.drop_duplicates(subset=['PublicID'], keep='first', inplace=True)
        sub.rename(columns={col1: 'analyte1'}, inplace=True)
        sub.rename(columns={col2: 'analyte2'}, inplace=True)

        skewness = sub['analyte1'].skew()
        if abs(skewness) > 1.5:
            # Set any zero values to 1/2 the smallest positive value before
            # switching to the Gamma family.
            sub.loc[sub['analyte1'] == 0, 'analyte1'] = (sub.loc[sub['analyte1'] > 0, 'analyte1'].min() / 2.0)
            family_type = family.Gamma()
            family_type.link = links.log()
        else:
            # Default is gaussian
            family_type = family.Gaussian()
            family_type.link = links.identity()

        try:
            fitted_model = smf.glm(formula, data=sub, family=family_type, missing='drop').fit(maxiter=2000)
            result_to_append = (col1, col2, len(fitted_model.fittedvalues), fitted_model.converged, *fitted_model.params, *fitted_model.pvalues)
            if len(result_to_append) == expected_len:
                last_good_fit = fitted_model
                results.append(result_to_append)
            else:
                print('Failed analytes {} {}: not all coefficients were represented'.format(col1, col2))
                skipped += 1

        except Exception as e:
            # Best effort: one failing pair must not abort the whole screen.
            print('Failed analytes {} {} with error {}'.format(col1, col2, str(e)))
            skipped += 1

        count += 1
        if (max_run is not None) and (count >= max_run):
            break

        if (count % 1000) == 0:
            elapsed_time = datetime.now() - start_time
            print('Finished {} in {:.3f} seconds (skipped {})'.format(count, elapsed_time.total_seconds(), skipped))

    elapsed_time = datetime.now() - start_time
    print('Complete! Yay! Finished {} in {:.3f} seconds (skipped {})'.format(count, elapsed_time.total_seconds(), skipped))

    if last_good_fit is None:
        # Nothing was fitted, so there are no term names to build columns from.
        return pd.DataFrame(columns=[*base_columns, interaction_p_col, 'pval_adj'])

    df = pd.DataFrame(results, columns=[*base_columns, *last_good_fit.params.index, *[str(x) + '_p' for x in last_good_fit.pvalues.index]])

    df.sort_values([interaction_p_col], ascending=True, inplace=True)
    np.seterr(all='warn')
    has_p = df[interaction_p_col].notnull()
    df['pval_adj'] = np.nan
    if has_p.any():
        # Benjamini-Hochberg correction over the non-missing interaction p-values.
        (_, adj_pval, _, _) = statsmodels.sandbox.stats.multicomp.multipletests(df.loc[has_p, interaction_p_col], alpha=0.05, method='fdr_bh')
        df.loc[has_p, 'pval_adj'] = adj_pval
    df.sort_values(['pval_adj'], ascending=True, inplace=True)

    return df

### bio young female

In [None]:
from contextlib import redirect_stdout

# De-duplicate the candidate pairs while keeping first-seen order, so the
# pairs are processed in the same order on every run (a set has no stable
# order for string tuples across interpreter sessions).
temp_valid_list = list(dict.fromkeys(valid_test + Fhealth_valid_list))

# Append this run's printed progress to the log file. stdout is restored and
# the file is flushed and closed as soon as the block exits.
with open("240806_log_Fhealth.txt", "a") as log_file, redirect_stdout(log_file):
    interact_glm_Fhealth_compare = run_interaction_analysis(temp_valid_list, df_Fhealth, analytes, chem_list, metab_list)

In [None]:
# interact_glm_Fhealth_compare.to_csv('/notebooks/Final_Paper/240423_Investigate_Redo_Interactions/Validation_Folder/output/240806_TwinsUK_Fhealth_5SD_valid_interact.csv')

### bio young male

In [None]:
from contextlib import redirect_stdout

# De-duplicate the candidate pairs while keeping first-seen order, so the
# pairs are processed in the same order on every run (a set has no stable
# order for string tuples across interpreter sessions).
temp_valid_list = list(dict.fromkeys(valid_test + Mhealth_valid_list))

# Append this run's printed progress to the log file. stdout is restored and
# the file is flushed and closed as soon as the block exits.
with open("240806_log_Mhealth.txt", "a") as log_file, redirect_stdout(log_file):
    interact_glm_Mhealth_compare = run_interaction_analysis(temp_valid_list, df_Mhealth, analytes, chem_list, metab_list)

In [None]:
# interact_glm_Mhealth_compare.to_csv('/notebooks/Final_Paper/240423_Investigate_Redo_Interactions/Validation_Folder/output/240806_TwinsUK_Mhealth_5SD_valid_interact.csv')

### bio old

In [None]:
# this is edited slightly for each group (E2, E4, bio young, bio old)
def run_interaction_analysis(screened_pairs, dat, analytes, chems, metabs, max_run=None):
    """Fit one interaction GLM per analyte pair and FDR-adjust the interaction p-values.

    This is the "Bio_Old" variant: it fits
    ``analyte1 ~ analyte2*C(Model_Health) + Age + BMI + Statin_User`` and
    adjusts the p-value of the ``analyte2:C(Model_Health)[T.Bio_Old]`` term.

    For each ``(col1, col2)`` pair, ``col1`` is the response (``analyte1``) and
    ``col2`` the predictor (``analyte2``). Rows missing either analyte are
    dropped and only the first row per ``PublicID`` is kept. A Gaussian family
    with identity link is used unless the response's skewness lies outside
    +/-1.5, in which case a Gamma family with log link is used.

    Parameters
    ----------
    screened_pairs : sized iterable of (str, str)
        Column-name pairs to test.
    dat : pandas.DataFrame
        Must contain ``PublicID``, ``Age``, ``Sex``, ``APOE_Status``,
        ``Model_Health``, ``Statin_User``, ``BMI`` and every column named in
        ``screened_pairs`` that passes the ``metabs`` filter.
    analytes, chems
        Not used by this function; kept so existing calls keep working.
    metabs : collection of str
        Pairs whose second member is not in this collection are skipped.
    max_run : int or None
        If given, stop after this many pairs have been attempted.

    Returns
    -------
    pandas.DataFrame
        One row per completely fitted pair with ``col1``, ``col2``, ``n``
        (number of fitted values), ``converged``, the model coefficients,
        their p-values (``<term>_p``) and ``pval_adj`` (Benjamini-Hochberg
        FDR), sorted by ``pval_adj``. If no pair could be fitted, an empty
        frame with the fixed columns is returned.

    Raises
    ------
    KeyError
        If the fitted models do not contain the ``[T.Bio_Old]`` interaction
        term, or ``dat`` lacks a required column.
    """
    formula = 'analyte1 ~ analyte2*C(Model_Health) + Age + BMI + Statin_User'
    interaction_p_col = 'analyte2:C(Model_Health)[T.Bio_Old]_p'
    base_columns = ['col1', 'col2', 'n', 'converged']
    # 4 leading fields plus one coefficient and one p-value for each of the
    # 7 expected model terms; anything else means a term was dropped or added.
    expected_len = 18

    print('Running {} pairs'.format(len(screened_pairs)))

    count = 0
    skipped = 0
    results = []
    # Most recent complete fit; its term names label the result columns.
    last_good_fit = None

    start_time = datetime.now()
    for (col1, col2) in screened_pairs:

        if col2 not in metabs:
            print('{} not in TwinsUK metabs'.format(col2))
            skipped += 1
            continue

        sub = dat[['PublicID', col1, col2, 'Age', 'Sex', 'APOE_Status', 'Model_Health', 'Statin_User', 'BMI']].copy()
        sub.dropna(subset=[col1, col2], inplace=True)
        sub.drop_duplicates(subset=['PublicID'], keep='first', inplace=True)
        sub.rename(columns={col1: 'analyte1'}, inplace=True)
        sub.rename(columns={col2: 'analyte2'}, inplace=True)

        skewness = sub['analyte1'].skew()
        if abs(skewness) > 1.5:
            # Set any zero values to 1/2 the smallest positive value before
            # switching to the Gamma family.
            sub.loc[sub['analyte1'] == 0, 'analyte1'] = (sub.loc[sub['analyte1'] > 0, 'analyte1'].min() / 2.0)
            family_type = family.Gamma()
            family_type.link = links.log()
        else:
            # Default is gaussian
            family_type = family.Gaussian()
            family_type.link = links.identity()

        try:
            fitted_model = smf.glm(formula, data=sub, family=family_type, missing='drop').fit(maxiter=2000)
            result_to_append = (col1, col2, len(fitted_model.fittedvalues), fitted_model.converged, *fitted_model.params, *fitted_model.pvalues)
            if len(result_to_append) == expected_len:
                last_good_fit = fitted_model
                results.append(result_to_append)
            else:
                print('Failed analytes {} {}: not all coefficients were represented'.format(col1, col2))
                skipped += 1

        except Exception as e:
            # Best effort: one failing pair must not abort the whole screen.
            print('Failed analytes {} {} with error {}'.format(col1, col2, str(e)))
            skipped += 1

        count += 1
        if (max_run is not None) and (count >= max_run):
            break

        if (count % 1000) == 0:
            elapsed_time = datetime.now() - start_time
            print('Finished {} in {:.3f} seconds (skipped {})'.format(count, elapsed_time.total_seconds(), skipped))

    elapsed_time = datetime.now() - start_time
    print('Complete! Yay! Finished {} in {:.3f} seconds (skipped {})'.format(count, elapsed_time.total_seconds(), skipped))

    if last_good_fit is None:
        # Nothing was fitted, so there are no term names to build columns from.
        return pd.DataFrame(columns=[*base_columns, interaction_p_col, 'pval_adj'])

    df = pd.DataFrame(results, columns=[*base_columns, *last_good_fit.params.index, *[str(x) + '_p' for x in last_good_fit.pvalues.index]])

    df.sort_values([interaction_p_col], ascending=True, inplace=True)
    np.seterr(all='warn')
    has_p = df[interaction_p_col].notnull()
    df['pval_adj'] = np.nan
    if has_p.any():
        # Benjamini-Hochberg correction over the non-missing interaction p-values.
        (_, adj_pval, _, _) = statsmodels.sandbox.stats.multicomp.multipletests(df.loc[has_p, interaction_p_col], alpha=0.05, method='fdr_bh')
        df.loc[has_p, 'pval_adj'] = adj_pval
    df.sort_values(['pval_adj'], ascending=True, inplace=True)

    return df

### bio old female

In [None]:
from contextlib import redirect_stdout

# De-duplicate the candidate pairs while keeping first-seen order, so the
# pairs are processed in the same order on every run (a set has no stable
# order for string tuples across interpreter sessions).
temp_valid_list = list(dict.fromkeys(valid_test + Funhealth_valid_list))

# Append this run's printed progress to the log file. stdout is restored and
# the file is flushed and closed as soon as the block exits.
with open("240806_log_Funhealth.txt", "a") as log_file, redirect_stdout(log_file):
    interact_glm_Funhealth_compare = run_interaction_analysis(temp_valid_list, df_Funhealth, analytes, chem_list, metab_list)

In [None]:
# interact_glm_Funhealth_compare.to_csv('/notebooks/Final_Paper/240423_Investigate_Redo_Interactions/Validation_Folder/output/240806_TwinsUK_Funhealth_5SD_valid_interact.csv')

### bio old male

In [None]:
from contextlib import redirect_stdout

# De-duplicate the candidate pairs while keeping first-seen order, so the
# pairs are processed in the same order on every run (a set has no stable
# order for string tuples across interpreter sessions).
temp_valid_list = list(dict.fromkeys(valid_test + Munhealth_valid_list))

# Append this run's printed progress to the log file. stdout is restored and
# the file is flushed and closed as soon as the block exits.
with open("240806_log_Munhealth.txt", "a") as log_file, redirect_stdout(log_file):
    interact_glm_Munhealth_compare = run_interaction_analysis(temp_valid_list, df_Munhealth, analytes, chem_list, metab_list)

In [None]:
# interact_glm_Munhealth_compare.to_csv('/notebooks/Final_Paper/240423_Investigate_Redo_Interactions/Validation_Folder/output/240806_TwinsUK_Munhealth_5SD_valid_interact.csv')

### e2 allele dosage

In [None]:
def run_interaction_analysis_continuous(screened_pairs, dat, analytes, chems, metabs, max_run=None):
    """Fit one interaction GLM per analyte pair against a continuous modifier.

    This is the e2-allele variant: it fits
    ``analyte1 ~ analyte2*e2_allele + Age + BMI + Statin_User`` and stores the
    Benjamini-Hochberg adjusted p-value of the ``analyte2:e2_allele`` term in
    ``E2_pFDR``.

    For each ``(col1, col2)`` pair, ``col1`` is the response (``analyte1``) and
    ``col2`` the predictor (``analyte2``). Rows missing either analyte are
    dropped and only the first row per ``PublicID`` is kept. A Gaussian family
    with identity link is used unless the response's skewness lies outside
    +/-1.5, in which case a Gamma family with log link is used.

    Parameters
    ----------
    screened_pairs : sized iterable of (str, str)
        Column-name pairs to test.
    dat : pandas.DataFrame
        Must contain ``PublicID``, ``Age``, ``Sex``, ``e2_allele``,
        ``e4_allele``, ``metab_deltaAge``, ``Statin_User``, ``BMI`` and every
        column named in ``screened_pairs`` that passes the ``metabs`` filter.
    analytes, chems
        Not used by this function; kept so existing calls keep working.
    metabs : collection of str
        Pairs whose second member is not in this collection are skipped.
    max_run : int or None
        If given, stop after this many pairs have been attempted.

    Returns
    -------
    pandas.DataFrame
        One row per completely fitted pair, in the order the pairs were
        processed, with ``col1``, ``col2``, ``n`` (number of fitted values),
        ``converged``, the model coefficients, their p-values (``<term>_p``)
        and ``E2_pFDR``. If no pair could be fitted, an empty frame with the
        fixed columns is returned.
    """
    formula = 'analyte1 ~ analyte2*e2_allele + Age + BMI + Statin_User'
    interaction_p_col = 'analyte2:e2_allele_p'
    # NOTE(review): the original carried an unexplained "oops, typo" remark on
    # this column name; the name is kept unchanged for downstream code.
    fdr_col = 'E2_pFDR'
    base_columns = ['col1', 'col2', 'n', 'converged']
    # 4 leading fields plus one coefficient and one p-value for each of the
    # 7 expected model terms; anything else means a term was dropped or added.
    expected_len = 18

    print('Running {} pairs'.format(len(screened_pairs)))

    count = 0
    skipped = 0
    results = []
    # Most recent complete fit; its term names label the result columns.
    last_good_fit = None

    start_time = datetime.now()
    for (col1, col2) in screened_pairs:

        if col2 not in metabs:
            print('{} not in TwinsUK metabs'.format(col2))
            skipped += 1
            continue

        sub = dat[['PublicID', col1, col2, 'Age', 'Sex', 'e2_allele', 'e4_allele', 'metab_deltaAge', 'Statin_User', 'BMI']].copy()
        sub.dropna(subset=[col1, col2], inplace=True)
        sub.drop_duplicates(subset=['PublicID'], keep='first', inplace=True)
        sub.rename(columns={col1: 'analyte1'}, inplace=True)
        sub.rename(columns={col2: 'analyte2'}, inplace=True)

        skewness = sub['analyte1'].skew()
        if abs(skewness) > 1.5:
            # Set any zero values to 1/2 the smallest positive value before
            # switching to the Gamma family.
            sub.loc[sub['analyte1'] == 0, 'analyte1'] = (sub.loc[sub['analyte1'] > 0, 'analyte1'].min() / 2.0)
            family_type = family.Gamma()
            family_type.link = links.log()
        else:
            # Default is gaussian
            family_type = family.Gaussian()
            family_type.link = links.identity()

        try:
            fitted_model = smf.glm(formula, data=sub, family=family_type, missing='drop').fit(maxiter=2000)
            result_to_append = (col1, col2, len(fitted_model.fittedvalues), fitted_model.converged, *fitted_model.params, *fitted_model.pvalues)
            if len(result_to_append) == expected_len:
                last_good_fit = fitted_model
                results.append(result_to_append)
            else:
                print('Failed analytes {} {}: not all coefficients were represented'.format(col1, col2))
                skipped += 1

        except Exception as e:
            # Best effort: one failing pair must not abort the whole screen.
            print('Failed analytes {} {} with error {}'.format(col1, col2, str(e)))
            skipped += 1

        count += 1
        if (max_run is not None) and (count >= max_run):
            break

        if (count % 1000) == 0:
            elapsed_time = datetime.now() - start_time
            print('Finished {} in {:.3f} seconds (skipped {})'.format(count, elapsed_time.total_seconds(), skipped))

    elapsed_time = datetime.now() - start_time
    print('Complete! Yay! Finished {} in {:.3f} seconds (skipped {})'.format(count, elapsed_time.total_seconds(), skipped))

    if last_good_fit is None:
        # Nothing was fitted, so there are no term names to build columns from.
        return pd.DataFrame(columns=[*base_columns, interaction_p_col, fdr_col])

    df = pd.DataFrame(results, columns=[*base_columns, *last_good_fit.params.index, *[str(x) + '_p' for x in last_good_fit.pvalues.index]])

    np.seterr(all='warn')
    has_p = df[interaction_p_col].notnull()
    df[fdr_col] = np.nan
    if has_p.any():
        # Benjamini-Hochberg correction over the non-missing interaction p-values.
        (_, adj_pval, _, _) = statsmodels.sandbox.stats.multicomp.multipletests(df.loc[has_p, interaction_p_col], alpha=0.05, method='fdr_bh')
        df.loc[has_p, fdr_col] = adj_pval

    return df

In [None]:
from contextlib import redirect_stdout

# De-duplicate the candidate pairs while keeping first-seen order, so the
# pairs are processed (and the un-sorted result rows are written) in the same
# order on every run (a set has no stable order for string tuples across
# interpreter sessions).
temp_valid_list = list(dict.fromkeys(valid_test + E2_allele_valid_list))

# Append this run's printed progress to the log file. stdout is restored and
# the file is flushed and closed as soon as the block exits.
with open("240806_log_E2_allele.txt", "a") as log_file, redirect_stdout(log_file):
    interact_glm_E2_allele_compare = run_interaction_analysis_continuous(temp_valid_list, df_analysis_5SD_valid, analytes, chem_list, metab_list)

In [None]:
# interact_glm_E2_allele_compare.to_csv('/notebooks/Final_Paper/240423_Investigate_Redo_Interactions/Validation_Folder/output/240806_TwinsUK_E2_continuous_5SD_valid_interact.csv')

### e4 allele dosage

In [None]:
def run_interaction_analysis_continuous(screened_pairs, dat, analytes, chems, metabs, max_run=None):
    """Fit one interaction GLM per analyte pair against a continuous modifier.

    This is the e4-allele variant: it fits
    ``analyte1 ~ analyte2*e4_allele + Age + BMI + Statin_User`` and stores the
    Benjamini-Hochberg adjusted p-value of the ``analyte2:e4_allele`` term in
    ``E4_pFDR``.

    For each ``(col1, col2)`` pair, ``col1`` is the response (``analyte1``) and
    ``col2`` the predictor (``analyte2``). Rows missing either analyte are
    dropped and only the first row per ``PublicID`` is kept. A Gaussian family
    with identity link is used unless the response's skewness lies outside
    +/-1.5, in which case a Gamma family with log link is used.

    Parameters
    ----------
    screened_pairs : sized iterable of (str, str)
        Column-name pairs to test.
    dat : pandas.DataFrame
        Must contain ``PublicID``, ``Age``, ``Sex``, ``e2_allele``,
        ``e4_allele``, ``metab_deltaAge``, ``Statin_User``, ``BMI`` and every
        column named in ``screened_pairs`` that passes the ``metabs`` filter.
    analytes, chems
        Not used by this function; kept so existing calls keep working.
    metabs : collection of str
        Pairs whose second member is not in this collection are skipped.
    max_run : int or None
        If given, stop after this many pairs have been attempted.

    Returns
    -------
    pandas.DataFrame
        One row per completely fitted pair, in the order the pairs were
        processed, with ``col1``, ``col2``, ``n`` (number of fitted values),
        ``converged``, the model coefficients, their p-values (``<term>_p``)
        and ``E4_pFDR``. If no pair could be fitted, an empty frame with the
        fixed columns is returned.
    """
    formula = 'analyte1 ~ analyte2*e4_allele + Age + BMI + Statin_User'
    interaction_p_col = 'analyte2:e4_allele_p'
    fdr_col = 'E4_pFDR'
    base_columns = ['col1', 'col2', 'n', 'converged']
    # 4 leading fields plus one coefficient and one p-value for each of the
    # 7 expected model terms; anything else means a term was dropped or added.
    expected_len = 18

    print('Running {} pairs'.format(len(screened_pairs)))

    count = 0
    skipped = 0
    results = []
    # Most recent complete fit; its term names label the result columns.
    last_good_fit = None

    start_time = datetime.now()
    for (col1, col2) in screened_pairs:

        if col2 not in metabs:
            print('{} not in TwinsUK metabs'.format(col2))
            skipped += 1
            continue

        sub = dat[['PublicID', col1, col2, 'Age', 'Sex', 'e2_allele', 'e4_allele', 'metab_deltaAge', 'Statin_User', 'BMI']].copy()
        sub.dropna(subset=[col1, col2], inplace=True)
        sub.drop_duplicates(subset=['PublicID'], keep='first', inplace=True)
        sub.rename(columns={col1: 'analyte1'}, inplace=True)
        sub.rename(columns={col2: 'analyte2'}, inplace=True)

        skewness = sub['analyte1'].skew()
        if abs(skewness) > 1.5:
            # Set any zero values to 1/2 the smallest positive value before
            # switching to the Gamma family.
            sub.loc[sub['analyte1'] == 0, 'analyte1'] = (sub.loc[sub['analyte1'] > 0, 'analyte1'].min() / 2.0)
            family_type = family.Gamma()
            family_type.link = links.log()
        else:
            # Default is gaussian
            family_type = family.Gaussian()
            family_type.link = links.identity()

        try:
            fitted_model = smf.glm(formula, data=sub, family=family_type, missing='drop').fit(maxiter=2000)
            result_to_append = (col1, col2, len(fitted_model.fittedvalues), fitted_model.converged, *fitted_model.params, *fitted_model.pvalues)
            if len(result_to_append) == expected_len:
                last_good_fit = fitted_model
                results.append(result_to_append)
            else:
                print('Failed analytes {} {}: not all coefficients were represented'.format(col1, col2))
                skipped += 1

        except Exception as e:
            # Best effort: one failing pair must not abort the whole screen.
            print('Failed analytes {} {} with error {}'.format(col1, col2, str(e)))
            skipped += 1

        count += 1
        if (max_run is not None) and (count >= max_run):
            break

        if (count % 1000) == 0:
            elapsed_time = datetime.now() - start_time
            print('Finished {} in {:.3f} seconds (skipped {})'.format(count, elapsed_time.total_seconds(), skipped))

    elapsed_time = datetime.now() - start_time
    print('Complete! Yay! Finished {} in {:.3f} seconds (skipped {})'.format(count, elapsed_time.total_seconds(), skipped))

    if last_good_fit is None:
        # Nothing was fitted, so there are no term names to build columns from.
        return pd.DataFrame(columns=[*base_columns, interaction_p_col, fdr_col])

    df = pd.DataFrame(results, columns=[*base_columns, *last_good_fit.params.index, *[str(x) + '_p' for x in last_good_fit.pvalues.index]])

    np.seterr(all='warn')
    has_p = df[interaction_p_col].notnull()
    df[fdr_col] = np.nan
    if has_p.any():
        # Benjamini-Hochberg correction over the non-missing interaction p-values.
        (_, adj_pval, _, _) = statsmodels.sandbox.stats.multicomp.multipletests(df.loc[has_p, interaction_p_col], alpha=0.05, method='fdr_bh')
        df.loc[has_p, fdr_col] = adj_pval

    return df

In [None]:
from contextlib import redirect_stdout

# De-duplicate the candidate pairs while keeping first-seen order, so the
# pairs are processed (and the un-sorted result rows are written) in the same
# order on every run (a set has no stable order for string tuples across
# interpreter sessions).
temp_valid_list = list(dict.fromkeys(valid_test + E4_allele_valid_list))

# Append this run's printed progress to the log file. stdout is restored and
# the file is flushed and closed as soon as the block exits.
with open("240806_log_E4_allele.txt", "a") as log_file, redirect_stdout(log_file):
    interact_glm_E4_allele_compare = run_interaction_analysis_continuous(temp_valid_list, df_analysis_5SD_valid, analytes, chem_list, metab_list)

In [None]:
# interact_glm_E4_allele_compare.to_csv('/notebooks/Final_Paper/240423_Investigate_Redo_Interactions/Validation_Folder/output/240806_TwinsUK_E4_continuous_5SD_valid_interact.csv')

### delta age, continuous

In [None]:
def run_interaction_analysis_continuous(screened_pairs, dat, analytes, chems, metabs, max_run=None):
    """Fit one interaction GLM per analyte pair against a continuous modifier.

    This is the delta-age variant: it fits
    ``analyte1 ~ analyte2*metab_deltaAge + Age + BMI + Statin_User`` and stores
    the Benjamini-Hochberg adjusted p-value of the ``analyte2:metab_deltaAge``
    term in ``delta_age_pFDR``.

    For each ``(col1, col2)`` pair, ``col1`` is the response (``analyte1``) and
    ``col2`` the predictor (``analyte2``). Rows missing either analyte are
    dropped and only the first row per ``PublicID`` is kept. A Gaussian family
    with identity link is used unless the response's skewness lies outside
    +/-1.5, in which case a Gamma family with log link is used.

    Parameters
    ----------
    screened_pairs : sized iterable of (str, str)
        Column-name pairs to test.
    dat : pandas.DataFrame
        Must contain ``PublicID``, ``Age``, ``Sex``, ``e2_allele``,
        ``e4_allele``, ``metab_deltaAge``, ``Statin_User``, ``BMI`` and every
        column named in ``screened_pairs`` that passes the ``metabs`` filter.
    analytes, chems
        Not used by this function; kept so existing calls keep working.
    metabs : collection of str
        Pairs whose second member is not in this collection are skipped.
    max_run : int or None
        If given, stop after this many pairs have been attempted.

    Returns
    -------
    pandas.DataFrame
        One row per completely fitted pair, in the order the pairs were
        processed, with ``col1``, ``col2``, ``n`` (number of fitted values),
        ``converged``, the model coefficients, their p-values (``<term>_p``)
        and ``delta_age_pFDR``. If no pair could be fitted, an empty frame
        with the fixed columns is returned.
    """
    formula = 'analyte1 ~ analyte2*metab_deltaAge + Age + BMI + Statin_User'
    interaction_p_col = 'analyte2:metab_deltaAge_p'
    fdr_col = 'delta_age_pFDR'
    base_columns = ['col1', 'col2', 'n', 'converged']
    # 4 leading fields plus one coefficient and one p-value for each of the
    # 7 expected model terms; anything else means a term was dropped or added.
    expected_len = 18

    print('Running {} pairs'.format(len(screened_pairs)))

    count = 0
    skipped = 0
    results = []
    # Most recent complete fit; its term names label the result columns.
    last_good_fit = None

    start_time = datetime.now()
    for (col1, col2) in screened_pairs:

        if col2 not in metabs:
            print('{} not in TwinsUK metabs'.format(col2))
            skipped += 1
            continue

        sub = dat[['PublicID', col1, col2, 'Age', 'Sex', 'e2_allele', 'e4_allele', 'metab_deltaAge', 'Statin_User', 'BMI']].copy()
        sub.dropna(subset=[col1, col2], inplace=True)
        sub.drop_duplicates(subset=['PublicID'], keep='first', inplace=True)
        sub.rename(columns={col1: 'analyte1'}, inplace=True)
        sub.rename(columns={col2: 'analyte2'}, inplace=True)

        skewness = sub['analyte1'].skew()
        if abs(skewness) > 1.5:
            # Set any zero values to 1/2 the smallest positive value before
            # switching to the Gamma family.
            sub.loc[sub['analyte1'] == 0, 'analyte1'] = (sub.loc[sub['analyte1'] > 0, 'analyte1'].min() / 2.0)
            family_type = family.Gamma()
            family_type.link = links.log()
        else:
            # Default is gaussian
            family_type = family.Gaussian()
            family_type.link = links.identity()

        try:
            fitted_model = smf.glm(formula, data=sub, family=family_type, missing='drop').fit(maxiter=2000)
            result_to_append = (col1, col2, len(fitted_model.fittedvalues), fitted_model.converged, *fitted_model.params, *fitted_model.pvalues)
            if len(result_to_append) == expected_len:
                last_good_fit = fitted_model
                results.append(result_to_append)
            else:
                print('Failed analytes {} {}: not all coefficients were represented'.format(col1, col2))
                skipped += 1

        except Exception as e:
            # Best effort: one failing pair must not abort the whole screen.
            print('Failed analytes {} {} with error {}'.format(col1, col2, str(e)))
            skipped += 1

        count += 1
        if (max_run is not None) and (count >= max_run):
            break

        if (count % 1000) == 0:
            elapsed_time = datetime.now() - start_time
            print('Finished {} in {:.3f} seconds (skipped {})'.format(count, elapsed_time.total_seconds(), skipped))

    elapsed_time = datetime.now() - start_time
    print('Complete! Yay! Finished {} in {:.3f} seconds (skipped {})'.format(count, elapsed_time.total_seconds(), skipped))

    if last_good_fit is None:
        # Nothing was fitted, so there are no term names to build columns from.
        return pd.DataFrame(columns=[*base_columns, interaction_p_col, fdr_col])

    df = pd.DataFrame(results, columns=[*base_columns, *last_good_fit.params.index, *[str(x) + '_p' for x in last_good_fit.pvalues.index]])

    np.seterr(all='warn')
    has_p = df[interaction_p_col].notnull()
    df[fdr_col] = np.nan
    if has_p.any():
        # Benjamini-Hochberg correction over the non-missing interaction p-values.
        (_, adj_pval, _, _) = statsmodels.sandbox.stats.multicomp.multipletests(df.loc[has_p, interaction_p_col], alpha=0.05, method='fdr_bh')
        df.loc[has_p, fdr_col] = adj_pval

    return df

In [None]:
from contextlib import redirect_stdout

# De-duplicate the candidate pairs while keeping first-seen order, so the
# pairs are processed (and the un-sorted result rows are written) in the same
# order on every run (a set has no stable order for string tuples across
# interpreter sessions).
temp_valid_list = list(dict.fromkeys(valid_test + delta_age_continuous_valid_list))

# Append this run's printed progress to the log file. stdout is restored and
# the file is flushed and closed as soon as the block exits.
with open("240806_log_contin_delta_age_allele.txt", "a") as log_file, redirect_stdout(log_file):
    interact_glm_contin_delta_age_compare = run_interaction_analysis_continuous(temp_valid_list, df_analysis_5SD_valid, analytes, chem_list, metab_list)

In [None]:
# interact_glm_contin_delta_age_compare.to_csv('/notebooks/Final_Paper/240423_Investigate_Redo_Interactions/Validation_Folder/output/240806_TwinsUK_delta_age_continuous_5SD_valid_interact.csv')