# Imports

In [3]:
import pandas as pd
import numpy as np
import scipy
import re
import warnings
warnings.filterwarnings("ignore")
import pingouin as pg
from statsmodels.formula.api import mixedlm


def t_distri(n, sigma, percentile_point=0.975):
    """Half-width of the two-sided reference (prediction) interval around a sample mean.

    The interval for a single new observation drawn from the same population is
    ``mean ± t(percentile_point, n - 1) * sqrt((n + 1) / n) * sigma``; this
    function returns the part after the ``±``.

    Parameters
    ----------
    n : number or array-like (e.g. the result of ``groupby(...).count()``)
        Sample size(s).
    sigma : number or array-like of the same shape as ``n``
        Sample standard deviation(s).
    percentile_point : float, default 0.975
        Upper quantile of Student's t distribution; 0.975 gives a 95 % interval.

    Returns
    -------
    Same kind of object as ``n * sigma`` (scalar, array or DataFrame).
    """
    # The whole ratio (n + 1) / n belongs under the square root. The previous
    # expression ``np.sqrt(n + 1) / n`` divided by n instead of sqrt(n) and so
    # produced intervals that were too narrow by a factor of sqrt(n).
    return scipy.stats.t.ppf(percentile_point, n - 1) * np.sqrt((n + 1) / n) * sigma

# Dictionaries

In [4]:
# Raw measurement column name -> label shown in the tables.
dict_variables = {
    'EDV_(ml)_rest': 'EDV at rest (ml)',
    'EDV_(ml)_exercise': 'EDV during exercise (ml)',
    'EDV/BSA_(ml/m2)_rest': 'EDV/BSA at rest (ml/m²)',
    'EDV/BSA_(ml/m2)_exercise': 'EDV/BSA during exercise (ml/m²)',
    'ESV/BSA_(ml/m2)_rest': 'ESV/BSA at rest (ml/m²)',
    'ESV/BSA_(ml/m2)_exercise': 'ESV/BSA during exercise (ml/m²)',
    'ESV_(ml)_rest': 'ESV at rest (ml)',
    'ESV_(ml)_exercise': 'ESV during exercise (ml)',
    'SV_(ml)_rest': 'SV at rest (ml)',
    'SV_(ml)_exercise': 'SV during exercise (ml)',
    'SV/BSA_(ml/m2)_rest': 'SV/BSA at rest (ml/m²)',
    'SV/BSA_(ml/m2)_exercise': 'SV/BSA during exercise (ml/m²)',
    'EF_(%)_rest': 'EF at rest (%)',
    'EF_(%)_exercise': 'EF during exercise (%)',
    'CO_(l/min)_rest': 'CO at rest (l/min)',
    'CO_(l/min)_exercise': 'CO during exercise (l/min)',
    'CI_(l/min/m2)_rest': 'CI at rest (l/min/m²)',
    'CI_(l/min/m2)_exercise': 'CI during exercise (l/min/m²)',
}

# Columns reported as whole numbers. They are listed as (rest, exercise) pairs
# with the grouping column 'decade' last; later cells rely on this ordering.
variables = [
    'EDV_(ml)_rest', 'EDV_(ml)_exercise',
    'EDV/BSA_(ml/m2)_rest', 'EDV/BSA_(ml/m2)_exercise',
    'ESV_(ml)_rest', 'ESV_(ml)_exercise',
    'ESV/BSA_(ml/m2)_rest', 'ESV/BSA_(ml/m2)_exercise',
    'SV_(ml)_rest', 'SV_(ml)_exercise',
    'SV/BSA_(ml/m2)_rest', 'SV/BSA_(ml/m2)_exercise',
    'EF_(%)_rest', 'EF_(%)_exercise',
    'BASELINE HR', 'HR reached',
    'decade',
]

# Columns reported with one decimal place, in the same pair layout.
variables_different_rounding = [
    'CO_(l/min)_rest', 'CO_(l/min)_exercise',
    'CI_(l/min/m2)_rest', 'CI_(l/min/m2)_exercise',
    'decade',
]

# Exercise-only subsets: everything above except the resting measurements.
variables_ex = [
    name for name in variables
    if not name.endswith('_rest') and name != 'BASELINE HR'
]
variables_ex_different_rounding = [
    name for name in variables_different_rounding
    if not name.endswith('_rest')
]

# Short parameter name -> label with unit.
t_tests_dict = dict([
    ('HR reached', 'HR (bpm)'),
    ('EDV', 'EDV (ml)'),
    ('EDV/BSA', 'EDV/BSA (ml/m²)'),
    ('ESV', 'ESV (ml)'),
    ('ESV/BSA', 'ESV/BSA (ml/m²)'),
    ('SV', 'SV (ml)'),
    ('SV/BSA', 'SV/BSA (ml/m²)'),
    ('EF', 'EF (%)'),
    ('CO', 'CO (l/min)'),
    ('CI', 'CI (l/min/m²)'),
])

# Heart-rate columns -> label shown in the tables.
hr_dict = dict([
    ('HR reached', 'HR during exercise (bpm)'),
    ('BASELINE HR', 'HR at rest (bpm)'),
])

# Data Cleaning and Pre-Processing

In [5]:
# Input files, relative to the notebook's working directory.
CLINICAL_DATA_XLSX = 'cleaned_folder/GenScanII_data.xlsm'
REST_CINE_CSV = 'cleaned_folder/LV_function/GenScanII_sax_cine_RT_rest.csv'
EXERCISE_CINE_CSV = 'cleaned_folder/LV_function/GenScanII_sax_cine_RT_exercise.csv'
EXERCISE_LOG_XLSX = 'cleaned_folder/exercise_data.xlsx'


def _load_clinical():
    """Read the first sheet of the clinical workbook (last column dropped) and add 'age'.

    'age' is the number of completed years between 'DoB' and '1.5T Scan date'
    (NaN when either date is missing). The frame is returned indexed by 'BRU'.
    """
    clinical = pd.read_excel(CLINICAL_DATA_XLSX).iloc[:, :-1].copy()
    # Divide by a fixed mean-year Timedelta (365.2425 days, the length numpy
    # assigns to one 'Y'); newer pandas versions refuse the ambiguous
    # np.timedelta64(1, 'Y') unit.
    elapsed = clinical['1.5T Scan date'] - clinical['DoB']
    clinical['age'] = np.floor(elapsed / pd.Timedelta(days=365.2425))
    return clinical.set_index('BRU')


def _load_gene_carriers():
    """Read the 'DCM_positive' sheet and keep groups 3A/3B/4 whose 'LMNA_3A' flag is not 1.

    The returned frame is indexed by 'BRU' and carries a constant
    'pathogenic_gene' column equal to 1.
    """
    carriers = pd.read_excel(CLINICAL_DATA_XLSX, sheet_name='DCM_positive')
    selected = carriers['DCM_group'].isin(['3A', '3B', '4']) & (carriers['LMNA_3A'] != 1)
    carriers = carriers[selected].copy().set_index('BRU')
    carriers['pathogenic_gene'] = 1
    return carriers


def _join_cine(clinical, cine_csv, carriers, **read_csv_kwargs):
    """Join the clinical data with one cine result file and flag gene carriers.

    Column order matters to the caller (it slices positionally): clinical
    columns, cine columns, then 'pathogenic_gene' and 'BSA' as the last two.
    """
    cine = pd.read_csv(cine_csv, **read_csv_kwargs).set_index('BRU')
    merged = clinical.merge(cine, left_index=True, right_index=True)
    # Outer join: carriers without a scan are added as rows, everyone else gets NaN -> 0.
    merged = merged.merge(carriers['pathogenic_gene'], left_index=True, right_index=True, how='outer')
    merged.loc[merged['pathogenic_gene'] != 1, 'pathogenic_gene'] = 0
    # Body surface area from height (stored in m, hence * 100) and weight.
    merged['BSA'] = 0.007184 * ((merged['Height'] * 100) ** 0.725) * (merged['Weight'] ** 0.425)
    return merged


def _complete_cases(frame, columns):
    """Select `columns`, apply the display labels and drop rows with any missing value."""
    return frame[columns].rename(columns=dict_variables).dropna()


df_clinical = _load_clinical()
df_dcm = _load_gene_carriers()

# NOTE(review): the rest file is read with ';' as separator and the exercise
# file with the default ',' - confirm that the two exports really differ.
df_rest = _join_cine(df_clinical, REST_CINE_CSV, df_dcm, delimiter=';')
df_r = _join_cine(df_clinical, EXERCISE_CINE_CSV, df_dcm)

# Demographics plus the measurement columns of both conditions. The positional
# slice [10:-2] skips the first ten columns and the trailing
# 'pathogenic_gene'/'BSA'; equal names get the suffixes '_rest' / '_exercise'.
demographic_cols = ['Sex', 'Race', 'DoB', 'Weight', 'Height', 'pathogenic_gene', 'BSA', 'age']
df_combined = df_rest[demographic_cols].merge(
    df_rest.iloc[:, 10:-2], left_index=True, right_index=True, suffixes=('', '_rest'))
df_combined = df_combined.merge(
    df_r.iloc[:, 10:-2], left_index=True, right_index=True, suffixes=('_rest', '_exercise'))

exercise_log = pd.read_excel(EXERCISE_LOG_XLSX).set_index('BRU')[['HR reached', 'BASELINE HR', 'REACHED? ']]
df_combined = df_combined.merge(exercise_log, left_index=True, right_index=True)

# Keep participants with a known age and without a pathogenic variant.
df_combined = df_combined.dropna(subset=['age'])
df_combined = df_combined[df_combined['pathogenic_gene'] == 0].copy()

# 'decade' is the tens digit of the lower bound of a 20-year band
# (2 -> 20-39, 4 -> 40-59, 6 -> 60-79). Computed arithmetically so that ages
# below 10 or from 100 upwards are not misclassified by their first digit.
df_combined['decade'] = (df_combined['age'] // 20 * 2).astype(int)

# Complete-case tables per sex and for the whole cohort, one per rounding group.
df_women = df_combined[df_combined['Sex'] == 'F']
df_men = df_combined[df_combined['Sex'] == 'M']

reference_table_f_dropped_na = _complete_cases(df_women, variables)
reference_table_f_dropped_na_rounding = _complete_cases(df_women, variables_different_rounding)

reference_table_m_dropped_na = _complete_cases(df_men, variables)
reference_table_m_dropped_na_rounding = _complete_cases(df_men, variables_different_rounding)

df_combined_dropped_na = _complete_cases(df_combined, variables)
df_combined_dropped_na_rounding = _complete_cases(df_combined, variables_different_rounding)

FileNotFoundError: [Errno 2] No such file or directory: 'cleaned_folder/GenScanII_data.xlsm'

# Baseline Table

In [None]:
def _mean_sd_by_decade(frame, column, label, decimals=0, scale=1):
    """Return a one-column frame (named `label`) of 'mean ± SD' strings per decade.

    Values are multiplied by `scale` and rounded to `decimals` places; with
    decimals=0 they are printed as integers. The column is selected before
    aggregating so that non-numeric columns of `frame` are never averaged
    (newer pandas versions raise on that instead of silently dropping them).
    """
    by_decade = frame.groupby('decade')[column]

    def as_text(values):
        rounded = (values * scale).round(decimals)
        return (rounded.astype(int) if decimals == 0 else rounded).astype(str)

    return (as_text(by_decade.mean()) + ' ± ' + as_text(by_decade.std())).rename(label).to_frame()


def _share_by_decade(frame, column, category, label):
    """Return a one-column frame (named `label`) with the percentage of `category` per decade."""
    shares = frame.groupby('decade')[column].value_counts(normalize=True) * 100
    # Pick the category by label for every decade present and name the column
    # explicitly instead of relying on the name pandas gives the result.
    return shares.xs(category, level=1).round(1).astype(str).rename(label).to_frame()


df_baseline = df_combined.copy()
# Exact (not floored) age at the scan; 365.2425 days is the mean year length
# that np.timedelta64(1, 'Y') stands for, which newer pandas no longer accepts.
age_baseline = pd.DataFrame((df_r['1.5T Scan date'] - df_r['DoB']) / pd.Timedelta(days=365.2425))
df_baseline['age'] = age_baseline
# Only participants with an exercise ESV measurement enter the baseline table.
df_baseline = df_baseline[df_baseline['ESV_(ml)_exercise'].notna()].copy()

# number of patients
bl_table_participants = df_baseline.groupby('decade')['Sex'].count().rename('Number of Participants').to_frame()

# age, gender, race
bl_table_age = _mean_sd_by_decade(df_baseline, 'age', 'Age (years)')
bl_table_gender = _share_by_decade(df_baseline, 'Sex', 'M', 'Male (%)')
bl_table_race = _share_by_decade(df_baseline, 'Race', 'NFE', 'Caucasian (%)')

# anthropometrics ('Height' is stored in metres, hence scale=100 for cm)
bl_table_weight = _mean_sd_by_decade(df_baseline, 'Weight', 'Weight (kg)')
bl_table_height = _mean_sd_by_decade(df_baseline, 'Height', 'Height (cm)', scale=100)
bl_table_BSA = _mean_sd_by_decade(df_baseline, 'BSA', 'BSA (m²)', decimals=2)

df_baseline['BMI'] = df_baseline['Weight'] / df_baseline['Height'] ** 2
bl_table_BMI = _mean_sd_by_decade(df_baseline, 'BMI', 'BMI (kg/m²)', decimals=2)

# vitals: blood pressure is stored as 'systolic/diastolic' text
vitals_raw = pd.read_excel('cleaned_folder/exercise_data.xlsx')[['BRU', 'BASELINE BP', 'BASELINE HR', 'HR reached']]
bl_vitals = pd.merge(df_baseline[['decade']], vitals_raw, left_on='BRU', right_on='BRU')
bl_vitals[['sBP', 'dBP']] = bl_vitals['BASELINE BP'].str.split('/', expand=True).dropna().astype(int)
bl_vitals = bl_vitals.drop(columns=['BASELINE BP'])

bl_table_sBP = _mean_sd_by_decade(bl_vitals, 'sBP', 'Systolic Blood Pressure (mmHg)')
bl_table_dBP = _mean_sd_by_decade(bl_vitals, 'dBP', 'Diastolic Blood Pressure (mmHg)')
bl_table_hr_bl = _mean_sd_by_decade(bl_vitals, 'BASELINE HR', 'Heart Rate Baseline (beats per minute)')
bl_table_hr_max = _mean_sd_by_decade(bl_vitals, 'HR reached', 'Heart Rate Max (beats per minute)')

# Assemble: one row per decade, one column per characteristic (left-joined on
# the participants table), then transpose for display.
summary_table = pd.DataFrame(bl_table_participants)
lst_df_bl = [bl_table_age, bl_table_gender, bl_table_race, bl_table_weight, bl_table_height, bl_table_BSA,
             bl_table_BMI, bl_table_sBP, bl_table_dBP, bl_table_hr_bl, bl_table_hr_max]
for characteristic in lst_df_bl:
    summary_table = summary_table.join(characteristic)

summary_table = summary_table.rename({2: '20-39 years', 4: '40-59 years', 6: '60-79 years'})
summary_table = summary_table.rename_axis('Age Groups')
summary_table_style = summary_table.T.style.set_properties(**{'text-align': 'left'}).set_properties(**{'width': '100px'})
display(summary_table_style)

# Overview of the absolute Values for Women

In [None]:
# Per-decade 'mean ± SD (lower - upper)' for women. Two groups of columns are
# formatted separately: whole numbers for `variables`, one decimal for
# `variables_different_rounding`.
reference_table_f = df_combined[df_combined['Sex'] == 'F'][variables].copy()
reference_table_f_rounding = df_combined[df_combined['Sex'] == 'F'][variables_different_rounding].copy()

formatted_groups = []
for frame, decimals in ((reference_table_f, 0), (reference_table_f_rounding, 1)):
    by_decade = frame.groupby('decade')
    centre = by_decade.mean()
    spread = by_decade.std()
    half_width = t_distri(by_decade.count(), spread)

    # Round to the reporting precision (previously the whole-number group was
    # truncated towards zero by a bare astype(int)).
    rounded = [part.round(decimals) for part in (centre, spread, centre - half_width, centre + half_width)]
    if decimals == 0:
        rounded = [part.astype(int) for part in rounded]
    mean_txt, sd_txt, lower_txt, upper_txt = (part.astype(str) for part in rounded)
    formatted_groups.append(mean_txt + ' ± ' + sd_txt + ' (' + lower_txt + ' - ' + upper_txt + ')')

# Both groups share the decade index, so they can be placed side by side.
women_table = (
    pd.concat(formatted_groups, axis=1)
    .rename(index={2: '20-39 years', 4: '40-59 years', 6: '60-79 years'})
    .rename_axis('Parameters')  # ends up as the corner label after transposing
    .rename(columns={**dict_variables, **hr_dict})
    .T
)
display(women_table)

# Overview of the absolute Values for Men

In [None]:
# Per-decade 'mean ± SD (lower - upper)' for men. Two groups of columns are
# formatted separately: whole numbers for `variables`, one decimal for
# `variables_different_rounding`.
reference_table_m = df_combined[df_combined['Sex'] == 'M'][variables].copy()
reference_table_m_rounding = df_combined[df_combined['Sex'] == 'M'][variables_different_rounding].copy()

formatted_groups = []
for frame, decimals in ((reference_table_m, 0), (reference_table_m_rounding, 1)):
    by_decade = frame.groupby('decade')
    centre = by_decade.mean()
    spread = by_decade.std()
    half_width = t_distri(by_decade.count(), spread)

    # Round to the reporting precision (previously the whole-number group was
    # truncated towards zero by a bare astype(int)).
    rounded = [part.round(decimals) for part in (centre, spread, centre - half_width, centre + half_width)]
    if decimals == 0:
        rounded = [part.astype(int) for part in rounded]
    mean_txt, sd_txt, lower_txt, upper_txt = (part.astype(str) for part in rounded)
    formatted_groups.append(mean_txt + ' ± ' + sd_txt + ' (' + lower_txt + ' - ' + upper_txt + ')')

# Both groups share the decade index, so they can be placed side by side.
men_table = (
    pd.concat(formatted_groups, axis=1)
    .rename(index={2: '20-39 years', 4: '40-59 years', 6: '60-79 years'})
    .rename_axis('Parameters')  # ends up as the corner label after transposing
    # hr_dict is included so the heart-rate rows get the same labels as in the
    # women's table instead of keeping the raw column names.
    .rename(columns={**dict_variables, **hr_dict})
    .T
)
display(men_table)

# Reference Values (absolute difference) by Age Group for Men

In [None]:
# Row labels of the final table, in the order the columns are produced below
# (eight from `variables`, then two from `variables_different_rounding`).
new_index_values = ['EDV (ml)', "EDV/BSA (ml/m2)", "ESV (ml)", "ESV/BSA (ml/m2)", "SV (ml)", "SV/BSA (ml/m2)", "EF (%)",'HR reached', "CO (l/min)","CI (l/min/m2)"]
decade_labels = {2: '20-39 years', 4: '40-59 years', 6: '60-79 years'}
stat_names = ('lower', 'mean', 'upper')

df_men = df_combined[df_combined['Sex'] == 'M']

change_frames = []
limits = {name: [] for name in stat_names}
for column_list, decimals in ((variables, 0), (variables_different_rounding, 1)):
    # The lists hold (rest, exercise) pairs followed by 'decade'.
    measure_cols = [col for col in column_list if col != 'decade']
    rest_cols, exercise_cols = measure_cols[0::2], measure_cols[1::2]

    # Absolute change: exercise - rest, stored under the exercise column name.
    change = df_men[exercise_cols] - df_men[rest_cols].set_axis(exercise_cols, axis=1)
    change['decade'] = df_men['decade']
    change_frames.append(change)

    by_decade = change.groupby('decade')
    centre = by_decade.mean()
    half_width = t_distri(by_decade.count(), by_decade.std())
    for name, values in zip(stat_names, (centre - half_width, centre, centre + half_width)):
        rounded = values.round(decimals)
        # Whole-number group as integers, one-decimal group as text.
        limits[name].append(rounded.astype(int) if decimals == 0 else rounded.astype('str'))

reference_table_m, reference_table_m_rounding = change_frames

# Concat tables with different rounding
b_lower, b_mean, b_upper = (pd.concat(limits[name], axis=1) for name in stat_names)

# Concat tables to final table. Rows are picked by decade label rather than by
# position, so an extra or missing age band can no longer shift values under
# the wrong heading.
present_decades = [decade for decade in decade_labels if decade in b_mean.index]
men_by_decade = pd.concat(
    [table.loc[decade] for decade in present_decades for table in (b_lower, b_mean, b_upper)],
    axis=1,
    keys=[(decade_labels[decade], name) for decade in present_decades for name in stat_names],
)
men_by_decade = men_by_decade.rename(index=dict(zip(men_by_decade.index, new_index_values)))
men_by_decade = men_by_decade.drop(index=['HR reached'])

# Change style and display
men_by_decade = men_by_decade.style.set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
men_by_decade = men_by_decade.set_properties(**{'width': '50px'}, **{'text-align': 'center'})
display(men_by_decade)

# Reference Values (relative difference) by Age Group for Men

In [None]:
# Row labels in the order the columns are produced below; every second label
# (the 'Delta ...' rows and 'HR reached') is removed from the final table.
new_index_values = ['EDV (%)', "Delta EDV/BSA (ml/m2)", "ESV (%)", "Delta ESV/BSA (ml/m2)", "SV (%)", "Delta SV/BSA (ml/m2)", "EF (%)",'HR reached', "CO (%)","Delta CI (l/min/m2)"]
decade_labels = {2: '20-39 years', 4: '40-59 years', 6: '60-79 years'}
stat_names = ('lower', 'mean', 'upper')

df_men = df_combined[df_combined['Sex'] == 'M']

change_frames = []
limits = {name: [] for name in stat_names}
for column_list in (variables, variables_different_rounding):
    # The lists hold (rest, exercise) pairs followed by 'decade'.
    measure_cols = [col for col in column_list if col != 'decade']
    rest_cols, exercise_cols = measure_cols[0::2], measure_cols[1::2]

    # Relative change in percent of the resting value, stored under the
    # exercise column name.
    resting = df_men[rest_cols].set_axis(exercise_cols, axis=1)
    change = (df_men[exercise_cols] - resting) / resting * 100
    change['decade'] = df_men['decade']
    change_frames.append(change)

    by_decade = change.groupby('decade')
    centre = by_decade.mean()
    half_width = t_distri(by_decade.count(), by_decade.std())
    # Keep full precision here; rounding happens once, on the final table.
    for name, values in zip(stat_names, (centre - half_width, centre, centre + half_width)):
        limits[name].append(values)

reference_table_m, reference_table_m_rounding = change_frames

# Concat the two column groups
b_lower, b_mean, b_upper = (pd.concat(limits[name], axis=1) for name in stat_names)

# Concat tables to final table, picking rows by decade label rather than by
# position so values cannot end up under the wrong heading.
present_decades = [decade for decade in decade_labels if decade in b_mean.index]
men_by_decade = pd.concat(
    [table.loc[decade] for decade in present_decades for table in (b_lower, b_mean, b_upper)],
    axis=1,
    keys=[(decade_labels[decade], name) for decade in present_decades for name in stat_names],
)
men_by_decade = men_by_decade.rename(index=dict(zip(men_by_decade.index, new_index_values)))
men_by_decade = men_by_decade.drop(index=new_index_values[1::2])
# Single rounding step to whole percent (previously values were truncated, and
# CO was rounded twice: to one decimal and then again to zero).
men_by_decade = men_by_decade.astype('float').round(0).astype('int')

# Change style and display
men_by_decade = men_by_decade.style.set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
men_by_decade = men_by_decade.set_properties(**{'width': '50px'}, **{'text-align': 'center'})
display(men_by_decade)

# Reference Values (absolute difference) by Age Group for Women

In [None]:
# Row labels of the final table, in the order the columns are produced below
# (eight from `variables`, then two from `variables_different_rounding`).
new_index_values = ['EDV (ml)', "EDV/BSA (ml/m2)", "ESV (ml)", "ESV/BSA (ml/m2)", "SV (ml)", "SV/BSA (ml/m2)", "EF (%)",'HR reached', "CO (l/min)","CI (l/min/m2)"]
decade_labels = {2: '20-39 years', 4: '40-59 years', 6: '60-79 years'}
stat_names = ('lower', 'mean', 'upper')

df_women = df_combined[df_combined['Sex'] == 'F']

change_frames = []
limits = {name: [] for name in stat_names}
for column_list, decimals in ((variables, 0), (variables_different_rounding, 1)):
    # The lists hold (rest, exercise) pairs followed by 'decade'.
    measure_cols = [col for col in column_list if col != 'decade']
    rest_cols, exercise_cols = measure_cols[0::2], measure_cols[1::2]

    # Absolute change: exercise - rest, stored under the exercise column name.
    change = df_women[exercise_cols] - df_women[rest_cols].set_axis(exercise_cols, axis=1)
    change['decade'] = df_women['decade']
    change_frames.append(change)

    by_decade = change.groupby('decade')
    centre = by_decade.mean()
    half_width = t_distri(by_decade.count(), by_decade.std())
    for name, values in zip(stat_names, (centre - half_width, centre, centre + half_width)):
        # Round before converting (as the men's table does) instead of
        # truncating towards zero with a bare astype(int).
        rounded = values.round(decimals)
        limits[name].append(rounded.astype(int) if decimals == 0 else rounded.astype('str'))

reference_table_f, reference_table_f_rounding = change_frames

# Concat tables with different rounding
b_lower, b_mean, b_upper = (pd.concat(limits[name], axis=1) for name in stat_names)

# Concat tables to final table. Rows are picked by decade label rather than by
# position, so an extra or missing age band can no longer shift values under
# the wrong heading.
present_decades = [decade for decade in decade_labels if decade in b_mean.index]
women_by_decade = pd.concat(
    [table.loc[decade] for decade in present_decades for table in (b_lower, b_mean, b_upper)],
    axis=1,
    keys=[(decade_labels[decade], name) for decade in present_decades for name in stat_names],
)
women_by_decade = women_by_decade.rename(index=dict(zip(women_by_decade.index, new_index_values)))
women_by_decade = women_by_decade.drop(index=['HR reached'])

# Change style and display
women_by_decade = women_by_decade.style.set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
women_by_decade = women_by_decade.set_properties(**{'width': '50px'}, **{'text-align': 'center'})
display(women_by_decade)

# Reference Values (relative difference) by Age Group for Women

In [None]:
# Row labels in the order the columns are produced below; every second label
# (the 'Delta ...' rows and 'HR reached') is removed from the final table.
new_index_values = ['EDV (%)', "Delta EDV/BSA (ml/m2)", "ESV (%)", "Delta ESV/BSA (ml/m2)", "SV (%)", "Delta SV/BSA (ml/m2)", "EF (%)",'HR reached', "CO (%)","Delta CI (l/min/m2)"]
decade_labels = {2: '20-39 years', 4: '40-59 years', 6: '60-79 years'}
stat_names = ('lower', 'mean', 'upper')

df_women = df_combined[df_combined['Sex'] == 'F']

change_frames = []
limits = {name: [] for name in stat_names}
for column_list in (variables, variables_different_rounding):
    # The lists hold (rest, exercise) pairs followed by 'decade'.
    measure_cols = [col for col in column_list if col != 'decade']
    rest_cols, exercise_cols = measure_cols[0::2], measure_cols[1::2]

    # Relative change in percent of the resting value, stored under the
    # exercise column name.
    resting = df_women[rest_cols].set_axis(exercise_cols, axis=1)
    change = (df_women[exercise_cols] - resting) / resting * 100
    change['decade'] = df_women['decade']
    change_frames.append(change)

    by_decade = change.groupby('decade')
    centre = by_decade.mean()
    half_width = t_distri(by_decade.count(), by_decade.std())
    # Keep full precision here; rounding happens once, on the final table.
    for name, values in zip(stat_names, (centre - half_width, centre, centre + half_width)):
        limits[name].append(values)

reference_table_f, reference_table_f_rounding = change_frames

# Concat the two column groups
b_lower, b_mean, b_upper = (pd.concat(limits[name], axis=1) for name in stat_names)

# Concat tables to final table, picking rows by decade label rather than by
# position so values cannot end up under the wrong heading.
present_decades = [decade for decade in decade_labels if decade in b_mean.index]
women_by_decade = pd.concat(
    [table.loc[decade] for decade in present_decades for table in (b_lower, b_mean, b_upper)],
    axis=1,
    keys=[(decade_labels[decade], name) for decade in present_decades for name in stat_names],
)
women_by_decade = women_by_decade.rename(index=dict(zip(women_by_decade.index, new_index_values)))
women_by_decade = women_by_decade.drop(index=new_index_values[1::2])
# Single rounding step to whole percent (previously values were truncated, and
# CO was rounded twice: to one decimal and then again to zero).
women_by_decade = women_by_decade.astype('float').round(0).astype('int')

# Change style and display
women_by_decade = women_by_decade.style.set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
women_by_decade = women_by_decade.set_properties(**{'width': '50px'}, **{'text-align': 'center'})
display(women_by_decade)

# Comparison between Rest and Exercise in Men

In [None]:
# Rest vs. exercise in men.
# For every parameter a linear mixed model `value ~ condition + age` with a random intercept
# per ID (the row index) is fitted, and the p-value of the condition term is reported next to
# "mean ± SD" at rest and during exercise.
# Rows with any missing value are dropped first so that each subject contributes a complete pair.
reference_table_m_dropped_na = df_combined[df_combined['Sex']=='M'][variables].copy()
reference_table_m_dropped_na.dropna(inplace=True)

reference_table_m_dropped_na_rounding=df_combined[df_combined['Sex']=='M'][variables_different_rounding].copy()
reference_table_m_dropped_na_rounding.dropna(inplace=True)

# (table, number of consecutive rest/exercise column pairs, decimals shown in "mean ± SD")
male_sources = [
    (reference_table_m_dropped_na, 8, 0),
    (reference_table_m_dropped_na_rounding, 2, 1),
]

# Rows are collected in a list and turned into a DataFrame once (no repeated pd.concat).
male_rows = []
for source_table, n_pairs, decimals in male_sources:
    for pair in range(n_pairs):
        rest_values = source_table.iloc[:, pair*2]
        exercise_values = source_table.iloc[:, pair*2+1]

        # Long format: two rows per subject. The indicator keeps the column name 'rest' that the
        # formula uses, coded 0 for rest rows and 1 for exercise rows in every model. Swapping the
        # coding only flips the sign of the coefficient; its two-sided p-value is unaffected.
        combined = pd.concat([
            pd.DataFrame({'EDV': rest_values, 'rest': 0}),
            pd.DataFrame({'EDV': exercise_values, 'rest': 1}),
        ])
        combined = combined.merge(df_combined['age'], left_index=True, right_index=True)
        combined['ID'] = combined.index

        # The response column is always called 'EDV' because the original column names
        # (e.g. 'EDV/BSA_(ml/m2)_rest') are not valid formula terms.
        model = mixedlm(formula='EDV ~ rest + age',
                        re_formula='~ 1',
                        data=combined, groups='ID').fit()
        # Position 1 is the condition term (position 0 is the intercept). Going through
        # np.asarray avoids the deprecated positional `Series[int]` lookup.
        p = np.asarray(model.pvalues)[1]

        summaries = []
        for values in (rest_values, exercise_values):
            mean, sd = values.mean(), values.std()
            if decimals == 0:
                mean, sd = int(round(mean, 0)), int(round(sd, 0))
            else:
                mean, sd = round(mean, decimals), round(sd, decimals)
            summaries.append(f'{mean} ± {sd}')

        # p-values are shown with three decimals, matching the table for women
        p_text = str(round(p, 3)) if p >= 0.001 else '<0.001'
        male_rows.append([exercise_values.name.split('_')[0], summaries[0], summaries[1], p_text])

male_rest_vs_ex = pd.DataFrame(male_rows, columns=['Parameters','Rest','Exercise', 'p-value'])
male_rest_vs_ex = male_rest_vs_ex.set_index('Parameters')
# Heart rate (8th pair) first, then the volumes / EF, then CO and CI
male_rest_vs_ex = male_rest_vs_ex.iloc[[7,0,1,2,3,4,5,6,8,9]]
male_rest_vs_ex = male_rest_vs_ex.rename(index=t_tests_dict)
display(male_rest_vs_ex)

# Comparison between Rest and Exercise in Women

In [None]:
# Rest vs. exercise in women.
# For every parameter a linear mixed model `value ~ condition + age` with a random intercept
# per ID (the row index) is fitted, and the p-value of the condition term is reported next to
# "mean ± SD" at rest and during exercise.
# Rows with any missing value are dropped first so that each subject contributes a complete pair.
reference_table_f_dropped_na=df_combined[df_combined['Sex']=='F'][variables].copy()
reference_table_f_dropped_na.dropna(inplace=True)

reference_table_f_dropped_na_rounding=df_combined[df_combined['Sex']=='F'][variables_different_rounding].copy()
reference_table_f_dropped_na_rounding.dropna(inplace=True)

# (table, number of consecutive rest/exercise column pairs, decimals shown in "mean ± SD")
female_sources = [
    (reference_table_f_dropped_na, 8, 0),
    (reference_table_f_dropped_na_rounding, 2, 1),
]

# Rows are collected in a list and turned into a DataFrame once (no repeated pd.concat).
female_rows = []
for source_table, n_pairs, decimals in female_sources:
    for pair in range(n_pairs):
        rest_values = source_table.iloc[:, pair*2]
        exercise_values = source_table.iloc[:, pair*2+1]

        # Long format: two rows per subject. The indicator keeps the column name 'rest' that the
        # formula uses; it is 0 for rest rows and 1 for exercise rows.
        combined = pd.concat([
            pd.DataFrame({'EDV': rest_values, 'rest': 0}),
            pd.DataFrame({'EDV': exercise_values, 'rest': 1}),
        ])
        combined = combined.merge(df_combined['age'], left_index=True, right_index=True)
        combined['ID'] = combined.index

        # The response column is always called 'EDV' because the original column names
        # (e.g. 'EDV/BSA_(ml/m2)_rest') are not valid formula terms.
        model = mixedlm(formula='EDV ~ rest + age',
                        re_formula='~ 1',
                        data=combined, groups='ID').fit()
        # Position 1 is the condition term (position 0 is the intercept). Going through
        # np.asarray avoids the deprecated positional `Series[int]` lookup.
        p = np.asarray(model.pvalues)[1]

        summaries = []
        for values in (rest_values, exercise_values):
            mean, sd = values.mean(), values.std()
            if decimals == 0:
                mean, sd = int(round(mean, 0)), int(round(sd, 0))
            else:
                mean, sd = round(mean, decimals), round(sd, decimals)
            summaries.append(f'{mean} ± {sd}')

        p_text = str(round(p, 3)) if p >= 0.001 else '<0.001'
        female_rows.append([exercise_values.name.split('_')[0], summaries[0], summaries[1], p_text])

female_rest_vs_ex = pd.DataFrame(female_rows, columns=['Parameters','Rest','Exercise', 'p-value'])
female_rest_vs_ex = female_rest_vs_ex.set_index('Parameters')
# Heart rate (8th pair) first, then the volumes / EF, then CO and CI
female_rest_vs_ex = female_rest_vs_ex.iloc[[7,0,1,2,3,4,5,6,8,9]]
female_rest_vs_ex = female_rest_vs_ex.rename(index=t_tests_dict)
display(female_rest_vs_ex)

# Comparison between Men and Women

In [None]:
# Men vs. women for every parameter (absolute values).
# Uses the per-sex tables created in the rest-vs-exercise cells and the pooled tables
# df_combined_dropped_na / df_combined_dropped_na_rounding; columns are matched by position.
# Model: `value ~ sex + age` with a random intercept per ID (the row index).

# (women, men, pooled table, number of parameter columns, decimals shown in "mean ± SD")
gender_sources = [
    (reference_table_f_dropped_na, reference_table_m_dropped_na, df_combined_dropped_na, 16, 0),
    (reference_table_f_dropped_na_rounding, reference_table_m_dropped_na_rounding, df_combined_dropped_na_rounding, 4, 1),
]

# Rows are collected in a list and turned into a DataFrame once (no repeated pd.concat).
gender_rows = []
for women_table, men_table, overall_table, n_columns, decimals in gender_sources:
    for col in range(n_columns):
        women_values = women_table.iloc[:, col]
        men_values = men_table.iloc[:, col]

        # One row per subject. NOTE: the indicator is named 'female' (as in the formula) but is
        # coded 0 for women and 1 for men; only its p-value is used, which does not depend on
        # the direction of the coding.
        combined = pd.concat([
            pd.DataFrame({'EDV': women_values, 'female': 0}),
            pd.DataFrame({'EDV': men_values, 'female': 1}),
        ]).dropna()
        combined = combined.merge(df_combined['age'], left_index=True, right_index=True)
        combined['ID'] = combined.index

        # The response column is always called 'EDV' because the original column names
        # are not valid formula terms.
        model = mixedlm(formula='EDV ~ female + age', groups='ID',
                        re_formula='~ 1', data=combined).fit()
        # Position 1 is the sex term (position 0 is the intercept). Going through
        # np.asarray avoids the deprecated positional `Series[int]` lookup.
        p = np.asarray(model.pvalues)[1]

        # Column order of the final table: Overall, Men, Women
        summaries = []
        for values in (overall_table.iloc[:, col], men_values, women_values):
            mean, sd = values.mean(), values.std()
            if decimals == 0:
                mean, sd = int(round(mean, 0)), int(round(sd, 0))
            else:
                mean, sd = round(mean, decimals), round(sd, decimals)
            summaries.append(f'{mean} ± {sd}')

        p_text = str(round(p, 3)) if p >= 0.001 else '<0.001'
        gender_rows.append([women_values.name] + summaries + [p_text])

gender_comparison = pd.DataFrame(gender_rows, columns=['Parameters','Overall', 'Men','Women','p-value'])
gender_comparison = gender_comparison.set_index('Parameters')
# Heart rate rows (positions 14, 15) first, then volumes / EF, then CO and CI
gender_comparison = gender_comparison.iloc[[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,16,17,18,19]]
gender_comparison = gender_comparison.rename(index={**{'BASELINE HR':"HR at rest (bpm)",'HR reached':'HR during exercise(bpm)'}, **dict_variables})
display(gender_comparison)

# Comparison between Men and Women (absolute difference)

In [None]:
# Men vs. women for the absolute change (exercise - rest) of every parameter.
# Uses the per-sex tables created in the rest-vs-exercise cells and the pooled tables
# df_combined_dropped_na / df_combined_dropped_na_rounding; columns are matched by position
# and come in consecutive (rest, exercise) pairs.
# Model: `change ~ sex + age` with a random intercept per ID (the row index).

# (women, men, pooled table, table that supplies the row label, number of pairs, decimals)
abs_diff_sources = [
    (reference_table_f_dropped_na, reference_table_m_dropped_na, df_combined_dropped_na,
     reference_table_f_dropped_na, 8, 0),
    (reference_table_f_dropped_na_rounding, reference_table_m_dropped_na_rounding, df_combined_dropped_na_rounding,
     df_combined_dropped_na_rounding, 2, 1),
]

# Rows are collected in a list and turned into a DataFrame once (no repeated pd.concat).
abs_diff_rows = []
for women_table, men_table, overall_table, label_table, n_pairs, decimals in abs_diff_sources:
    for pair in range(n_pairs):
        men = men_table.iloc[:, pair*2+1] - men_table.iloc[:, pair*2]
        women = women_table.iloc[:, pair*2+1] - women_table.iloc[:, pair*2]
        all_ = overall_table.iloc[:, pair*2+1] - overall_table.iloc[:, pair*2].astype('float')

        # One row per subject. NOTE: the indicator is named 'female' (as in the formula) but is
        # coded 0 for women and 1 for men; only its p-value is used, which does not depend on
        # the direction of the coding.
        combined = pd.concat([
            pd.DataFrame({'EDV': women.astype('float'), 'female': 0}),
            pd.DataFrame({'EDV': men.astype('float'), 'female': 1}),
        ]).dropna()
        combined = combined.merge(df_combined['age'], left_index=True, right_index=True)
        combined['ID'] = combined.index
        # Subjects without a usable value or age cannot enter the model
        combined = combined.dropna()

        model = mixedlm(formula='EDV ~ female + age', groups='ID',
                        re_formula='~ 1', data=combined).fit()
        # Position 1 is the sex term (position 0 is the intercept). Going through
        # np.asarray avoids the deprecated positional `Series[int]` lookup.
        p = np.asarray(model.pvalues)[1]

        # Column order of the final table: Overall, Men, Women
        summaries = []
        for values in (all_, men, women):
            mean, sd = values.mean(), values.std()
            if decimals == 0:
                mean, sd = int(round(mean, 0)), int(round(sd, 0))
            else:
                mean, sd = round(mean, decimals), round(sd, decimals)
            summaries.append(f'{mean} ± {sd}')

        # Plain if/else: a row is always appended, so the fixed reordering below can never run
        # on a table with a missing parameter (the former `>` / `<` pair skipped p == 0.001
        # and NaN).
        p_text = str(round(p, 3)) if p >= 0.001 else '<0.001'
        abs_diff_rows.append([label_table.iloc[:, pair*2].name] + summaries + [p_text])

gender_comparison_abs_diff = pd.DataFrame(abs_diff_rows, columns=['Parameters','Overall', 'Men','Women','p-value'])
gender_comparison_abs_diff = gender_comparison_abs_diff.set_index('Parameters')
# Heart rate (8th pair) first, then the volumes / EF, then CO and CI
gender_comparison_abs_diff = gender_comparison_abs_diff.iloc[[7,0,1,2,3,4,5,6,8,9]]
gender_comparison_abs_diff = gender_comparison_abs_diff.rename(index={**{'BASELINE HR':"HR at rest (bpm)"},**dict_variables})
# Keep only the first and the last word of each label, e.g. 'EDV at rest (ml)' -> 'EDV (ml)'
gender_comparison_abs_diff.index=[i.split()[0]+' '+i.split()[-1] for i in gender_comparison_abs_diff.index]
display(gender_comparison_abs_diff)

# Age Group Comparison Men (absolute values)

In [None]:
# Age-group comparison in men on the absolute values.
# For every parameter: "mean ± SD" overall and per age group (decade == 2, 4, 6), a one-way
# ANOVA across the three groups, and Tukey HSD post-hoc p-values for the three group pairs.
reference_table_m_dropped_na_2=reference_table_m_dropped_na[reference_table_m_dropped_na['decade']==2].copy()
reference_table_m_dropped_na_rounding_2=reference_table_m_dropped_na_rounding[reference_table_m_dropped_na_rounding['decade']==2].copy()

reference_table_m_dropped_na_4=reference_table_m_dropped_na[reference_table_m_dropped_na['decade']==4].copy()
reference_table_m_dropped_na_rounding_4=reference_table_m_dropped_na_rounding[reference_table_m_dropped_na_rounding['decade']==4].copy()

reference_table_m_dropped_na_6=reference_table_m_dropped_na[reference_table_m_dropped_na['decade']==6].copy()
reference_table_m_dropped_na_rounding_6=reference_table_m_dropped_na_rounding[reference_table_m_dropped_na_rounding['decade']==6].copy()

# (full table, its three age-group subsets, decimals shown in "mean ± SD")
decade_men_sources = [
    (reference_table_m_dropped_na,
     (reference_table_m_dropped_na_2, reference_table_m_dropped_na_4, reference_table_m_dropped_na_6), 0),
    (reference_table_m_dropped_na_rounding,
     (reference_table_m_dropped_na_rounding_2, reference_table_m_dropped_na_rounding_4, reference_table_m_dropped_na_rounding_6), 1),
]

# Positions in the Tukey p-value matrix: group 1 vs 2, group 1 vs 3, group 2 vs 3
indices = (np.array([0, 0, 1]), np.array([1, 2, 2]))

# Rows are collected in a list and turned into a DataFrame once (no repeated pd.concat).
decade_men_rows = []
for full_table, decade_tables, decimals in decade_men_sources:
    for column in full_table.columns[0:-1]:  # the last column is 'decade' itself
        groups = [subset[column] for subset in decade_tables]

        pht = (scipy.stats.tukey_hsd(*groups).pvalue)[indices].round(3)
        # A value that rounds to 0 at three decimals is below 0.001
        pht = np.where(pht == 0, '<0.001', pht)

        # Order: Overall, 20-39, 40-59, 60-79
        summaries = []
        for values in [full_table[column]] + groups:
            mean, sd = values.mean(), values.std()
            if decimals == 0:
                mean, sd = int(round(mean)), int(round(sd))
            else:
                mean, sd = round(mean, decimals), round(sd, decimals)
            summaries.append(f'{mean} ± {sd}')

        stat, p = scipy.stats.f_oneway(*[group.dropna() for group in groups])

        # Very small p-values are reported as '<0.001' instead of a rounded '0.0'
        p_text = str(round(p, 3)) if p > 0.001 else '<0.001'
        # Post-hoc results are only shown when the ANOVA itself is significant
        post_hoc = ['-', '-', '-'] if p > 0.05 else pht.tolist()

        decade_men_rows.append([column] + summaries + [p_text] + post_hoc)

decade_men_comparison = pd.DataFrame(decade_men_rows)
decade_men_comparison.columns = ['Parameters', 'Overall', '20 - 39 years', '40 - 59 years', '60 - 79 years', 'p-value','','Post-hoc testing','']
decade_men_comparison = decade_men_comparison.set_index('Parameters').rename(index=dict_variables)
# Heart rate rows (positions 14, 15) first, then all 14 volume / EF rows, then CO and CI.
# Position 7 was missing from this list, which silently removed one parameter from the table.
decade_men_comparison=decade_men_comparison.iloc[[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,16,17,18,19]]
decade_men_comparison = decade_men_comparison.rename(index=hr_dict)
display(decade_men_comparison)

# Age Group Comparison Men (absolute difference)

In [None]:
# Age-group comparison in men on the absolute change (exercise - rest).
# Columns come in consecutive (rest, exercise) pairs. For every pair: "mean ± SD" of the change
# overall and per age group (decade == 2, 4, 6), a one-way ANOVA across the three groups, and
# Tukey HSD post-hoc p-values for the three group pairs.
reference_table_m_dropped_na_2=reference_table_m_dropped_na[reference_table_m_dropped_na['decade']==2].copy()
reference_table_m_dropped_na_rounding_2=reference_table_m_dropped_na_rounding[reference_table_m_dropped_na_rounding['decade']==2].copy()

reference_table_m_dropped_na_4=reference_table_m_dropped_na[reference_table_m_dropped_na['decade']==4].copy()
reference_table_m_dropped_na_rounding_4=reference_table_m_dropped_na_rounding[reference_table_m_dropped_na_rounding['decade']==4].copy()

reference_table_m_dropped_na_6=reference_table_m_dropped_na[reference_table_m_dropped_na['decade']==6].copy()
reference_table_m_dropped_na_rounding_6=reference_table_m_dropped_na_rounding[reference_table_m_dropped_na_rounding['decade']==6].copy()

# (full table, its three age-group subsets, decimals shown in "mean ± SD")
decade_men_diff_sources = [
    (reference_table_m_dropped_na,
     (reference_table_m_dropped_na_2, reference_table_m_dropped_na_4, reference_table_m_dropped_na_6), 0),
    (reference_table_m_dropped_na_rounding,
     (reference_table_m_dropped_na_rounding_2, reference_table_m_dropped_na_rounding_4, reference_table_m_dropped_na_rounding_6), 1),
]

# Positions in the Tukey p-value matrix: group 1 vs 2, group 1 vs 3, group 2 vs 3
indices = (np.array([0, 0, 1]), np.array([1, 2, 2]))

# Rows are collected in a list and turned into a DataFrame once (no repeated pd.concat).
decade_men_diff_rows = []
for full_table, decade_tables, decimals in decade_men_diff_sources:
    # Step through the (rest, exercise) pairs; the trailing 'decade' column is excluded
    for i in range(0, len(full_table.columns)-1, 2):
        var1 = full_table.columns[i]
        var2 = full_table.columns[i+1]

        diffs = [subset[var2] - subset[var1] for subset in decade_tables]
        all_diff = full_table[var2] - full_table[var1]

        pht = (scipy.stats.tukey_hsd(*diffs).pvalue)[indices].round(3)
        # A value that rounds to 0 at three decimals is below 0.001 (same label as the ANOVA column)
        pht = np.where(pht == 0, '<0.001', pht)

        # Order: Overall, 20-39, 40-59, 60-79
        summaries = []
        for values in [all_diff] + diffs:
            mean, sd = values.mean(), values.std()
            if decimals == 0:
                mean, sd = int(round(mean)), int(round(sd))
            else:
                mean, sd = round(mean, decimals), round(sd, decimals)
            summaries.append(f'{mean} ± {sd}')

        stat, p = scipy.stats.f_oneway(*[diff.dropna() for diff in diffs])

        p_text = str(round(p, 3)) if p > 0.001 else '<0.001'
        # Post-hoc results are only shown when the ANOVA itself is significant
        post_hoc = ['-', '-', '-'] if p > 0.05 else pht.tolist()

        decade_men_diff_rows.append([f'{var1.split("_")[0]}'] + summaries + [p_text] + post_hoc)

decade_men_comparison = pd.DataFrame(decade_men_diff_rows)
decade_men_comparison.columns = ['Change in ... (exercise-rest)', 'Overall', '20 - 39 years', '40 - 59 years', '60 - 79 years', 'p-value','Post-hoc','Post-hoc','Post-hoc']
decade_men_comparison = decade_men_comparison.set_index('Change in ... (exercise-rest)')
# Heart rate (8th pair) first, then the volumes / EF, then CO and CI
decade_men_comparison=decade_men_comparison.iloc[[7,0,1,2,3,4,5,6,8,9]]
decade_men_comparison = decade_men_comparison.rename(index={**hr_dict,**t_tests_dict})
display(decade_men_comparison)

# Age Group Comparison Women (absolute values)

In [None]:
# Age-group comparison in women on the absolute values.
# For every parameter: "mean ± SD" overall and per age group (decade == 2, 4, 6), a one-way
# ANOVA across the three groups, and Tukey HSD post-hoc p-values for the three group pairs.
reference_table_f_dropped_na_2 = reference_table_f_dropped_na[reference_table_f_dropped_na['decade'] == 2].copy()
reference_table_f_dropped_na_rounding_2 = reference_table_f_dropped_na_rounding[reference_table_f_dropped_na_rounding['decade'] == 2].copy()

reference_table_f_dropped_na_4 = reference_table_f_dropped_na[reference_table_f_dropped_na['decade'] == 4].copy()
reference_table_f_dropped_na_rounding_4 = reference_table_f_dropped_na_rounding[reference_table_f_dropped_na_rounding['decade'] == 4].copy()

reference_table_f_dropped_na_6 = reference_table_f_dropped_na[reference_table_f_dropped_na['decade'] == 6].copy()
reference_table_f_dropped_na_rounding_6 = reference_table_f_dropped_na_rounding[reference_table_f_dropped_na_rounding['decade'] == 6].copy()

# (full table, its three age-group subsets, decimals shown in "mean ± SD")
decade_women_sources = [
    (reference_table_f_dropped_na,
     (reference_table_f_dropped_na_2, reference_table_f_dropped_na_4, reference_table_f_dropped_na_6), 0),
    (reference_table_f_dropped_na_rounding,
     (reference_table_f_dropped_na_rounding_2, reference_table_f_dropped_na_rounding_4, reference_table_f_dropped_na_rounding_6), 1),
]

# Positions in the Tukey p-value matrix: group 1 vs 2, group 1 vs 3, group 2 vs 3
indices = (np.array([0, 0, 1]), np.array([1, 2, 2]))

# Every parameter contributes exactly one list of 9 cells. Building the table from these lists
# in one go removes the row that was previously appended without `.T` (as a 9x1 column) for
# non-significant CO/CI parameters, which corrupted the table layout.
decade_women_rows = []
for full_table, decade_tables, decimals in decade_women_sources:
    for column in full_table.columns[0:-1]:  # the last column is 'decade' itself
        groups = [subset[column] for subset in decade_tables]

        pht = (scipy.stats.tukey_hsd(*groups).pvalue)[indices].round(3)
        # A value that rounds to 0 at three decimals is below 0.001
        pht = np.where(pht == 0, '<0.001', pht)

        # Order: Overall, 20-39, 40-59, 60-79
        summaries = []
        for values in [full_table[column]] + groups:
            mean, sd = values.mean(), values.std()
            if decimals == 0:
                mean, sd = int(round(mean)), int(round(sd))
            else:
                mean, sd = round(mean, decimals), round(sd, decimals)
            summaries.append(f'{mean} ± {sd}')

        stat, p = scipy.stats.f_oneway(*[group.dropna() for group in groups])

        # Very small p-values are reported as '<0.001' instead of a rounded '0.0'
        p_text = str(round(p, 3)) if p > 0.001 else '<0.001'
        # Post-hoc results are only shown when the ANOVA itself is significant
        post_hoc = ['-', '-', '-'] if p > 0.05 else pht.tolist()

        decade_women_rows.append([column] + summaries + [p_text] + post_hoc)

decade_women_comparison = pd.DataFrame(decade_women_rows)
decade_women_comparison.columns = ['Parameters', 'Overall', '20 - 39 years', '40 - 59 years', '60 - 79 years', 'p-value','','Post-hoc testing','']
decade_women_comparison = decade_women_comparison.set_index('Parameters').rename(index=dict_variables)
# Heart rate rows (positions 14, 15) first, then all 14 volume / EF rows, then CO and CI
decade_women_comparison=decade_women_comparison.iloc[[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,16,17,18,19]]
decade_women_comparison = decade_women_comparison.rename(index=hr_dict)
display(decade_women_comparison)

# Age Group Comparison Women (absolute difference)

In [None]:
# Per-decade subsets of the women's tables. Decade codes 2, 4 and 6 feed the
# '20 - 39 years', '40 - 59 years' and '60 - 79 years' columns of the final table.
reference_table_f_dropped_na_2 = reference_table_f_dropped_na[reference_table_f_dropped_na['decade'] == 2].copy()
reference_table_f_dropped_na_rounding_2 = reference_table_f_dropped_na_rounding[reference_table_f_dropped_na_rounding['decade'] == 2].copy()

reference_table_f_dropped_na_4 = reference_table_f_dropped_na[reference_table_f_dropped_na['decade'] == 4].copy()
reference_table_f_dropped_na_rounding_4 = reference_table_f_dropped_na_rounding[reference_table_f_dropped_na_rounding['decade'] == 4].copy()

reference_table_f_dropped_na_6 = reference_table_f_dropped_na[reference_table_f_dropped_na['decade'] == 6].copy()
reference_table_f_dropped_na_rounding_6 = reference_table_f_dropped_na_rounding[reference_table_f_dropped_na_rounding['decade'] == 6].copy()


def _exercise_minus_rest_rows(table, table_2, table_4, table_6, decimals):
    """Summarise the (second column - first column) change for each column pair.

    Columns of ``table`` are consumed pairwise by position (0/1, 2/3, ...); the
    last column is excluded from the pairing.
    NOTE(review): this assumes the columns alternate rest/exercise and end with
    'decade' -- confirm against where the tables are built.

    Parameters
    ----------
    table : pandas.DataFrame
        All subjects; used for the 'Overall' cell.
    table_2, table_4, table_6 : pandas.DataFrame
        The same columns restricted to one decade group each.
    decimals : int
        0 formats mean/SD as integers, any other value rounds to that many
        decimal places.

    Returns
    -------
    list[list[str]]
        One nine-cell row per column pair: label, overall, three decade
        groups, ANOVA p-value and three Tukey post-hoc p-values
        (group 1 vs 2, 1 vs 3, 2 vs 3).
    """
    # Upper-triangle positions of the 3 x 3 Tukey p-value matrix.
    tukey_pairs = (np.array([0, 0, 1]), np.array([1, 2, 2]))

    def _rounded(value):
        return int(round(value)) if decimals == 0 else round(value, decimals)

    def _mean_sd(series):
        return f'{_rounded(series.mean())} ± {_rounded(series.std())}'

    rows = []
    for i in range(0, len(table.columns) - 1, 2):
        var1 = table.columns[i]
        var2 = table.columns[i + 1]

        all_diff = table[var2] - table[var1]
        # Missing values are dropped once so that the ANOVA and the Tukey
        # test always see exactly the same samples.
        groups = [
            (subset[var2] - subset[var1]).dropna()
            for subset in (table_2, table_4, table_6)
        ]

        stat, p = scipy.stats.f_oneway(*groups)

        if p <= 0.05:
            pht = scipy.stats.tukey_hsd(*groups).pvalue[tukey_pairs].round(3)
            # NOTE(review): a value that rounds to 0 at three decimals is
            # labelled '<0.01' (kept as-is); '<0.001' would match the ANOVA
            # column -- confirm the intended label.
            posthoc_cells = np.where(pht == 0, '<0.01', pht).tolist()
            p_cell = '<0.001' if p <= 0.001 else str(round(p, 3))
        else:
            # Not significant, or undefined (NaN). A NaN p-value used to fall
            # into the '<0.001' branch and was reported as highly significant.
            posthoc_cells = ['-', '-', '-']
            p_cell = str(round(p, 3))

        rows.append(
            [var1.split("_")[0], _mean_sd(all_diff)]
            + [_mean_sd(group) for group in groups]
            + [p_cell]
            + posthoc_cells
        )
    return rows


# Integer-rounded parameters first, then the parameters reported with one decimal.
comparison_rows = _exercise_minus_rest_rows(
    reference_table_f_dropped_na,
    reference_table_f_dropped_na_2,
    reference_table_f_dropped_na_4,
    reference_table_f_dropped_na_6,
    decimals=0,
) + _exercise_minus_rest_rows(
    reference_table_f_dropped_na_rounding,
    reference_table_f_dropped_na_rounding_2,
    reference_table_f_dropped_na_rounding_4,
    reference_table_f_dropped_na_rounding_6,
    decimals=1,
)

decade_women_comparison = pd.DataFrame(
    comparison_rows,
    columns=['Change in ... (exercise-rest)', 'Overall', '20 - 39 years', '40 - 59 years', '60 - 79 years', 'p-value','Post-hoc','Post-hoc','Post-hoc'],
).set_index('Change in ... (exercise-rest)')

# Row 7 (the eighth column pair of the integer-rounded table) is shown first;
# the remaining rows keep their order.
decade_women_comparison = decade_women_comparison.iloc[[7,0,1,2,3,4,5,6,8,9]]
decade_women_comparison = decade_women_comparison.rename(index={**hr_dict,**t_tests_dict})
display(decade_women_comparison)

# Intra- and Inter-observer Variability

In [None]:
# Short parameter names used in the second-read spreadsheets and in the output table.
_ICC_PARAMETERS = ['EDV','ESV','SV','EF','CO','CI','EDV/BSA','ESV/BSA','SV/BSA']
# Matching columns of the first-read CSV files, in the same order as _ICC_PARAMETERS.
_FIRST_READ_COLUMNS = ['EDV_(ml)', 'ESV_(ml)', 'SV_(ml)', 'EF_(%)', 'CO_(l/min)',
       'CI_(l/min/m2)','EDV/BSA_(ml/m2)', 'ESV/BSA_(ml/m2)', 'SV/BSA_(ml/m2)']


def _intra_observer_icc(first_read, second_read, id_table):
    """Compute the intra-observer ICC per parameter for one condition.

    Parameters
    ----------
    first_read : pandas.DataFrame
        First reading; must contain 'BRU' and the ``_FIRST_READ_COLUMNS``.
    second_read : pandas.DataFrame
        Second reading; must contain 'Patient' and the ``_ICC_PARAMETERS``.
    id_table : pandas.DataFrame
        Lookup joined to ``second_read`` on 'Patient' == '1.5T Name'; the
        merged result must expose a 'BRU' column.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The long table (both readings stacked, with a 'rater' column) and a
        one-column table 'Intra-Observer ICC' indexed by 'Parameters'.
    """
    second = pd.merge(second_read, id_table, left_on='Patient', right_on='1.5T Name')
    # Keep only the first-read subjects that were read a second time.
    first = pd.merge(first_read, second['BRU'], left_on='BRU', right_on='BRU')

    first = first[['BRU'] + _FIRST_READ_COLUMNS].copy()
    first.columns = ['BRU'] + _ICC_PARAMETERS
    second = second[['BRU'] + _ICC_PARAMETERS].copy()

    first['rater'] = 'first'
    second['rater'] = 'second'
    long_table = pd.concat([first, second], axis=0)

    icc_by_parameter = {}
    for parameter in _ICC_PARAMETERS:
        icc_result = pg.intraclass_corr(long_table, targets="BRU", raters='rater', ratings=parameter)
        # Positional lookup: fifth row, second column after indexing by 'Type'.
        # NOTE(review): in pingouin's documented output order this is the
        # ICC2k row and its 'ICC' value -- confirm against the installed version.
        icc_by_parameter[parameter] = icc_result.set_index('Type').iloc[4,1]

    icc_table = pd.DataFrame(icc_by_parameter.items())
    icc_table.columns = ['Parameters','Intra-Observer ICC']
    icc_table = icc_table.set_index('Parameters')
    return long_table, icc_table


# Exercise: second read and ID lookup live in two separate workbooks.
intra_obs_ex, intra_class_table_ex = _intra_observer_icc(
    first_read=pd.read_csv('cleaned_folder/LV_function/GenScanII_sax_cine_RT_exercise.csv'),
    second_read=pd.read_excel('cleaned_folder/intra_observer/exercise/intraoberserver_results.xlsx'),
    id_table=pd.read_excel('cleaned_folder/intra_observer/exercise/inter_intra_observer_IDs.xlsx'),
)

# Rest: second read is the first sheet and the ID lookup the second sheet of
# one workbook; the first-read CSV is semicolon-delimited.
intra_obs_rest, intra_class_table_rest = _intra_observer_icc(
    first_read=pd.read_csv('cleaned_folder/LV_function/GenScanII_sax_cine_RT_rest.csv', delimiter=';'),
    second_read=pd.read_excel('cleaned_folder/intra_observer/rest/intra_observer_rest.xlsx'),
    id_table=pd.read_excel('cleaned_folder/intra_observer/rest/intra_observer_rest.xlsx',sheet_name=1),
)

# Interleave rest/exercise per parameter, volumes first, then EF, CO and CI.
_display_order = [
    f'{parameter}{suffix}'
    for parameter in ['EDV','EDV/BSA','ESV','ESV/BSA','SV','SV/BSA','EF','CO','CI']
    for suffix in (' at rest', ' during exercise')
]
intra_inter_obs = pd.concat([
    intra_class_table_rest.rename(index=lambda name: f'{name} at rest'),
    intra_class_table_ex.rename(index=lambda name: f'{name} during exercise'),
]).loc[_display_order]

# Empty placeholder column; no inter-observer values are computed in this cell.
intra_inter_obs['Inter-Observer ICC']=''
display(intra_inter_obs.round(2))