# Multiomics BMI Paper — Comparison of Biological BMI and WHtR Models

***by Kengo Watanabe***  

This Jupyter Notebook (with Python 3 kernel) compares the blood omics-based BMI LASSO models with the blood omics-based WHtR LASSO models. In addition, this notebook visualizes ∆WHtR correlations across omics categories and assesses differences in ∆WHtR between clinically defined metabolic health conditions (in the baseline Arivale cohort).  

Input files:  
* Biological BMI models: 220801_Multiomics-BMI-NatMed1stRevision_BMI-baseline-LASSO_\[MetBMI/ProtBMI/ChemBMI/CombiBMI\]-BothSex-LASSObcoefs.tsv  
* Biological WHtR models: 220822_Multiomics-BMI-NatMed1stRevision_WHtR-baseline-LASSO-ver2_\[MetWHtR/ProtWHtR/ChemWHtR/CombiWHtR\]-BothSex-LASSObcoefs.tsv  
* Arivale baseline BMI predictions: 220803_Multiomics-BMI-NatMed1stRevision_DeltaBMI-misclassification_biologicalBMI-baseline-summary-BothSex.tsv  
* Arivale baseline WHtR predictions: 220822_Multiomics-BMI-NatMed1stRevision_WHtR-baseline-LASSO-ver2_\[MetWHtR/ProtWHtR/ChemWHtR/CombiWHtR\]-BothSex.tsv  
* Arivale baseline covariates: 220621_Multiomics-BMI-NatMedRevision_WHtR-DataCleaning_baseline-WHtR-final-cohort.tsv  
* Arivale baseline metabolic health condition: 220720_Multiomics-BMI-NatMedRevision_Misclassification_metabolic-health-summary.tsv  

Output figures and tables:  
* Supplementary Figure 7l, 7m, 8  
* Tables for Supplementary Data 10  

Original notebook (memo for my future tracing):  
* dalek:\[JupyterLab HOME\]/220621_Multiomics-BMI-NatMedRevision/220824_Multiomics-BMI-NatMed1stRevision_bBMI-vs-bWHtR-ver2.ipynb  

In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
#For Arial font
#!conda install -c conda-forge -y mscorefonts
##-> The below was also needed in matplotlib 3.4.2
#import shutil
#import matplotlib
#shutil.rmtree(matplotlib.get_cachedir())
import warnings
warnings.filterwarnings('ignore')
from IPython.display import display
import time

#!pip install venn
from venn import venn
from statsmodels.stats import multitest as multi
from decimal import Decimal, ROUND_HALF_UP
import sys
from sklearn.preprocessing import StandardScaler
import statsmodels.formula.api as smf

!conda list

# packages in environment at /opt/conda/envs/arivale-py3:
#
# Name                    Version                   Build  Channel
_libgcc_mutex             0.1                 conda_forge    conda-forge
_openmp_mutex             4.5                       1_gnu    conda-forge
analytics                 0.1                      pypi_0    pypi
argon2-cffi               21.1.0           py39h3811e60_0    conda-forge
arivale-data-interface    0.1.0                    pypi_0    pypi
async_generator           1.10                       py_0    conda-forge
atk-1.0                   2.36.0               h3371d22_4    conda-forge
attrs                     21.2.0             pyhd8ed1ab_0    conda-forge
backcall                  0.2.0              pyh9f0ad1d_0    conda-forge
backports                 1.0                        py_2    conda-forge
backports.functools_lru_cache 1.6.4              pyhd8ed1ab_0    conda-forge
biopython                 1.79             py39h3811e60_0    conda-forge
bleach 

## 1. Variables between bBMI and bWHtR models

### 1-1. Import beta-coefficients

In [None]:
#Load the LASSO beta-coefficient table of each biological BMI model
bmi_bcoefs = {}
for bbmi in ['MetBMI', 'ProtBMI', 'ChemBMI', 'CombiBMI']:
    #Read the table, index it by variable name, and discard the intercept row
    bcoef_path = ('./ExportData/'
                  + '220801_Multiomics-BMI-NatMed1stRevision_BMI-baseline-LASSO_'
                  + bbmi+'-BothSex-LASSObcoefs.tsv')
    bcoefDF = pd.read_csv(bcoef_path, sep='\t').set_index('Variable').drop(index=['Intercept'])
    bmi_bcoefs[bbmi] = bcoefDF
    
    #Check: count the variables by the 'nZeros' column
    n_total = len(bcoefDF)
    ##nZeros!=10 -> non-zero beta-coefficient in at least one model
    n_retained = int((bcoefDF['nZeros']!=10).sum())
    ##nZeros==0 -> robust beta-coefficient: no zeros in all 10 models
    n_robust = int((bcoefDF['nZeros']==0).sum())
    print(bbmi+':')
    print(' - Variables:', n_total)
    print(' - Variables with non-zero beta-coefficient:', n_retained,
          '(', n_retained/n_total*100, '%)')
    print(' - Variables with non-zero beta-coefficient in all 10 models:', n_robust,
          '(', n_robust/n_total*100, '%)')
    print('')

#Expose one named table per model for the downstream cells
metBMI_B_bcoefs = bmi_bcoefs['MetBMI']
protBMI_B_bcoefs = bmi_bcoefs['ProtBMI']
chemBMI_B_bcoefs = bmi_bcoefs['ChemBMI']
combiBMI_B_bcoefs = bmi_bcoefs['CombiBMI']

In [None]:
#Load the LASSO beta-coefficient table of each biological WHtR model
whtr_bcoefs = {}
for bwhtr in ['MetWHtR', 'ProtWHtR', 'ChemWHtR', 'CombiWHtR']:
    #Read the table, index it by variable name, and discard the intercept row
    bcoef_path = ('./ExportData/'
                  + '220822_Multiomics-BMI-NatMed1stRevision_WHtR-baseline-LASSO-ver2_'
                  + bwhtr+'-BothSex-LASSObcoefs.tsv')
    bcoefDF = pd.read_csv(bcoef_path, sep='\t').set_index('Variable').drop(index=['Intercept'])
    whtr_bcoefs[bwhtr] = bcoefDF
    
    #Check: count the variables by the 'nZeros' column
    n_total = len(bcoefDF)
    ##nZeros!=10 -> non-zero beta-coefficient in at least one model
    n_retained = int((bcoefDF['nZeros']!=10).sum())
    ##nZeros==0 -> robust beta-coefficient: no zeros in all 10 models
    n_robust = int((bcoefDF['nZeros']==0).sum())
    print(bwhtr+':')
    print(' - Variables:', n_total)
    print(' - Variables with non-zero beta-coefficient:', n_retained,
          '(', n_retained/n_total*100, '%)')
    print(' - Variables with non-zero beta-coefficient in all 10 models:', n_robust,
          '(', n_robust/n_total*100, '%)')
    print('')

#Expose one named table per model for the downstream cells
metWHtR_B_bcoefs = whtr_bcoefs['MetWHtR']
protWHtR_B_bcoefs = whtr_bcoefs['ProtWHtR']
chemWHtR_B_bcoefs = whtr_bcoefs['ChemWHtR']
combiWHtR_B_bcoefs = whtr_bcoefs['CombiWHtR']

### 1-2. Metabolites

In [None]:
#Target analytes: metabolites
#(the combined omics tables are restricted to the variables of the metabolomics tables)
analyte = 'metabolites'
title = 'Robustly retained metabolites'
bcoef_tables = {'CombiBMI':combiBMI_B_bcoefs.loc[metBMI_B_bcoefs.index.tolist()],
                'MetBMI':metBMI_B_bcoefs,
                'MetWHtR':metWHtR_B_bcoefs,
                'CombiWHtR':combiWHtR_B_bcoefs.loc[metWHtR_B_bcoefs.index.tolist()]}
model_colors = {'CombiBMI':'tab:blue', 'MetBMI':'tab:orange',
                'MetWHtR':'tab:green', 'CombiWHtR':'tab:red'}

#Per model, collect the variables with robust beta-coefficient: no zeros in all 10 models
print('Variables with non-zero beta-coefficient in all 10 models')
robust_sets = {}
for measure, bcoefDF in bcoef_tables.items():
    robustDF = bcoefDF.loc[bcoefDF['nZeros']==0]
    print(measure+':', len(robustDF), 'per', len(bcoefDF), analyte+' (',
          len(robustDF)/len(bcoefDF)*100, '%)')
    robust_sets[measure] = set(robustDF.index.tolist())

#Check common region (1,1,1,1): variables shared by all four sets
common_vars = set.intersection(*robust_sets.values())
print(' -> Common (1,1,1,1):', len(common_vars))
display(common_vars)

#Venn diagram
sns.set(style='ticks', font='Arial', context='talk')
fig, ax = plt.subplots(figsize=(4, 4))
venn(robust_sets, fmt='{size:,}', cmap=list(model_colors.values()), legend_loc=None, ax=ax)
ax.set_ylim((0.1, 0.875))#Otherwise, weird space...
##Add legend annotation: one (x, y, horizontal alignment, vertical alignment) per set
label_layout = [(0.1, 0.25, 'right', 'top'),
                (0.1, 0.7, 'right', 'bottom'),
                (0.9, 0.7, 'left', 'bottom'),
                (0.9, 0.25, 'left', 'top')]
for (x_pos, y_pos, h_pos, v_pos), (key, facecolor) in zip(label_layout, model_colors.items()):
    total = f'{len(robust_sets[key]):,}'
    ax.text(x_pos, y_pos, key+'\n('+total+' '+analyte+')',
            fontsize='small', multialignment='center',
            horizontalalignment=h_pos, verticalalignment=v_pos,
            bbox={'boxstyle':'round', 'facecolor':facecolor, 'pad':0.2, 'alpha':0.5})
ax.set_title(title, fontsize='medium')
##Save
fig_path = ('./ExportFigures/'
            + '220824_Multiomics-BMI-NatMed1stRevision_bBMI-vs-bWHtR-ver2_'
            + 'venn-'+analyte+'.tif')
fig.savefig(fig_path, dpi=300, bbox_inches='tight', pad_inches=0.04,
            pil_kwargs={'compression':'tiff_lzw'})
plt.show()

### 1-3. Proteins

In [None]:
#Target analytes: proteins
#(the combined omics tables are restricted to the variables of the proteomics tables)
analyte = 'proteins'
title = 'Robustly retained proteins'
bcoef_tables = {'CombiBMI':combiBMI_B_bcoefs.loc[protBMI_B_bcoefs.index.tolist()],
                'ProtBMI':protBMI_B_bcoefs,
                'ProtWHtR':protWHtR_B_bcoefs,
                'CombiWHtR':combiWHtR_B_bcoefs.loc[protWHtR_B_bcoefs.index.tolist()]}
model_colors = {'CombiBMI':'tab:blue', 'ProtBMI':'tab:orange',
                'ProtWHtR':'tab:green', 'CombiWHtR':'tab:red'}

#Per model, collect the variables with robust beta-coefficient: no zeros in all 10 models
print('Variables with non-zero beta-coefficient in all 10 models')
robust_sets = {}
for measure, bcoefDF in bcoef_tables.items():
    robustDF = bcoefDF.loc[bcoefDF['nZeros']==0]
    print(measure+':', len(robustDF), 'per', len(bcoefDF), analyte+' (',
          len(robustDF)/len(bcoefDF)*100, '%)')
    robust_sets[measure] = set(robustDF.index.tolist())

#Check common region (1,1,1,1): variables shared by all four sets
common_vars = set.intersection(*robust_sets.values())
print(' -> Common (1,1,1,1):', len(common_vars))
display(common_vars)

#Venn diagram
sns.set(style='ticks', font='Arial', context='talk')
fig, ax = plt.subplots(figsize=(4, 4))
venn(robust_sets, fmt='{size:,}', cmap=list(model_colors.values()), legend_loc=None, ax=ax)
ax.set_ylim((0.1, 0.875))#Otherwise, weird space...
##Add legend annotation: one (x, y, horizontal alignment, vertical alignment) per set
label_layout = [(0.1, 0.25, 'right', 'top'),
                (0.1, 0.7, 'right', 'bottom'),
                (0.9, 0.7, 'left', 'bottom'),
                (0.9, 0.25, 'left', 'top')]
for (x_pos, y_pos, h_pos, v_pos), (key, facecolor) in zip(label_layout, model_colors.items()):
    total = f'{len(robust_sets[key]):,}'
    ax.text(x_pos, y_pos, key+'\n('+total+' '+analyte+')',
            fontsize='small', multialignment='center',
            horizontalalignment=h_pos, verticalalignment=v_pos,
            bbox={'boxstyle':'round', 'facecolor':facecolor, 'pad':0.2, 'alpha':0.5})
ax.set_title(title, fontsize='medium')
##Save
fig_path = ('./ExportFigures/'
            + '220824_Multiomics-BMI-NatMed1stRevision_bBMI-vs-bWHtR-ver2_'
            + 'venn-'+analyte+'.tif')
fig.savefig(fig_path, dpi=300, bbox_inches='tight', pad_inches=0.04,
            pil_kwargs={'compression':'tiff_lzw'})
plt.show()

### 1-4. Clinical lab tests

In [None]:
#Target analytes: clinical lab tests
#(the combined omics tables are restricted to the variables of the clinical lab tables)
analyte = 'tests'
title = 'Robustly retained clinical lab tests'
bcoef_tables = {'CombiBMI':combiBMI_B_bcoefs.loc[chemBMI_B_bcoefs.index.tolist()],
                'ChemBMI':chemBMI_B_bcoefs,
                'ChemWHtR':chemWHtR_B_bcoefs,
                'CombiWHtR':combiWHtR_B_bcoefs.loc[chemWHtR_B_bcoefs.index.tolist()]}
model_colors = {'CombiBMI':'tab:blue', 'ChemBMI':'tab:orange',
                'ChemWHtR':'tab:green', 'CombiWHtR':'tab:red'}

#Per model, collect the variables with robust beta-coefficient: no zeros in all 10 models
print('Variables with non-zero beta-coefficient in all 10 models')
robust_sets = {}
for measure, bcoefDF in bcoef_tables.items():
    robustDF = bcoefDF.loc[bcoefDF['nZeros']==0]
    print(measure+':', len(robustDF), 'per', len(bcoefDF), analyte+' (',
          len(robustDF)/len(bcoefDF)*100, '%)')
    robust_sets[measure] = set(robustDF.index.tolist())

#Check common region (1,1,1,1): variables shared by all four sets
common_vars = set.intersection(*robust_sets.values())
print(' -> Common (1,1,1,1):', len(common_vars))
display(common_vars)

#Venn diagram
sns.set(style='ticks', font='Arial', context='talk')
fig, ax = plt.subplots(figsize=(4, 4))
venn(robust_sets, fmt='{size:,}', cmap=list(model_colors.values()), legend_loc=None, ax=ax)
ax.set_ylim((0.1, 0.875))#Otherwise, weird space...
##Add legend annotation: one (x, y, horizontal alignment, vertical alignment) per set
label_layout = [(0.1, 0.25, 'right', 'top'),
                (0.1, 0.7, 'right', 'bottom'),
                (0.9, 0.7, 'left', 'bottom'),
                (0.9, 0.25, 'left', 'top')]
for (x_pos, y_pos, h_pos, v_pos), (key, facecolor) in zip(label_layout, model_colors.items()):
    total = f'{len(robust_sets[key]):,}'
    ax.text(x_pos, y_pos, key+'\n('+total+' '+analyte+')',
            fontsize='small', multialignment='center',
            horizontalalignment=h_pos, verticalalignment=v_pos,
            bbox={'boxstyle':'round', 'facecolor':facecolor, 'pad':0.2, 'alpha':0.5})
ax.set_title(title, fontsize='medium')
##Save
fig_path = ('./ExportFigures/'
            + '220824_Multiomics-BMI-NatMed1stRevision_bBMI-vs-bWHtR-ver2_'
            + 'venn-'+analyte+'.tif')
fig.savefig(fig_path, dpi=300, bbox_inches='tight', pad_inches=0.04,
            pil_kwargs={'compression':'tiff_lzw'})
plt.show()

### 1-5. Correlation of beta-coefficients

In [None]:
#All variables
#Compare the mean beta-coefficients of the bBMI and bWHtR models within each omics category:
#Pearson's correlation (P-values adjusted across the four categories) and 2x2 scatterplots
bcoefD = {'Met':{'BMI':metBMI_B_bcoefs, 'WHtR':metWHtR_B_bcoefs},
          'Prot':{'BMI':protBMI_B_bcoefs, 'WHtR':protWHtR_B_bcoefs},
          'Chem':{'BMI':chemBMI_B_bcoefs, 'WHtR':chemWHtR_B_bcoefs},
          'Combi':{'BMI':combiBMI_B_bcoefs, 'WHtR':combiWHtR_B_bcoefs}}
labelD = {'Met':'Metabolomics', 'Prot':'Proteomics', 'Chem':'Clinical labs', 'Combi':'Combined omics'}
colorD = {'Metabolomics':'b', 'Proteomics':'r', 'Clinical labs':'g', 'Combined omics':'m'}
xvar = 'WHtR'
xvar_unit = '[log-scaled WHtR per s.d.]'
yvar = 'BMI'
yvar_unit = '[log-scaled BMI (kg m'+r'$^{-2}$'+') per s.d.]'
xcol = 'b'+xvar+'_Bcoef'
ycol = 'b'+yvar+'_Bcoef'

#Prepare DF
tempD = {}
for category in bcoefD.keys():
    #Use .rename(), which returns a new Series; assigning to .name would modify the Series
    #object taken from the shared coefficient table (hidden side effect on later cells)
    tempS1 = bcoefD[category][xvar]['Mean'].rename(xcol)
    tempS2 = bcoefD[category][yvar]['Mean'].rename(ycol)
    #Keep the variables present in both tables
    tempDF = pd.merge(tempS1, tempS2, left_index=True, right_index=True, how='inner')
    tempDF['Category'] = labelD[category]
    
    tempD[category] = tempDF
    
    print(labelD[category])
    display(tempDF.describe(include='all'))
tempDF = pd.concat(list(tempD.values()), axis=0)

#Statistical tests
tempDF1 = pd.DataFrame(columns=['N', 'DoF', 'Pearson_r', 'Pval'])
for category in colorD.keys():
    tempDF2 = tempDF.loc[tempDF['Category']==category]
    #Pearson's correlation
    pearson_r, pval = stats.pearsonr(tempDF2[xcol], tempDF2[ycol])
    size = len(tempDF2)
    dof = size - 2
    tempDF1.loc[category] = [size, dof, pearson_r, pval]
##P-value adjustment by using Benjamini–Hochberg method
tempDF1['AdjPval'] = multi.multipletests(tempDF1['Pval'], alpha=0.05, method='fdr_bh',
                                         is_sorted=False, returnsorted=False)[1]
tempDF1.index.rename('Category', inplace=True)
tempDF1['N'] = tempDF1['N'].astype('int64')#Otherwise, float64!
tempDF1['DoF'] = tempDF1['DoF'].astype('int64')#Otherwise, float64!
display(tempDF1)
##Save
fileDir = './ExportData/'
ipynbName = '220824_Multiomics-BMI-NatMed1stRevision_bBMI-vs-bWHtR-ver2_'
fileName = 'bcoef-difference_all.tsv'
tempDF1.to_csv(fileDir+ipynbName+fileName, sep='\t', index=True)

#Plot
sns.set(style='ticks', font='Arial', context='talk')
fig, axes = plt.subplots(nrows=2, ncols=2,
                         figsize=(3.5*2, 3.5*2-0.2), sharex=True, sharey=True)
axis_xmin = -0.0425
axis_xmax = 0.041
xmin = -0.04
xmax = 0.04
xinter = 0.02
axis_ymin = -0.0425
axis_ymax = 0.050
ymin = -0.04
ymax = 0.04
yinter = 0.02
#Set axis range first; otherwise, regression line can be truncated differently
plt.setp(axes, xlim=(axis_xmin, axis_xmax), xticks=np.arange(xmin, xmax+xinter/10, xinter))
plt.setp(axes, ylim=(axis_ymin, axis_ymax), yticks=np.arange(ymin, ymax+yinter/10, yinter))
for ax_i, ax in enumerate(axes.flat):
    category = list(colorD.keys())[ax_i]
    #Prepare DF
    tempDF2 = tempDF.loc[tempDF['Category']==category]
    #Scatterplot with regression line
    sns.regplot(data=tempDF2, x=xcol, y=ycol, color=colorD[category],
                scatter=True, fit_reg=True, ci=95, truncate=False, marker='o',
                scatter_kws={'alpha':0.2, 'edgecolor':'k', 's':25}, ax=ax)
    #Draw Y=X=0 as reference
    ax.axvline(x=0, color='black', linestyle=(0, (1, 2)), zorder=0)
    ax.axhline(y=0, color='black', linestyle=(0, (1, 2)), zorder=0)
    #Annotate Pearson's correlation
    pearson_r = tempDF1['Pearson_r'].loc[category]
    r_text = str(Decimal(str(pearson_r)).quantize(Decimal('0.001'), rounding=ROUND_HALF_UP))
    pval = tempDF1['AdjPval'].loc[category]
    below_limit = 0#Initialize
    if pval==1.0:
        pval_text = '1.0'
    else:
        if pval==0.0:#Due to smaller than the float minimum
            pval = sys.float_info.min
            print('P-value was smaller than the float minimum:', pval)
            below_limit = 1
        pval_text = f'{Decimal(str(pval)):.3E}'#Take more digits because rounding is bad here
        significand, exponent = pval_text.split(sep='E-')
        significand = str(Decimal(significand).quantize(Decimal('0.1'), rounding=ROUND_HALF_UP))
        #Rounding up to 10.0 -> carry over to the next order of magnitude
        if significand=='10.0':
            significand = '1.0'
            exponent = str(int(exponent)-1)
        #Scientific notation below 0.01; plain decimal notation otherwise
        if int(exponent)>2:
            pval_text = significand+r'$\times$'+'10'+r'$^{{-{0}}}$'.format(exponent)##Font is different in r'$ $'...
        elif int(exponent)>0:
            pval_text = '0.'+'0'*(int(exponent)-1)+significand.replace('.', '')
        else:
            pval_text = significand
    if below_limit==1:
        text = 'Pearson\'s '+r'$r$'+' = '+r_text+'\n'+r'$P$'+' < '+pval_text
    else:
        text = 'Pearson\'s '+r'$r$'+' = '+r_text+'\n'+r'$P$'+' = '+pval_text
    ax.annotate(text, xy=(0.05, 0.95), xycoords='axes fraction',
                horizontalalignment='left', verticalalignment='top',
                multialignment='left', fontsize='small', color='k')
    #Facet label
    ax.set_title(category, {'fontsize':'large'})
sns.despine()
#Reset and generate common axis title
plt.setp(axes, xlabel='', ylabel='')
fig.tight_layout(pad=0.75)
fig.text(x=0.54, y=0.02,#Manual adjustment
         s='Mean of '+r'$\beta$'+'-coefficients in '+xvar+' models\n'+xvar_unit,
         fontsize='medium', verticalalignment='top', horizontalalignment='center')
fig.text(x=0.0225, y=0.515,#Manual adjustment
         s='Mean of '+r'$\beta$'+'-coefficients in '+yvar+' models\n'+yvar_unit,
         fontsize='medium', multialignment='center',
         verticalalignment='center', horizontalalignment='right', rotation='vertical')
##Save
fileDir = './ExportFigures/'
ipynbName = '220824_Multiomics-BMI-NatMed1stRevision_bBMI-vs-bWHtR-ver2_'
fileName = 'bcoef-difference_all.tif'
plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
                  pil_kwargs={'compression':'tiff_lzw'})
plt.show()

In [None]:
#Variables with non-zero beta-coefficients
#Compare the mean beta-coefficients of the bBMI and bWHtR models within each omics category,
#restricted to the variables retained (nZeros!=10) in at least one of the two models:
#Pearson's correlation (P-values adjusted across the four categories) and 2x2 scatterplots
bcoefD = {'Met':{'BMI':metBMI_B_bcoefs, 'WHtR':metWHtR_B_bcoefs},
          'Prot':{'BMI':protBMI_B_bcoefs, 'WHtR':protWHtR_B_bcoefs},
          'Chem':{'BMI':chemBMI_B_bcoefs, 'WHtR':chemWHtR_B_bcoefs},
          'Combi':{'BMI':combiBMI_B_bcoefs, 'WHtR':combiWHtR_B_bcoefs}}
labelD = {'Met':'Metabolomics', 'Prot':'Proteomics', 'Chem':'Clinical labs', 'Combi':'Combined omics'}
colorD = {'Metabolomics':'b', 'Proteomics':'r', 'Clinical labs':'g', 'Combined omics':'m'}
xvar = 'WHtR'
xvar_unit = '[log-scaled WHtR per s.d.]'
yvar = 'BMI'
yvar_unit = '[log-scaled BMI (kg m'+r'$^{-2}$'+') per s.d.]'
xcol = 'b'+xvar+'_Bcoef'
ycol = 'b'+yvar+'_Bcoef'

#Prepare DF
tempD = {}
for category in bcoefD.keys():
    tempDF1 = bcoefD[category][xvar]
    tempDF2 = bcoefD[category][yvar]
    #Use .rename(), which returns a new Series; assigning to .name would modify the Series
    #object taken from the shared coefficient table (hidden side effect on later cells)
    tempS1 = tempDF1['Mean'].rename(xcol)
    tempS2 = tempDF2['Mean'].rename(ycol)
    #Variables with non-zero beta-coefficients in bBMI or bWHtR
    tempDF1 = tempDF1.loc[tempDF1['nZeros']!=10]
    tempDF2 = tempDF2.loc[tempDF2['nZeros']!=10]
    tempS = set(tempDF1.index.tolist()) | set(tempDF2.index.tolist())
    #.loc does not accept a set as an indexer in recent pandas (TypeError in pandas 2.0);
    #a sorted list also makes the row order reproducible between runs
    tempL = sorted(tempS)
    tempDF = pd.merge(tempS1.loc[tempL], tempS2.loc[tempL],
                      left_index=True, right_index=True, how='inner')
    tempDF['Category'] = labelD[category]
    
    tempD[category] = tempDF
    
    print(labelD[category])
    display(tempDF.describe(include='all'))
tempDF = pd.concat(list(tempD.values()), axis=0)

#Statistical tests
tempDF1 = pd.DataFrame(columns=['N', 'DoF', 'Pearson_r', 'Pval'])
for category in colorD.keys():
    tempDF2 = tempDF.loc[tempDF['Category']==category]
    #Pearson's correlation
    pearson_r, pval = stats.pearsonr(tempDF2[xcol], tempDF2[ycol])
    size = len(tempDF2)
    dof = size - 2
    tempDF1.loc[category] = [size, dof, pearson_r, pval]
##P-value adjustment by using Benjamini–Hochberg method
tempDF1['AdjPval'] = multi.multipletests(tempDF1['Pval'], alpha=0.05, method='fdr_bh',
                                         is_sorted=False, returnsorted=False)[1]
tempDF1.index.rename('Category', inplace=True)
tempDF1['N'] = tempDF1['N'].astype('int64')#Otherwise, float64!
tempDF1['DoF'] = tempDF1['DoF'].astype('int64')#Otherwise, float64!
display(tempDF1)
##Save
fileDir = './ExportData/'
ipynbName = '220824_Multiomics-BMI-NatMed1stRevision_bBMI-vs-bWHtR-ver2_'
fileName = 'bcoef-difference_non-zero-in-any.tsv'
tempDF1.to_csv(fileDir+ipynbName+fileName, sep='\t', index=True)

#Plot
sns.set(style='ticks', font='Arial', context='talk')
fig, axes = plt.subplots(nrows=2, ncols=2,
                         figsize=(3.5*2, 3.5*2-0.2), sharex=True, sharey=True)
axis_xmin = -0.0425
axis_xmax = 0.041
xmin = -0.04
xmax = 0.04
xinter = 0.02
axis_ymin = -0.0425
axis_ymax = 0.050
ymin = -0.04
ymax = 0.04
yinter = 0.02
#Set axis range first; otherwise, regression line can be truncated differently
plt.setp(axes, xlim=(axis_xmin, axis_xmax), xticks=np.arange(xmin, xmax+xinter/10, xinter))
plt.setp(axes, ylim=(axis_ymin, axis_ymax), yticks=np.arange(ymin, ymax+yinter/10, yinter))
for ax_i, ax in enumerate(axes.flat):
    category = list(colorD.keys())[ax_i]
    #Prepare DF
    tempDF2 = tempDF.loc[tempDF['Category']==category]
    #Scatterplot with regression line
    sns.regplot(data=tempDF2, x=xcol, y=ycol, color=colorD[category],
                scatter=True, fit_reg=True, ci=95, truncate=False, marker='o',
                scatter_kws={'alpha':0.2, 'edgecolor':'k', 's':25}, ax=ax)
    #Draw Y=X=0 as reference
    ax.axvline(x=0, color='black', linestyle=(0, (1, 2)), zorder=0)
    ax.axhline(y=0, color='black', linestyle=(0, (1, 2)), zorder=0)
    #Annotate Pearson's correlation
    pearson_r = tempDF1['Pearson_r'].loc[category]
    r_text = str(Decimal(str(pearson_r)).quantize(Decimal('0.001'), rounding=ROUND_HALF_UP))
    pval = tempDF1['AdjPval'].loc[category]
    below_limit = 0#Initialize
    if pval==1.0:
        pval_text = '1.0'
    else:
        if pval==0.0:#Due to smaller than the float minimum
            pval = sys.float_info.min
            print('P-value was smaller than the float minimum:', pval)
            below_limit = 1
        pval_text = f'{Decimal(str(pval)):.3E}'#Take more digits because rounding is bad here
        significand, exponent = pval_text.split(sep='E-')
        significand = str(Decimal(significand).quantize(Decimal('0.1'), rounding=ROUND_HALF_UP))
        #Rounding up to 10.0 -> carry over to the next order of magnitude
        if significand=='10.0':
            significand = '1.0'
            exponent = str(int(exponent)-1)
        #Scientific notation below 0.01; plain decimal notation otherwise
        if int(exponent)>2:
            pval_text = significand+r'$\times$'+'10'+r'$^{{-{0}}}$'.format(exponent)##Font is different in r'$ $'...
        elif int(exponent)>0:
            pval_text = '0.'+'0'*(int(exponent)-1)+significand.replace('.', '')
        else:
            pval_text = significand
    if below_limit==1:
        text = 'Pearson\'s '+r'$r$'+' = '+r_text+'\n'+r'$P$'+' < '+pval_text
    else:
        text = 'Pearson\'s '+r'$r$'+' = '+r_text+'\n'+r'$P$'+' = '+pval_text
    ax.annotate(text, xy=(0.05, 0.95), xycoords='axes fraction',
                horizontalalignment='left', verticalalignment='top',
                multialignment='left', fontsize='small', color='k')
    #Facet label
    ax.set_title(category, {'fontsize':'large'})
sns.despine()
#Reset and generate common axis title
plt.setp(axes, xlabel='', ylabel='')
fig.tight_layout(pad=0.75)
fig.text(x=0.54, y=0.02,#Manual adjustment
         s='Mean of '+r'$\beta$'+'-coefficients in '+xvar+' models\n'+xvar_unit,
         fontsize='medium', verticalalignment='top', horizontalalignment='center')
fig.text(x=0.0225, y=0.515,#Manual adjustment
         s='Mean of '+r'$\beta$'+'-coefficients in '+yvar+' models\n'+yvar_unit,
         fontsize='medium', multialignment='center',
         verticalalignment='center', horizontalalignment='right', rotation='vertical')
##Save
fileDir = './ExportFigures/'
ipynbName = '220824_Multiomics-BMI-NatMed1stRevision_bBMI-vs-bWHtR-ver2_'
fileName = 'bcoef-difference_non-zero-in-any.tif'
plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
                  pil_kwargs={'compression':'tiff_lzw'})
plt.show()

In [None]:
#Variables with non-zero beta-coefficient in all 10 models
#Compare the mean beta-coefficients of the bBMI and bWHtR models within each omics category,
#restricted to the variables robustly retained (nZeros==0) in at least one of the two models:
#Pearson's correlation (P-values adjusted across the four categories) and 2x2 scatterplots
bcoefD = {'Met':{'BMI':metBMI_B_bcoefs, 'WHtR':metWHtR_B_bcoefs},
          'Prot':{'BMI':protBMI_B_bcoefs, 'WHtR':protWHtR_B_bcoefs},
          'Chem':{'BMI':chemBMI_B_bcoefs, 'WHtR':chemWHtR_B_bcoefs},
          'Combi':{'BMI':combiBMI_B_bcoefs, 'WHtR':combiWHtR_B_bcoefs}}
labelD = {'Met':'Metabolomics', 'Prot':'Proteomics', 'Chem':'Clinical labs', 'Combi':'Combined omics'}
colorD = {'Metabolomics':'b', 'Proteomics':'r', 'Clinical labs':'g', 'Combined omics':'m'}
xvar = 'WHtR'
xvar_unit = '[log-scaled WHtR per s.d.]'
yvar = 'BMI'
yvar_unit = '[log-scaled BMI (kg m'+r'$^{-2}$'+') per s.d.]'
xcol = 'b'+xvar+'_Bcoef'
ycol = 'b'+yvar+'_Bcoef'

#Prepare DF
tempD = {}
for category in bcoefD.keys():
    tempDF1 = bcoefD[category][xvar]
    tempDF2 = bcoefD[category][yvar]
    #Use .rename(), which returns a new Series; assigning to .name would modify the Series
    #object taken from the shared coefficient table (hidden side effect on later cells)
    tempS1 = tempDF1['Mean'].rename(xcol)
    tempS2 = tempDF2['Mean'].rename(ycol)
    #Variables with non-zero beta-coefficient in all 10 models of bBMI or bWHtR
    tempDF1 = tempDF1.loc[tempDF1['nZeros']==0]
    tempDF2 = tempDF2.loc[tempDF2['nZeros']==0]
    tempS = set(tempDF1.index.tolist()) | set(tempDF2.index.tolist())
    #.loc does not accept a set as an indexer in recent pandas (TypeError in pandas 2.0);
    #a sorted list also makes the row order reproducible between runs
    tempL = sorted(tempS)
    tempDF = pd.merge(tempS1.loc[tempL], tempS2.loc[tempL],
                      left_index=True, right_index=True, how='inner')
    tempDF['Category'] = labelD[category]
    
    tempD[category] = tempDF
    
    print(labelD[category])
    display(tempDF.describe(include='all'))
tempDF = pd.concat(list(tempD.values()), axis=0)

#Statistical tests
tempDF1 = pd.DataFrame(columns=['N', 'DoF', 'Pearson_r', 'Pval'])
for category in colorD.keys():
    tempDF2 = tempDF.loc[tempDF['Category']==category]
    #Pearson's correlation
    pearson_r, pval = stats.pearsonr(tempDF2[xcol], tempDF2[ycol])
    size = len(tempDF2)
    dof = size - 2
    tempDF1.loc[category] = [size, dof, pearson_r, pval]
##P-value adjustment by using Benjamini–Hochberg method
tempDF1['AdjPval'] = multi.multipletests(tempDF1['Pval'], alpha=0.05, method='fdr_bh',
                                         is_sorted=False, returnsorted=False)[1]
tempDF1.index.rename('Category', inplace=True)
tempDF1['N'] = tempDF1['N'].astype('int64')#Otherwise, float64!
tempDF1['DoF'] = tempDF1['DoF'].astype('int64')#Otherwise, float64!
display(tempDF1)
##Save
fileDir = './ExportData/'
ipynbName = '220824_Multiomics-BMI-NatMed1stRevision_bBMI-vs-bWHtR-ver2_'
fileName = 'bcoef-difference_non-zero-in-all.tsv'
tempDF1.to_csv(fileDir+ipynbName+fileName, sep='\t', index=True)

#Plot
sns.set(style='ticks', font='Arial', context='talk')
fig, axes = plt.subplots(nrows=2, ncols=2,
                         figsize=(3.5*2, 3.5*2-0.2), sharex=True, sharey=True)
axis_xmin = -0.0425
axis_xmax = 0.041
xmin = -0.04
xmax = 0.04
xinter = 0.02
axis_ymin = -0.0425
axis_ymax = 0.050
ymin = -0.04
ymax = 0.04
yinter = 0.02
#Set axis range first; otherwise, regression line can be truncated differently
plt.setp(axes, xlim=(axis_xmin, axis_xmax), xticks=np.arange(xmin, xmax+xinter/10, xinter))
plt.setp(axes, ylim=(axis_ymin, axis_ymax), yticks=np.arange(ymin, ymax+yinter/10, yinter))
for ax_i, ax in enumerate(axes.flat):
    category = list(colorD.keys())[ax_i]
    #Prepare DF
    tempDF2 = tempDF.loc[tempDF['Category']==category]
    #Scatterplot with regression line
    sns.regplot(data=tempDF2, x=xcol, y=ycol, color=colorD[category],
                scatter=True, fit_reg=True, ci=95, truncate=False, marker='o',
                scatter_kws={'alpha':0.2, 'edgecolor':'k', 's':25}, ax=ax)
    #Draw Y=X=0 as reference
    ax.axvline(x=0, color='black', linestyle=(0, (1, 2)), zorder=0)
    ax.axhline(y=0, color='black', linestyle=(0, (1, 2)), zorder=0)
    #Annotate Pearson's correlation
    pearson_r = tempDF1['Pearson_r'].loc[category]
    r_text = str(Decimal(str(pearson_r)).quantize(Decimal('0.001'), rounding=ROUND_HALF_UP))
    pval = tempDF1['AdjPval'].loc[category]
    below_limit = 0#Initialize
    if pval==1.0:
        pval_text = '1.0'
    else:
        if pval==0.0:#Due to smaller than the float minimum
            pval = sys.float_info.min
            print('P-value was smaller than the float minimum:', pval)
            below_limit = 1
        pval_text = f'{Decimal(str(pval)):.3E}'#Take more digits because rounding is bad here
        significand, exponent = pval_text.split(sep='E-')
        significand = str(Decimal(significand).quantize(Decimal('0.1'), rounding=ROUND_HALF_UP))
        #Rounding up to 10.0 -> carry over to the next order of magnitude
        if significand=='10.0':
            significand = '1.0'
            exponent = str(int(exponent)-1)
        #Scientific notation below 0.01; plain decimal notation otherwise
        if int(exponent)>2:
            pval_text = significand+r'$\times$'+'10'+r'$^{{-{0}}}$'.format(exponent)##Font is different in r'$ $'...
        elif int(exponent)>0:
            pval_text = '0.'+'0'*(int(exponent)-1)+significand.replace('.', '')
        else:
            pval_text = significand
    if below_limit==1:
        text = 'Pearson\'s '+r'$r$'+' = '+r_text+'\n'+r'$P$'+' < '+pval_text
    else:
        text = 'Pearson\'s '+r'$r$'+' = '+r_text+'\n'+r'$P$'+' = '+pval_text
    ax.annotate(text, xy=(0.05, 0.95), xycoords='axes fraction',
                horizontalalignment='left', verticalalignment='top',
                multialignment='left', fontsize='small', color='k')
    #Facet label
    ax.set_title(category, {'fontsize':'large'})
sns.despine()
#Reset and generate common axis title
plt.setp(axes, xlabel='', ylabel='')
fig.tight_layout(pad=0.75)
fig.text(x=0.54, y=0.02,#Manual adjustment
         s='Mean of '+r'$\beta$'+'-coefficients in '+xvar+' models\n'+xvar_unit,
         fontsize='medium', verticalalignment='top', horizontalalignment='center')
fig.text(x=0.0225, y=0.515,#Manual adjustment
         s='Mean of '+r'$\beta$'+'-coefficients in '+yvar+' models\n'+yvar_unit,
         fontsize='medium', multialignment='center',
         verticalalignment='center', horizontalalignment='right', rotation='vertical')
##Save
fileDir = './ExportFigures/'
ipynbName = '220824_Multiomics-BMI-NatMed1stRevision_bBMI-vs-bWHtR-ver2_'
fileName = 'bcoef-difference_non-zero-in-all.tif'
plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
                  pil_kwargs={'compression':'tiff_lzw'})
plt.show()

## 2. Difference between measured and predicted values

### 2-1. Clean the biological WHtR dataframes

In [None]:
#Load the baseline predictions of every biological WHtR model and join them into one table
fileDir = './ExportData/'
ipynbName = '220822_Multiomics-BMI-NatMed1stRevision_WHtR-baseline-LASSO-ver2_'
tempL1 = ['log_BaseWHtR', 'BaseWHtR', 'Testing']#Columns taken once and dropped from every model file
tempL2 = ['MetWHtR', 'ProtWHtR', 'ChemWHtR', 'CombiWHtR']
for model_i, bwhtr in enumerate(tempL2):
    fileName = bwhtr+'-BothSex.tsv'
    tempDF1 = pd.read_csv(fileDir+ipynbName+fileName, sep='\t',
                          dtype={'public_client_id':str}).set_index('public_client_id')
    #The common columns are taken from the first model file only
    if model_i==0:
        tempDF = tempDF1[tempL1]
    #Join the model-specific columns; the inner join keeps participants present in both tables
    tempDF = pd.merge(tempDF, tempDF1.drop(columns=tempL1),
                      left_index=True, right_index=True, how='inner')

display(tempDF)
#Summary statistics of the numeric columns, with the skewness appended as an extra row
numericDF = tempDF.select_dtypes(include=[np.number])
tempDF1 = tempDF.describe(include=[np.number])
tempDF1.loc['Skewness'] = stats.skew(numericDF)
display(tempDF1)

#Line color of each measured/predicted WHtR
tempD = {'WHtR':'k', 'MetWHtR':'b', 'ProtWHtR':'r', 'ChemWHtR':'g', 'CombiWHtR':'m'}
#Column prefix and x-axis label for each plotted scale (log-scale first, then raw)
scale_settings = {'log':('log_Base', 'WHtR (log-scale)'), 'raw':('Base', 'WHtR')}
for scale, (prefix, axis_label) in scale_settings.items():
    sns.set(style='ticks', font='Arial', context='talk')
    plt.figure(figsize=(4, 3))
    #Overlay the distributions of the measured and the four predicted WHtRs
    for whtr, color in tempD.items():
        sns.distplot(tempDF[prefix+whtr], label=whtr, color=color)
    sns.despine()
    plt.ylabel('Density')
    plt.xlabel(axis_label)
    plt.legend(bbox_to_anchor=(1, 0.5), loc='center left', borderaxespad=1)
    plt.show()

#Keep the merged table for the following cells
whtrDF = tempDF

In [None]:
tempDF = whtrDF

#Add the covariates info to make summary dataframe
fileDir = './ExportData/'
ipynbName = '220621_Multiomics-BMI-NatMedRevision_WHtR-DataCleaning_'
fileName = 'baseline-WHtR-final-cohort.tsv'
tempDF1 = pd.read_csv(fileDir+ipynbName+fileName, sep='\t', dtype={'public_client_id':str})
tempDF1 = tempDF1.set_index('public_client_id')
tempL = ['log_BaseBMI', 'Sex', 'BaseAge', 'PC1', 'PC2', 'PC3', 'PC4', 'PC5']#Without Race in this study
tempDF1 = tempDF1[tempL]
##Inner join: only participants present in both tables are kept
tempDF = pd.merge(tempDF, tempDF1, left_index=True, right_index=True, how='inner')

#Obesity classification (based on BMI)
##Back-transform the log-scaled BMI to the raw scale
tempDF['BaseBMI'] = np.e**tempDF['log_BaseBMI']
tempDF = tempDF.drop(columns=['log_BaseBMI'])
##Vectorized classification: the first matching condition wins, so the NaN check comes first,
##and comparisons with NaN are False. Every remaining value is >= 30 and takes the default.
tempS = tempDF['BaseBMI']
tempDF['BaseBMI_class'] = np.select(
    [tempS.isna(), tempS < 18.5, tempS < 25, tempS < 30],
    ['NotCalculated', 'Underweight', 'Normal', 'Overweight'],
    default='Obese')
##Confirmation
print('BaseBMI_class:')
tempS = tempDF['BaseBMI_class'].value_counts()
tempDF1 = pd.DataFrame({'Count':tempS, 'Percentage':tempS/len(tempDF)*100})
display(tempDF1)

display(tempDF)

#Save
##This table is re-imported in section 3-1-1
fileDir = './ExportData/'
ipynbName = '220824_Multiomics-BMI-NatMed1stRevision_bBMI-vs-bWHtR-ver2_'
fileName = 'biologicalWHtR-baseline-summary-BothSex.tsv'
tempDF.to_csv(fileDir+ipynbName+fileName, sep='\t', index=True)

whtrDF = tempDF

### 2-2. Calculate ∆WHtR

> In contrast to biological age but similarly to BMI, the difference between measured and predicted values depends on the absolute WHtR value. Therefore, ∆WHtR is defined as the rate of difference: (bWHtR - WHtR) / WHtR × 100 [%].  

In [None]:
#Work on a copy so that the input table is not modified by the column additions
tempDF = whtrDF.copy()

#∆WHtR: deviation of each predicted WHtR from the measured WHtR [% of the measured WHtR]
tempL = ['MetWHtR', 'ProtWHtR', 'ChemWHtR', 'CombiWHtR']
for bwhtr in tempL:
    deviation = tempDF['Base'+bwhtr] - tempDF['BaseWHtR']
    tempDF[bwhtr+'–WHtR'] = deviation / tempDF['BaseWHtR'] * 100

#Summary statistics of the ∆WHtR columns, with the skewness appended as an extra row
tempD = {'MetWHtR–WHtR':'b', 'ProtWHtR–WHtR':'r', 'ChemWHtR–WHtR':'g', 'CombiWHtR–WHtR':'m'}
tempDF1 = tempDF[list(tempD)]
tempDF2 = tempDF1.describe()
tempDF2.loc['Skewness'] = stats.skew(tempDF1)
display(tempDF2)

#Overlaid distributions of the four ∆WHtR columns
sns.set(style='ticks', font='Arial', context='talk')
plt.figure(figsize=(4, 3))
for col_n, color in tempD.items():
    sns.distplot(tempDF[col_n], label=col_n, color=color)
sns.despine()
plt.ylabel('Density')
plt.xlabel(r'$\Delta$'+'WHtR [% WHtR]')
plt.legend(bbox_to_anchor=(1, 0.5), loc='center left', borderaxespad=1)
plt.show()

#Update
whtrDF = tempDF

### 2-3. Difference in ∆WHtR between bWHtRs

> In this version, P-values from Pearson's correlation tests are adjusted across all comparisons.  

In [None]:
#Prepare DF
##Keep the four ∆WHtR columns plus the BMI class, and shorten 'XxxWHtR–WHtR' to 'XxxWHtR'
tempL = ['MetWHtR–WHtR', 'ProtWHtR–WHtR', 'ChemWHtR–WHtR', 'CombiWHtR–WHtR', 'BaseBMI_class']
tempDF = whtrDF[tempL]
tempDF.columns = tempDF.columns.str.replace('–WHtR', '')

#Check correlation matrix and extract lower triangle matrix
tempDF1 = tempDF.select_dtypes(include=[np.number])
tempDF1 = tempDF1.corr(method='pearson')
print('Pearson\'s r:')
display(tempDF1)
##Mask the diagonal and the upper triangle so that each pair is listed only once
##(builtin bool is used because the np.bool alias was removed in NumPy 1.24)
tempDF1 = tempDF1.where(np.tril(np.ones(tempDF1.shape), k=-1).astype(bool), other=np.nan)
tempDF1.index.rename('Variable1', inplace=True)
##Long format: one row per (Variable1, Variable2) pair; masked pairs are dropped
tempDF1 = tempDF1.reset_index().melt(var_name='Variable2', value_name='Pearson_r', id_vars=['Variable1'])
tempDF1 = tempDF1.dropna()

#Statistical tests
##One row per pair, labeled 'Xvar-vs-Yvar' (this label is used for the lookup in the plot below)
tempDF2 = pd.DataFrame(columns=['Xvar', 'Yvar', 'N', 'DoF', 'Pearson_r', 'Pval'])
for row_i in range(len(tempDF1)):
    xvar = tempDF1['Variable2'].iloc[row_i]
    yvar = tempDF1['Variable1'].iloc[row_i]
    #Pearson's correlation
    pearson_r, pval = stats.pearsonr(tempDF[xvar], tempDF[yvar])
    size = len(tempDF)
    dof = size - 2
    tempDF2.loc[xvar+'-vs-'+yvar] = [xvar, yvar, size, dof, pearson_r, pval]
##P-value adjustment by using Benjamini–Hochberg method
tempDF2['AdjPval'] = multi.multipletests(tempDF2['Pval'], alpha=0.05, method='fdr_bh',
                                         is_sorted=False, returnsorted=False)[1]
tempDF2.index.rename('ComparisonLabel', inplace=True)
display(tempDF2)
##Save
fileDir = './ExportData/'
ipynbName = '220824_Multiomics-BMI-NatMed1stRevision_bBMI-vs-bWHtR-ver2_'
fileName = 'bWHtR-WHtR-difference.tsv'
tempDF2.to_csv(fileDir+ipynbName+fileName, sep='\t', index=True)

#Visualization
##Pair grid of the four ∆WHtR columns in tempDF, colored by BMI class:
##scatterplots in the lower triangle, distributions on the diagonal, upper triangle hidden
tempD = {'Underweight':'blue', 'Normal':'green', 'Overweight':'orange', 'Obese':'red'}
sns.set(style='ticks', font='Arial', context='talk')
p = sns.PairGrid(tempDF, hue='BaseBMI_class', hue_order=list(tempD.keys()), palette=tempD,
                 height=2, aspect=1, layout_pad=0.0)
p.map_lower(sns.scatterplot, edgecolor='0.3', alpha=0.5, s=25)
p.map_diag(sns.distplot, axlabel=False, kde_kws={'alpha':0.8}, hist_kws={'edgecolor':'white', 'alpha':0.5})
##Hide the panels above the diagonal
for i, j in zip(*np.triu_indices_from(p.axes, 1)):
    p.axes[i, j].set_visible(False)
##Same axis range and ticks for the diagonal and lower-triangle panels
for i, j in zip(*np.tril_indices_from(p.axes, 0)):
    p.axes[i, j].set(xlim=(-35, 47.5), xticks=np.arange(-20, 40.1, 20),
                     ylim=(-35, 47.5), yticks=np.arange(-20, 40.1, 20))
##Grid lines and correlation annotation for the scatterplot panels only
for i, j in zip(*np.tril_indices_from(p.axes, -1)):
    p.axes[i, j].grid(axis='both', linestyle='--', color='gray', alpha=0.3)
    #Annotate Pearson's correlation
    ##NOTE(review): relies on the grid panels following the column order of tempDF — confirm
    xvar = tempDF.columns.tolist()[j]
    yvar = tempDF.columns.tolist()[i]
    ##Look up the test result by the 'Xvar-vs-Yvar' label assigned in the statistical tests above
    pearson_r = tempDF2['Pearson_r'].loc[xvar+'-vs-'+yvar]
    r_text = str(Decimal(str(pearson_r)).quantize(Decimal('0.001'), rounding=ROUND_HALF_UP))
    pval = tempDF2['AdjPval'].loc[xvar+'-vs-'+yvar]
    ##Format the adjusted P-value: significand rounded half-up to one decimal place
    below_limit = 0#Initialize
    if pval==1.0:
        pval_text = '1.0'
    else:
        if pval==0.0:#Due to smaller than the float minimum
            ##Substitute the float minimum and report it as an upper bound ('P <')
            pval = sys.float_info.min
            print('P-value was smaller than the float minimum:', pval)
            below_limit = 1
        pval_text = f'{Decimal(str(pval)):.3E}'#Take more digits because rounding is bad here
        ##Splitting on 'E-' expects a negative exponent in the formatted text
        significand, exponent = pval_text.split(sep='E-')
        significand = str(Decimal(significand).quantize(Decimal('0.1'), rounding=ROUND_HALF_UP))
        ##Rounding can carry the significand up to 10.0; renormalize it to 1.0 with a shifted exponent
        if significand=='10.0':
            significand = '1.0'
            exponent = str(int(exponent)-1)
        ##Exponent > 2: scientific notation; exponent 1 or 2: plain decimal; exponent 0: significand as is
        if int(exponent)>2:
            pval_text = significand+r'$\times$'+'10'+r'$^{{-{0}}}$'.format(exponent)##Font is different in r'$ $'...
        elif int(exponent)>0:
            pval_text = '0.'+'0'*(int(exponent)-1)+significand.replace('.', '')
        else:
            pval_text = significand
    if below_limit==1:
        text = 'Pearson\'s '+r'$r$'+' = '+r_text+'\n'+r'$P$'+' < '+pval_text
    else:
        text = 'Pearson\'s '+r'$r$'+' = '+r_text+'\n'+r'$P$'+' = '+pval_text
    p.axes[i, j].annotate(text, xy=(0.025, 1.0), xycoords='axes fraction',
                          horizontalalignment='left', verticalalignment='top',
                          multialignment='left', fontsize='x-small', color='k')
pl = plt.legend(bbox_to_anchor=(0.75, 2.1), loc='lower right', title='BMI class')#Adjustment for paper
##Add sample size in legend
for row_i in range(len(pl.get_texts())):
    bmi_class = pl.get_texts()[row_i].get_text()
    count = len(tempDF.loc[tempDF['BaseBMI_class']==bmi_class])
    pl.get_texts()[row_i].set_text(bmi_class+' ('+r'$n$'+' = '+f'{count:,}'+')')
##Add xy label annotation
##The same label text is placed once below and once to the left of the whole grid
label = r'$\Delta$'+'WHtR (predicted '+r'$-$'+' measured) [% WHtR]'
p.fig.text(x=0.545, y=0.0,#Manual adjustment
           s=label, fontsize='large',
           verticalalignment='top', horizontalalignment='center')
p.fig.text(x=-0.01, y=0.545,#Manual adjustment
           s=label, fontsize='large',
           verticalalignment='center', horizontalalignment='right', rotation='vertical')
##Save
fileDir = './ExportFigures/'
ipynbName = '220824_Multiomics-BMI-NatMed1stRevision_bBMI-vs-bWHtR-ver2_'
fileName = 'bWHtR-WHtR-difference.tif'
plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
                  pil_kwargs={'compression':'tiff_lzw'})
plt.show()

### 2-4. Difference between ∆BMI and ∆WHtR

In [None]:
#Participants of the WHtR table define the cohort of this section
whtr_ids = whtrDF.index.tolist()

#Import cleaned table for baseline measured and biological BMIs
fileDir = './ExportData/'
ipynbName = '220803_Multiomics-BMI-NatMed1stRevision_DeltaBMI-misclassification_'
fileName = 'biologicalBMI-baseline-summary-BothSex.tsv'
tempDF = pd.read_csv(fileDir+ipynbName+fileName, sep='\t',
                     dtype={'public_client_id': str}).set_index('public_client_id')
print('Original shape:', tempDF.shape)

#Extract the individuals having WHtR (rows follow the order of the WHtR table)
tempDF = tempDF.loc[whtr_ids]

display(tempDF)

bmiDF = tempDF

In [None]:
#Work on a copy so that the input table is not modified by the column additions
tempDF = bmiDF.copy()

#∆BMI: deviation of each predicted BMI from the measured BMI [% of the measured BMI]
tempL = ['MetBMI', 'ProtBMI', 'ChemBMI', 'CombiBMI']
for bbmi in tempL:
    deviation = tempDF['Base'+bbmi] - tempDF['BaseBMI']
    tempDF[bbmi+'–BMI'] = deviation / tempDF['BaseBMI'] * 100

#Summary statistics of the ∆BMI columns, with the skewness appended as an extra row
tempD = {'MetBMI–BMI':'b', 'ProtBMI–BMI':'r', 'ChemBMI–BMI':'g', 'CombiBMI–BMI':'m'}
tempDF1 = tempDF[list(tempD)]
tempDF2 = tempDF1.describe()
tempDF2.loc['Skewness'] = stats.skew(tempDF1)
display(tempDF2)

#Overlaid distributions of the four ∆BMI columns
sns.set(style='ticks', font='Arial', context='talk')
plt.figure(figsize=(4, 3))
for col_n, color in tempD.items():
    sns.distplot(tempDF[col_n], label=col_n, color=color)
sns.despine()
plt.ylabel('Density')
plt.xlabel(r'$\Delta$'+'BMI [% BMI]')
plt.legend(bbox_to_anchor=(1, 0.5), loc='center left', borderaxespad=1)
plt.show()

#Update
bmiDF = tempDF

In [None]:
tempD1 = {'WHtR':whtrDF, 'BMI':bmiDF}
tempD2 = {'Met':'Metabolomics', 'Prot':'Proteomics', 'Chem':'Clinical labs', 'Combi':'Combined omics'}
tempD3 = {'Metabolomics':'b', 'Proteomics':'r', 'Clinical labs':'g', 'Combined omics':'m'}
xvar = 'WHtR'
yvar = 'BMI'

#Prepare DF
##One long-format table per measure: (participant, omics category, delta value)
tempD = {}
for measure, sourceDF in tempD1.items():
    #Keep only the delta columns of this measure (named like 'Met'+measure+'–'+measure)
    deltaDF = sourceDF.loc[:, sourceDF.columns.str.contains('–'+measure)]
    #Reduce each column name to its omics prefix, then translate it to the category label
    deltaDF.columns = deltaDF.columns.str.replace(measure+'–'+measure, '').map(tempD2)
    tempD['Delta'+measure] = deltaDF.reset_index().melt(var_name='Category',
                                                        value_name='Delta'+measure,
                                                        id_vars='public_client_id')
##Pair the two deltas of the same participant and category
tempDF = pd.merge(tempD['Delta'+xvar], tempD['Delta'+yvar],
                  on=['public_client_id', 'Category'], how='left')

#Statistical tests
tempDF1 = pd.DataFrame(columns=['N', 'DoF', 'Pearson_r', 'Pval'])
for category in tempD3:
    tempDF2 = tempDF.loc[tempDF['Category']==category]
    #Pearson's correlation
    pearson_r, pval = stats.pearsonr(tempDF2['Delta'+xvar], tempDF2['Delta'+yvar])
    size = len(tempDF2)
    dof = size - 2
    tempDF1.loc[category] = [size, dof, pearson_r, pval]
##P-value adjustment by using Benjamini–Hochberg method
adj_pvals = multi.multipletests(tempDF1['Pval'], alpha=0.05, method='fdr_bh',
                                is_sorted=False, returnsorted=False)[1]
tempDF1['AdjPval'] = adj_pvals
tempDF1.index.name = 'Category'
##Store the counts as integers (otherwise, float64!)
tempDF1 = tempDF1.astype({'N':'int64', 'DoF':'int64'})
display(tempDF1)
##Save
fileDir = './ExportData/'
ipynbName = '220824_Multiomics-BMI-NatMed1stRevision_bBMI-vs-bWHtR-ver2_'
fileName = 'dBMI-dWHtR-difference.tsv'
tempDF1.to_csv(fileDir+ipynbName+fileName, sep='\t', index=True)

#Plot
##2x2 panels (one per omics category): ∆BMI (y) against ∆WHtR (x) with a regression line
sns.set(style='ticks', font='Arial', context='talk')
fig, axes = plt.subplots(nrows=2, ncols=2,
                         figsize=(3.5*2, 3.5*2+0.15), sharex=True, sharey=True)
##axis_*: axis limits; *min/*max/*inter: first tick, last tick, and tick interval
axis_xmin = -37.5
axis_xmax = 50
xmin = -20
xmax = 40
xinter = 20
axis_ymin = -42.5
axis_ymax = 87.5
ymin = -25
ymax = 75
yinter = 25
#Set axis range first; otherwise, regression line can be truncated differently
##The small offset added to the stop value makes np.arange include the last tick
plt.setp(axes, xlim=(axis_xmin, axis_xmax), xticks=np.arange(xmin, xmax+xinter/10, xinter))
plt.setp(axes, ylim=(axis_ymin, axis_ymax), yticks=np.arange(ymin, ymax+yinter/10, yinter))
for ax_i, ax in enumerate(axes.flat):
    ##Panels follow the key order of tempD3, which also gives the panel color
    category = list(tempD3.keys())[ax_i]
    #Prepare DF
    tempDF2 = tempDF.loc[tempDF['Category']==category]
    #Scatterplot with regression line
    sns.regplot(data=tempDF2, x='Delta'+xvar, y='Delta'+yvar, color=tempD3[category],
                scatter=True, fit_reg=True, ci=95, truncate=False, marker='o',
                scatter_kws={'alpha':0.2, 'edgecolor':'k', 's':25}, ax=ax)
    #Draw Y=X as reference
    #ax.plot([axis_xymin, axis_xymax], [axis_xymin, axis_xymax],
    #        color='black', linestyle=(0, (1, 2)), zorder=0)
    #Draw Y=X=0 as reference
    ax.axvline(x=0, color='black', linestyle=(0, (1, 2)), zorder=0)
    ax.axhline(y=0, color='black', linestyle=(0, (1, 2)), zorder=0)
    #Annotate Pearson's correlation
    ##tempDF1 is the per-category test result table computed above
    pearson_r = tempDF1['Pearson_r'].loc[category]
    r_text = str(Decimal(str(pearson_r)).quantize(Decimal('0.001'), rounding=ROUND_HALF_UP))
    pval = tempDF1['AdjPval'].loc[category]
    ##Format the adjusted P-value: significand rounded half-up to one decimal place
    below_limit = 0#Initialize
    if pval==1.0:
        pval_text = '1.0'
    else:
        if pval==0.0:#Due to smaller than the float minimum
            ##Substitute the float minimum and report it as an upper bound ('P <')
            pval = sys.float_info.min
            print('P-value was smaller than the float minimum:', pval)
            below_limit = 1
        pval_text = f'{Decimal(str(pval)):.3E}'#Take more digits because rounding is bad here
        ##Splitting on 'E-' expects a negative exponent in the formatted text
        significand, exponent = pval_text.split(sep='E-')
        significand = str(Decimal(significand).quantize(Decimal('0.1'), rounding=ROUND_HALF_UP))
        ##Rounding can carry the significand up to 10.0; renormalize it to 1.0 with a shifted exponent
        if significand=='10.0':
            significand = '1.0'
            exponent = str(int(exponent)-1)
        ##Exponent > 2: scientific notation; exponent 1 or 2: plain decimal; exponent 0: significand as is
        if int(exponent)>2:
            pval_text = significand+r'$\times$'+'10'+r'$^{{-{0}}}$'.format(exponent)##Font is different in r'$ $'...
        elif int(exponent)>0:
            pval_text = '0.'+'0'*(int(exponent)-1)+significand.replace('.', '')
        else:
            pval_text = significand
    if below_limit==1:
        text = 'Pearson\'s '+r'$r$'+' = '+r_text+'\n'+r'$P$'+' < '+pval_text
    else:
        text = 'Pearson\'s '+r'$r$'+' = '+r_text+'\n'+r'$P$'+' = '+pval_text
    ax.annotate(text, xy=(0.05, 0.95), xycoords='axes fraction',
                horizontalalignment='left', verticalalignment='top',
                multialignment='left', fontsize='small', color='k')
    #Facet label
    ax.set_title(category, {'fontsize':'large'})
sns.despine()
#Reset and generate common axis title
##The per-panel labels are cleared and replaced with one shared label per axis
plt.setp(axes, xlabel='', ylabel='')
fig.tight_layout(pad=0.75)
fig.text(x=0.54, y=0.02,#Manual adjustment
         s=r'$\Delta$'+xvar+' (predicted '+r'$-$'+' measured) [% '+xvar+']',
         fontsize='medium', verticalalignment='top', horizontalalignment='center')
fig.text(x=0.0225, y=0.515,#Manual adjustment
         s=r'$\Delta$'+yvar+' (predicted '+r'$-$'+' measured) [% '+yvar+']',
         fontsize='medium', verticalalignment='center', horizontalalignment='right', rotation='vertical')
##Save
fileDir = './ExportFigures/'
ipynbName = '220824_Multiomics-BMI-NatMed1stRevision_bBMI-vs-bWHtR-ver2_'
fileName = 'dBMI-dWHtR-difference.tif'
plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
                  pil_kwargs={'compression':'tiff_lzw'})
plt.show()

## 3. ∆WHtR and clinical definition-based misclassification

### 3-1. Prepare datasets

#### 3-1-1. ∆WHtR and covariates

> Reset the above whtrDF

In [None]:
#Import cleaned table for baseline measured and biological WHtRs
fileDir = './ExportData/'
ipynbName = '220824_Multiomics-BMI-NatMed1stRevision_bBMI-vs-bWHtR-ver2_'
fileName = 'biologicalWHtR-baseline-summary-BothSex.tsv'
tempDF = pd.read_csv(fileDir+ipynbName+fileName, sep='\t',
                     dtype={'public_client_id': str}).set_index('public_client_id')

#Clean to handle easier in this notebook
##Every 'Base' in the column names is removed (e.g., 'BaseWHtR' becomes 'WHtR')
tempDF.columns = tempDF.columns.str.replace('Base', '')

#Calculate the rate of difference [% of the measured WHtR]
for bwhtr in ['MetWHtR', 'ProtWHtR', 'ChemWHtR', 'CombiWHtR']:
    deviation = tempDF[bwhtr] - tempDF['WHtR']
    tempDF['Delta'+bwhtr] = deviation / tempDF['WHtR'] * 100

#Select the misclassification and covariates (just for the display in Jupyter notebook)
##Column order: delta columns, class columns, then the covariates
tempL1 = [col_n for col_n in tempDF.columns if 'Delta' in col_n]
tempL2 = [col_n for col_n in tempDF.columns if '_class' in col_n]
tempL3 = ['WHtR', 'BMI', 'Sex', 'Age', 'PC1', 'PC2', 'PC3', 'PC4', 'PC5']
tempDF = tempDF[tempL1 + tempL2 + tempL3]

display(tempDF)
display(tempDF.describe(include='all'))
print('NaN in DF:', tempDF.isnull().to_numpy().sum(axis=None))

whtrDF = tempDF

> –> ∆WHtR can be safely assumed to follow a normal distribution (see the above section 2-2).  

#### 3-1-2. Metabolic health condition

In [None]:
#Import cleaned table for metabolic health condition
fileDir = './ExportData/'
ipynbName = '220720_Multiomics-BMI-NatMedRevision_Misclassification_'
fileName = 'metabolic-health-summary.tsv'
tempDF = pd.read_csv(fileDir+ipynbName+fileName, sep='\t',
                     dtype={'public_client_id': str}).set_index('public_client_id')
print('Original DF:', tempDF.shape)

#Extract the individuals having WHtR
##Membership filter: IDs of the WHtR table that are absent from this table are simply not returned
in_whtr_cohort = tempDF.index.isin(whtrDF.index.tolist())
tempDF = tempDF.loc[in_whtr_cohort]

display(tempDF)
display(tempDF.describe(include='all'))
print('NaN in DF:', tempDF.isnull().to_numpy().sum(axis=None))

metabDF = tempDF

> –> Five participants had NaN for the metabolic health condition

In [None]:
#Check
##Baseline WHtR and BMI per metabolic health group, within the Normal and Obese BMI classes
tempDF = pd.merge(whtrDF, metabDF['Metabolically'],
                  left_index=True, right_index=True, how='inner')
for bmi_class in ('Normal', 'Obese'):
    tempDF3 = tempDF.loc[tempDF['BMI_class']==bmi_class]
    print(bmi_class+': n =', len(tempDF3))
    grouped = tempDF3.groupby('Metabolically')
    for measure in ('WHtR', 'BMI'):
        print(' - '+measure)
        display(grouped[measure].describe())
    print('')

> –> It would be safer to adjust for the baseline WHtR or BMI in the statistical tests.  

### 3-2. Regression analysis for ∆WHtR

> Of note, because ∆WHtR values are assumed to follow a normal distribution, OLS linear regression (i.e., GLM with Gaussian family) can simply be used.  

#### 3-2-1. Perform OLS linear regression

> Model: ∆WHtR ~ b0 + b1\*C(MetabolicCondition) + b2\*WHtR + b3\*C(Sex) + b4\*Age + b5\*AncestryPCs  
> Main aim: Assess the difference in each ∆WHtR between the metabolically healthy and unhealthy groups.
> 
> –> Given the high collinearity between WHtR and BMI, only WHtR (not BMI) is adjusted for when assessing ∆WHtR.  

In [None]:
#OLS regressions of each ∆WHtR on the metabolic health condition, separately within each BMI class
##NOTE(review): time, StandardScaler, and smf are not imported in the visible import cell;
##presumably they are imported in an earlier cell — confirm
tempDF1 = whtrDF
tempDF2 = metabDF
tempL1 = ['Normal', 'Obese']
tempL2 = ['MetWHtR', 'ProtWHtR', 'ChemWHtR', 'CombiWHtR']

t_start = time.time()
tempD1 = {}
for bmi_class in tempL1:
    #Processing for OLS linear regression
    ##Gather all necessary variables into a single DF
    tempS = tempDF2['Metabolically']
    tempDF = pd.merge(tempDF1, tempS, left_index=True, right_index=True, how='left')
    ##Select the target participants
    tempDF = tempDF.loc[tempDF['BMI_class']==bmi_class]
    ##Drop NaN in the metabolic health condition
    ###NOTE(review): dropna() without subset removes rows with NaN in ANY column, not only
    ###'Metabolically'; equivalent only if the other columns have no NaN — confirm
    tempDF = tempDF.dropna()
    ##Z-score transformation
    ###Applied to every numeric column (outcomes and covariates) within this BMI class
    tempDF3 = tempDF.select_dtypes(include=[np.number])
    scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
    tempA = scaler.fit_transform(tempDF3)#Column direction
    tempDF3 = pd.DataFrame(data=tempA, index=tempDF3.index, columns=tempDF3.columns)
    ###Recover the categorical variables
    tempDF4 = tempDF.select_dtypes(exclude=[np.number])
    tempDF = pd.merge(tempDF3, tempDF4, left_index=True, right_index=True, how='left')
    ##Add a constant for the intercept
    ###–> In statsmodels, a constant is automatically added as well as R!
    ##Sort to make bcoef = 0 and 1 for Healthy and Unhealthy
    tempDF = tempDF.sort_values(by='Metabolically', ascending=True)
    ##One-hot encoding for categorical covariates
    ###–> In statsmodels, categorical variables are automatically recognized!
    
    tempD2 = {}
    for bwhtr in tempL2:
        #OLS linear regression
        ##Fit univariate model
        formula = 'Delta'+bwhtr+' ~ C(Metabolically)'
        fit_res1 = smf.ols(formula, data=tempDF).fit()
        ##Fit full model
        ###Adds the measured WHtR, sex, age, and PC1-PC5 as covariates
        formula = 'Delta'+bwhtr+' ~ C(Metabolically)'\
            '+ WHtR + C(Sex) + Age + PC1 + PC2 + PC3 + PC4 + PC5'
        fit_res2 = smf.ols(formula, data=tempDF).fit()
        
        #Summarize the result
        ##One-row DF per model; rows are concatenated after the loop
        tempDF3 = pd.DataFrame({'DeltaWHtR':[bwhtr]})
        ##Save the sample size for each group
        tempDF3['N'] = len(tempDF)
        tempDF3['nHealthy'] = len(tempDF.loc[tempDF['Metabolically']=='Healthy'])
        tempDF3['nUnhealthy'] = len(tempDF.loc[tempDF['Metabolically']=='Unhealthy'])
        ##Save R2 [%]
        ###UnivarR2: univariate model; R2: full model
        tempDF3['UnivarR2'] = fit_res1.rsquared*100
        tempDF3['R2'] = fit_res2.rsquared*100
        ##Save beta-coefficient of the target variable
        ###All of the following statistics are taken from the full model
        tempDF3['Bcoef'] = fit_res2.params['C(Metabolically)[T.Unhealthy]']
        tempDF3['BcoefSE'] = fit_res2.bse['C(Metabolically)[T.Unhealthy]']
        ##Save t-statistic of the target variable
        tempDF3['tStat'] = fit_res2.tvalues['C(Metabolically)[T.Unhealthy]']
        ##Save residual degrees of freedom
        tempDF3['DoF'] = int(fit_res2.df_resid)
        ##Save P-value of the target variable
        tempDF3['Pval'] = fit_res2.pvalues['C(Metabolically)[T.Unhealthy]']
        
        tempD2[bwhtr] = tempDF3
    
    #Clean the results (pd.DataFrame) across bWHtRs
    tempDF = pd.concat(list(tempD2.values()), axis=0)
    tempDF['BMIclass'] = bmi_class
    
    tempD1[bmi_class] = tempDF
t_elapsed = time.time() - t_start
##Report the elapsed time as whole minutes plus the remaining seconds
print('Elapsed time for',
      len(tempL1)*len(tempL2), 'OLS linear regressions (',
      len(tempL1), 'BMI classes x',
      len(tempL2), 'bWHtRs):',
      round(t_elapsed//60), 'min', round(t_elapsed%60, 1), 'sec')

#Clean the results (pd.DataFrame) across BMI classes
##Stack the per-class tables and index them by (BMI class, bWHtR)
tempDF = pd.concat(list(tempD1.values()), axis=0).set_index(['BMIclass', 'DeltaWHtR'])

#P-value adjustment (across BMI classes within each bWHtR) by using Benjamini–Hochberg method
adjusted_within = []
for bwhtr in tempL2:
    #P-values of this bWHtR in every BMI class
    pvals = tempDF['Pval'].loc[[(bmi_class, bwhtr) for bmi_class in tempL1]]
    tempA = multi.multipletests(pvals, alpha=0.05, method='fdr_bh',
                                is_sorted=False, returnsorted=False)[1]
    adjusted_within.append(pd.Series(tempA, index=pvals.index, name='AdjPval_within'))
##Attach the adjusted values back to the result table by index
tempS = pd.concat(adjusted_within, axis=0)
tempDF = pd.merge(tempDF, tempS, left_index=True, right_index=True, how='left')

#P-value adjustment (across all tests) by using Benjamini–Hochberg method
adjusted_all = multi.multipletests(tempDF['Pval'], alpha=0.05, method='fdr_bh',
                                   is_sorted=False, returnsorted=False)[1]
tempDF['AdjPval_all'] = adjusted_all

display(tempDF)

#Save
fileDir = './ExportData/'
ipynbName = '220824_Multiomics-BMI-NatMed1stRevision_bBMI-vs-bWHtR-ver2_'
fileName = 'DeltaWHtR-clinical-misclassification.tsv'
tempDF.to_csv(fileDir+ipynbName+fileName, sep='\t', index=True)

resDF = tempDF

#### 3-2-2. Visualization

In [None]:
tempD1 = {'MetWHtR':'b', 'ProtWHtR':'r', 'ChemWHtR':'g', 'CombiWHtR':'m'}#Facet color of each bWHtR
tempD2 = {'Healthy':'0.8', 'Unhealthy':'crimson'}#Box color of each metabolic condition
tempL1 = ['Normal', 'Obese']
tempDF3 = resDF

#Prepare DF
##Attach the metabolic health condition, keep the target BMI classes, and drop rows containing NaN
tempDF = pd.merge(whtrDF, metabDF['Metabolically'],
                  left_index=True, right_index=True, how='left')
tempDF = tempDF.loc[tempDF['BMI_class'].isin(tempL1)].dropna()

#Check sample size
print('N (total):', len(tempDF))
print(' - BMI class:', tempDF['BMI_class'].value_counts().sort_index(ascending=True).to_dict())
for bmi_class in tempL1:
    condition_counts = tempDF.loc[tempDF['BMI_class']==bmi_class, 'Metabolically'].value_counts()
    print('   - '+bmi_class+' BMI class - Metabolic condition:',
          condition_counts.sort_index(ascending=True).to_dict())

#Visualization
##One panel per bWHtR: ∆WHtR boxplots by BMI class (x) and metabolic condition (hue),
##annotated with the adjusted P-value (AdjPval_all) of the regression results in tempDF3
sns.set(style='ticks', font='Arial', context='talk')
fig, axes = plt.subplots(nrows=1, ncols=len(tempD1),
                         figsize=(8, 3), sharex=True, sharey=True,
                         gridspec_kw={'width_ratios':[1, 1, 1, 1]})
##axis_*: axis limits; ymin/ymax/yinter: first tick, last tick, and tick interval
axis_ymin = -32.5
axis_ymax = 42.5
ymin = -30
ymax = 40
yinter = 15
margin = 0.49
#Set shared axis range
plt.setp(axes, ylim=(axis_ymin, axis_ymax), yticks=np.arange(ymin, ymax+yinter/10, yinter))
plt.setp(axes, xlim=(0-margin, len(tempD2)-1+margin))#To eliminate excess white space
for ax_i, ax in enumerate(axes.flat):
    ##Panels follow the key order of tempD1
    bwhtr = list(tempD1.keys())[ax_i]
    sns.boxplot(data=tempDF, y='Delta'+bwhtr, x='BMI_class', order=tempL1,
                hue='Metabolically', hue_order=tempD2.keys(), dodge=True, palette=tempD2,
                showfliers=False,#flierprops={'marker':'o', 'markerfacecolor':'gray', 'alpha':0.4},
                showcaps=True, notch=True, ax=ax)
    #Axis setting
    ##Only the first panel keeps the y-axis label and tick labels
    if ax_i==0:
        plt.setp(ax, xlabel='', ylabel=r'$\Delta$'+'WHtR [% WHtR]')
    else:
        plt.setp(ax.get_yticklabels(), visible=False)
        plt.setp(ax, xlabel='', ylabel='')
    sns.despine()
    plt.setp(ax.get_xticklabels(), rotation=70,
             horizontalalignment='right', verticalalignment='center', rotation_mode='anchor')
    #P-value annotation
    ##NOTE(review): the indexing below depends on the order in which the boxplot lines are added
    ##and on the private Line2D attributes _x/_y; presumably valid for the library versions used — confirm
    lines = ax.get_lines()#Line2D: [[Q1, Q1-1.5IQR], [Q3, Q3+1.5IQR], [Q1, Q1], [Q3, Q3], [Med, Med], [flier]]
    lines_unit = 5 + int(False)#showfliers=False
    for class_i in range(len(tempL1)):
        ##Index 1 within each box's group of lines is its upper whisker
        #Healthy
        whisker_0 = lines[class_i*lines_unit*len(tempD2) + lines_unit*0 + 1]
        xcoord_0 = whisker_0._x[1]#Q3+1.5IQR
        ycoord_0 = whisker_0._y[1]#Q3+1.5IQR
        #Unhealthy
        whisker_1 = lines[class_i*lines_unit*len(tempD2) + lines_unit*1 + 1]
        xcoord_1 = whisker_1._x[1]#Q3+1.5IQR
        ycoord_1 = whisker_1._y[1]#Q3+1.5IQR
        #Standard point for annotation
        ##Horizontally centered between the two boxes, at the higher of the two whisker ends
        xcoord = (xcoord_0+xcoord_1)/2
        ycoord = max(ycoord_0, ycoord_1)
        #Add annotation lines
        ##Bracket: up from each box at ycoord+aline_offset, joined at ycoord+aline_length
        aline_offset = yinter/5
        aline_length = yinter/5 + aline_offset/2
        ax.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                lw=1.5, c='k')
        #Retrieve P-value
        bmi_class = tempL1[class_i]
        pval = tempDF3.loc[(bmi_class, bwhtr), 'AdjPval_all']
        ##Asterisks for P < 0.001 / 0.01 / 0.05; otherwise the value rounded to two decimal places
        if pval<0.001:
            label = '***'
        elif pval<0.01:
            label = '**'
        elif pval<0.05:
            label = '*'
        else:
            ##NOTE(review): Decimal(pval) converts the binary float exactly, whereas the other
            ##annotations in this notebook use Decimal(str(...)); half-up rounding can differ
            ##at exact half-way values — confirm which is intended
            pval_text = str(Decimal(pval).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP))
            label = r'$P$'+' = '+pval_text
        #Add annotation text
        ##Asterisks and P-value text use different vertical offsets and font sizes
        if label in ['***', '**', '*']:
            text_offset = yinter/12
            text_size = 'medium'
        else:
            text_offset = yinter/3
            text_size = 'x-small'
        ax.annotate(label, xy=(xcoord, ycoord+text_offset),
                    horizontalalignment='center', verticalalignment='bottom',
                    fontsize=text_size, color='k')
    #Facet settings
    ##Colored strip behind the panel title, drawn above the axes in axes coordinates
    ax.set_title(bwhtr, {'fontsize':'medium'})
    xoff = 0.025
    yoff = 0.01
    rect = plt.Rectangle((xoff, 1+yoff), 1-xoff, 0.15,#Manual adjustment
                         transform=ax.transAxes, facecolor=tempD1[bwhtr], alpha=0.3,
                         clip_on=False, linewidth=0, zorder=0.5)
    ax.add_patch(rect)
    #Change the default boxplot settings
    ##'lines' was collected before the annotation brackets were drawn, so only boxplot lines are recolored
    for line in lines:
        line.set_color('k')
    ##NOTE(review): presumably the box patches are held in ax.artists for the library versions used — confirm
    for box in ax.artists:
        box.set_edgecolor('k')
    #Legend
    ##Keep a single legend (attached to the second panel) and remove the others
    if ax_i==1:#Adjustment for paper
        ax.legend(title='Metabolic condition', title_fontsize='medium', fontsize='medium',
                  bbox_to_anchor=(1.1, -0.425), loc='upper center', borderaxespad=0,
                  handlelength=1.5, handletextpad=0.5, ncol=2, columnspacing=1.0)
    else:
        ax.get_legend().remove()
##Save
fileDir = './ExportFigures/'
ipynbName = '220824_Multiomics-BMI-NatMed1stRevision_bBMI-vs-bWHtR-ver2_'
fileName = 'DeltaWHtR-clinical-misclassification.tif'
plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
                  pil_kwargs={'compression':'tiff_lzw'})
plt.show()

# — End of this notebook —