# Multiomics BMI Paper — Relationships between ∆BMI-based Misclassification and Gut Microbiome Alpha-diversity

***Analyzed by Tomasz Wilmanski originally, and modified by Kengo Watanabe***  

This Jupyter Notebook (with Python 3 kernel) assessed the relationships between ∆BMI-based misclassification (i.e., BMI class vs. biological BMI class) and gut microbiome alpha-diversity metrics using regression analysis (in the baseline Arivale cohort).  

Input files:  
* Arivale baseline biological BMIs and covariates: 220803_Multiomics-BMI-NatMed1stRevision_DeltaBMI-misclassification_biologicalBMI-baseline-summary-BothSex.tsv  
* Arivale baseline gut microbiome alpha-diversity metrics: 220902_Multiomics-BMI-NatMed1stRevision_Microbiome-DataCleaning_AlphaDiversity-and-TaxonAbundance_final.tsv  

Output figures and tables:  
* Figure 4b  
* Table for Supplementary Data 6  

Original notebook (memo for my future tracing):  
* wenceslaus:\[JupyterLab HOME\]/220621_Multiomics-BMI-NatMedRevision/220905_Multiomics-BMI-NatMed1stRevision_Microbiome-DeltaBMI-wenceslaus.ipynb  

In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
#For Arial font
#!conda install -c conda-forge -y mscorefonts
##-> The below was also needed in matplotlib 3.4.2
#import shutil
#import matplotlib
#shutil.rmtree(matplotlib.get_cachedir())
import warnings
warnings.filterwarnings('ignore')
from IPython.display import display
import time

from sklearn.preprocessing import StandardScaler
import statsmodels.formula.api as smf
from statsmodels.stats import multitest as multi
from decimal import Decimal, ROUND_HALF_UP

!conda list

# packages in environment at /opt/conda/envs/arivale-py3:
#
# Name                    Version                   Build  Channel
_libgcc_mutex             0.1                 conda_forge    conda-forge
_openmp_mutex             4.5                       1_gnu    conda-forge
analytics                 0.1                      pypi_0    pypi
argon2-cffi               21.1.0           py39h3811e60_0    conda-forge
arivale-data-interface    0.1.0                    pypi_0    pypi
async_generator           1.10                       py_0    conda-forge
atk-1.0                   2.36.0               h3371d22_4    conda-forge
attrs                     21.2.0             pyhd8ed1ab_0    conda-forge
backcall                  0.2.0              pyh9f0ad1d_0    conda-forge
backports                 1.0                        py_2    conda-forge
backports.functools_lru_cache 1.6.4              pyhd8ed1ab_0    conda-forge
biopython                 1.79             py39h3811e60_0    conda-forge
bleach 

## 1. Prepare datasets

> The necessary files were copied from the dalek server in advance.  

### 1-1. Alpha-diversity

In [None]:
#Import cleaned table for baseline gut microbiome data
fileDir = './ImportData/'
ipynbName = '220902_Multiomics-BMI-NatMed1stRevision_Microbiome-DataCleaning_'
fileName = 'AlphaDiversity-and-TaxonAbundance_final.tsv'
tempDF = pd.read_csv(fileDir+ipynbName+fileName, sep='\t', dtype={'public_client_id': str})
tempDF = tempDF.set_index('public_client_id')

#Take only the alpha-diversity metrics
##-> .copy() makes the subset an independent DataFrame; otherwise, adding columns to it later
##   writes into a slice of the imported table (SettingWithCopyWarning, hidden by the warnings filter).
tempL = ['Observed', 'Shannon', 'Chao1']
tempDF = tempDF[tempL].copy()

display(tempDF)
#Summary statistics, with the skewness of each metric appended as an extra row
tempDF1 = tempDF.describe()
tempDF1.loc['Skewness'] = stats.skew(tempDF)
display(tempDF1)

divDF = tempDF

> –> Based on Tom's original analysis, a square transformation is applied to Shannon, while a square-root transformation is applied to Observed and Chao1.  

In [None]:
#Transform to less skewed distribution
##-> .assign() returns a new DataFrame, hence the DataFrame bound to divDF before this cell
##   is not modified in place through an alias (the columns are appended in the written order).
tempDF = divDF.assign(
    Observed_tf=np.sqrt(divDF['Observed']),
    Shannon_tf=divDF['Shannon']**2,
    Chao1_tf=np.sqrt(divDF['Chao1']),
)

display(tempDF)
#Summary statistics, with the skewness of each column appended as an extra row
tempDF1 = tempDF.describe()
tempDF1.loc['Skewness'] = stats.skew(tempDF)
display(tempDF1)

#Update
divDF = tempDF

In [None]:
#Check distribution and probability plot
tempDF = divDF
##Color for each column (a raw metric and its transformed values share the same color)
tempD1 = {'Observed':'tab:blue', 'Shannon':'tab:orange', 'Chao1':'tab:green',
          'Observed_tf':'tab:blue', 'Shannon_tf':'tab:orange', 'Chao1_tf':'tab:green'}
##X-axis label for each column
tempD2 = {'Observed':'Raw value', 'Shannon':'Raw value', 'Chao1':'Raw value',
          'Observed_tf':'Transformed value', 'Shannon_tf':'Transformed value', 'Chao1_tf':'Transformed value'}

sns.set(style='ticks', font='Arial', context='notebook')
fig, axes = plt.subplots(nrows=2, ncols=6, figsize=(15, 5), sharex=False, sharey=False)
n_col = axes.shape[1]
col_names = tempDF.columns.tolist()
for ax_i, ax in enumerate(axes.flat):
    #Upper row (row_i = 0): distribution; lower row (row_i = 1): probability plot of the same column
    row_i, col_i = divmod(ax_i, n_col)
    if col_i>=len(col_names):
        #Cross out the panel having no corresponding column
        for ycoords in ([0, 1], [1, 0]):
            ax.plot([0, 1], ycoords, color='k')
        continue
    col_n = col_names[col_i]
    color = tempD1[col_n]
    if row_i==0:
        sns.set(style='ticks', font='Arial', context='notebook')
        sns.distplot(tempDF[col_n], color=color, ax=ax)
        sns.despine()
        ax.set_title(col_n, fontsize='large')
        ax.set_xlabel(tempD2[col_n])
        continue
    sns.set(style='whitegrid', font='Arial', context='notebook')
    stats.probplot(tempDF[col_n], plot=ax)
    ##The first line holds the data points and the second line holds the fitted line
    points = ax.get_lines()[0]
    points.set_markerfacecolor(color)
    points.set_markeredgecolor(color)
    fitted = ax.get_lines()[1]
    fitted.set_color('k')
    fitted.set_linewidth(3)
    skewness = stats.skew(tempDF[col_n])
    ax.set_title(f'Skewness: {round(skewness, 3)}', fontsize='large')
fig.tight_layout()
plt.show()

### 1-2. ∆BMI-derived misclassification and covariates

In [None]:
#Import cleaned table for baseline measured and biological BMIs
fileDir = './ImportData/'
ipynbName = '220803_Multiomics-BMI-NatMed1stRevision_DeltaBMI-misclassification_'
fileName = 'biologicalBMI-baseline-summary-BothSex.tsv'
tempDF = pd.read_csv(fileDir+ipynbName+fileName, sep='\t', dtype={'public_client_id': str})
tempDF = tempDF.set_index('public_client_id')
print('Original:', len(tempDF))

#Take the participants having alpha-diversity
##-> .copy() to add columns below without writing into a slice of the imported table
tempDF = tempDF.loc[tempDF.index.isin(divDF.index)].copy()
print(' -> with alpha-diversity:', len(tempDF))

#Clean to handle easier in this notebook
tempDF.columns = tempDF.columns.str.replace('Base', '')

#Misclassification: measured BMI class vs. each biological BMI (bBMI) class
##-> Vectorized element-wise comparison instead of a row-by-row loop;
##   a missing class label never equals anything, hence such a row is labeled as 'Mismatched'.
tempL = ['MetBMI', 'ProtBMI', 'ChemBMI', 'CombiBMI']
for bbmi in tempL:
    tempDF['vs_'+bbmi+'_class'] = np.where(tempDF['BMI_class']==tempDF[bbmi+'_class'],
                                           'Matched', 'Mismatched')

display(tempDF)
print('NaN in DF:', tempDF.isnull().to_numpy().sum(axis=None))

bmiDF = tempDF

In [None]:
tempDF = bmiDF
#Defined explicitly in this cell: the list was previously taken from a variable left over by
#the previous cell, which breaks as soon as any other cell reassigning tempL is run in between.
tempL = ['MetBMI', 'ProtBMI', 'ChemBMI', 'CombiBMI']

#Check the measured BMI distribution of the matched and mismatched groups within each BMI class
for bmi_class in ['Normal', 'Obese']:
    print(bmi_class)
    tempDF1 = tempDF.loc[tempDF['BMI_class']==bmi_class]
    for bbmi in tempL:
        display(tempDF1.groupby('vs_'+bbmi+'_class')['BMI'].describe())

> –> It would be safer to adjust for the baseline BMI in the statistical tests.  

In [None]:
tempDF = bmiDF

#Keep the class-related columns (BMI classes and misclassification labels) followed by the covariates
#(just for the display in Jupyter notebook)
tempL1 = tempDF.columns[tempDF.columns.str.contains('_class')].tolist()
tempL2 = ['BMI', 'Sex', 'Age', 'PC1', 'PC2', 'PC3', 'PC4', 'PC5']
tempL = tempL1 + tempL2
tempDF1 = tempDF.loc[:, tempL]

display(tempDF1)

#Update
bmiDF = tempDF1

## 2. Regression analysis for the alpha-diversity metrics

> Because the datasets are almost ready for use, only the simple processing steps are required for OLS linear regression:  
> * ~~Missingness: To maximize the sample size for each regression, dropping NaN is performed after selecting metric.~~  
> * Centering and scaling: Z-score standardization is applied to the dependent variable and to the numeric independent variables (including numeric covariates); categorical variables are left as they are.  
>
> Hence, the remaining processing steps are implemented during the for-loop for each regression.  

### 2-1. Perform OLS linear regression

> Model: Metric ~ b0 + b1\*C(Misclassification) + b2\*BMI + b3\*C(Sex) + b4\*Age + b5\*AncestryPCs  
> Main aim: Assess the difference in each metric between the matched and mismatched BMI class.  

In [None]:
#Fit OLS linear regression models for every combination of metric x BMI class x bBMI.
#For each metric, one summary row per BMI class is built and stored in tempD1 (key: metric);
#the per-metric tables are combined after this loop.
tempDF1 = divDF.loc[:, divDF.columns.str.contains('_tf')]#Transformed metric values
tempDF2 = bmiDF
tempL1 = ['Normal', 'Obese']
tempL2 = ['MetBMI', 'ProtBMI', 'ChemBMI', 'CombiBMI']

t_start = time.time()
tempD1 = {}
for metric in tempDF1.columns.tolist():
    tempD2 = {}
    for bmi_class in tempL1:
        #Processing for OLS linear regression
        ##Gather all necessary variables into a single DF
        tempS = tempDF1[metric]
        tempDF = pd.merge(tempS, tempDF2, left_index=True, right_index=True, how='left')
        ##Select the target participants
        ###–> Rows without a match in the BMI table have NaN in BMI_class and are dropped here
        tempDF = tempDF.loc[tempDF['BMI_class']==bmi_class]
        ##Drop NaN in the metric values
        #tempDF = tempDF.dropna()
        ##Z-score transformation
        ###–> Applied within the selected BMI class, to the numeric columns only
        tempDF4 = tempDF.select_dtypes(include=[np.number])
        scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
        tempA = scaler.fit_transform(tempDF4)#Column direction
        tempDF4 = pd.DataFrame(data=tempA, index=tempDF4.index, columns=tempDF4.columns)
        ###Recover the categorical variables
        tempDF5 = tempDF.select_dtypes(exclude=[np.number])
        tempDF = pd.merge(tempDF4, tempDF5, left_index=True, right_index=True, how='left')
        ##Rename the dependent variable
        tempDF = tempDF.rename(columns={metric:'Metric'})
        ##Add a constant for the intercept
        ###–> In statsmodels (formula API), a constant is automatically added, as in R!
        
        tempD3 = {}
        for bbmi in tempL2:
            #Processing specific to each bBMI
            ##Sort to make bcoef = 0 and 1 for Matched and Mismatched
            tempDF = tempDF.sort_values(by='vs_'+bbmi+'_class', ascending=True)
            ##One-hot encoding for categorical covariates
            ###–> In statsmodels, categorical variables are automatically recognized!
            
            #OLS linear regression
            ##Fit univariate model
            formula = 'Metric ~ C(vs_'+bbmi+'_class)'
            fit_res1 = smf.ols(formula, data=tempDF).fit()
            ##Fit full model
            formula = 'Metric ~ C(vs_'+bbmi+'_class)'\
                '+ BMI + C(Sex) + Age + PC1 + PC2 + PC3 + PC4 + PC5'
            fit_res2 = smf.ols(formula, data=tempDF).fit()
            
            #Summarize the result
            tempS = pd.Series().astype('float64')
            ##Save the sample size for each group
            size1 = len(tempDF.loc[tempDF['vs_'+bbmi+'_class']=='Matched'])
            size2 = len(tempDF.loc[tempDF['vs_'+bbmi+'_class']=='Mismatched'])
            tempS.loc['vs'+bbmi+'class_nMatched'] = size1
            tempS.loc['vs'+bbmi+'class_nMismatched'] = size2
            ##Save R2 [%]
            tempS.loc['vs'+bbmi+'class_UnivarR2'] = fit_res1.rsquared*100
            tempS.loc['vs'+bbmi+'class_R2'] = fit_res2.rsquared*100
            ##Save beta-coefficient of the target variable
            ###–> All the statistics below are taken from the full model (fit_res2)
            tempS.loc['vs'+bbmi+'class_Bcoef'] = fit_res2.params['C(vs_'+bbmi+'_class)[T.Mismatched]']
            tempS.loc['vs'+bbmi+'class_BcoefSE'] = fit_res2.bse['C(vs_'+bbmi+'_class)[T.Mismatched]']
            ##Save t-statistic of the target variable
            tempS.loc['vs'+bbmi+'class_tStat'] = fit_res2.tvalues['C(vs_'+bbmi+'_class)[T.Mismatched]']
            ##Save P-value of the target variable
            tempS.loc['vs'+bbmi+'class_Pval'] = fit_res2.pvalues['C(vs_'+bbmi+'_class)[T.Mismatched]']
            ##Add dummy adjusted P-value rows for now
            ###–> Placeholders to fix the column order; overwritten by the P-value adjustments later
            tempS.loc['vs'+bbmi+'class_AdjPval_within1'] = 1.0
            tempS.loc['vs'+bbmi+'class_AdjPval_within2'] = 1.0
            tempS.loc['vs'+bbmi+'class_AdjPval_all'] = 1.0
            
            tempD3[bbmi] = tempS
        
        #Clean the results (pd.Series) across bBMIs
        ##Prepare common summary metrics: sample size, residual degrees of freedom
        ###NOTE(review): N is the row count of the DF passed to the models; if any model variable
        ###contained NaN, the number of fitted observations could be smaller – TODO confirm no NaN
        tempS1 = pd.Series().astype('float64')
        tempS1.loc['N'] = len(tempDF)
        tempS1.loc['DoF'] = int(fit_res2.df_resid)#Use the last result object but same b/w bBMIs
        ##Combine each result
        tempS2 = pd.concat(list(tempD3.values()), axis=0)
        tempS = pd.concat([tempS1, tempS2], axis=0)
        ##Convert to DF while transposing
        tempDF = pd.DataFrame(tempS.to_dict(), index=[0])
        ##Clean DF
        ###–> Counts were stored as float64 in the pd.Series; cast them back to integer
        tempDF['N'] = tempDF['N'].astype('int64')
        tempDF['DoF'] = tempDF['DoF'].astype('int64')
        tempL = tempDF.loc[:, tempDF.columns.str.contains('_nM.*ed')].columns.tolist()
        for col_n in tempL:
            tempDF[col_n] = tempDF[col_n].astype('int64')
        tempDF['BMIclass'] = bmi_class
        
        tempD2[bmi_class] = tempDF
    
    #Clean the results (pd.DataFrame) across BMI classes
    tempDF = pd.concat(list(tempD2.values()), axis=0)
    tempDF['Metric'] = metric.replace('_tf', '')
    
    #P-value adjustment (across BMI classes and bBMIs within the metric) by using Benjamini–Hochberg method
    ##–> '_Pval' matches only the nominal P-value columns (the adjusted ones are named '..._AdjPval_...')
    tempL = tempDF.loc[:, tempDF.columns.str.contains('_Pval')].columns.tolist()
    tempDF4 = tempDF.reset_index().melt(var_name='bBMI', value_name='Pval', value_vars=tempL,
                                        id_vars=['BMIclass', 'Metric'])
    tempDF4['AdjPval'] = multi.multipletests(tempDF4['Pval'], alpha=0.05, method='fdr_bh',
                                             is_sorted=False, returnsorted=False)[1]
    tempDF4 = tempDF4.pivot(index=['BMIclass', 'Metric'], columns='bBMI', values='AdjPval')
    tempDF4.columns = tempDF4.columns.str.replace('_Pval', '_AdjPval_within1')
    ##Replace the dummy values with the adjusted p-values
    ###–> Assigned by position: both tables are in the order of tempL1 at this point
    tempL = [(bmi_class, metric.replace('_tf', '')) for bmi_class in tempL1]
    tempDF4 = tempDF4.loc[tempL]#Sort just in case
    tempL = tempDF.loc[:, tempDF.columns.str.contains('_AdjPval_within1')].columns.tolist()
    for col_n in tempL:
        tempDF[col_n] = tempDF4[col_n].tolist()
    
    tempD1[metric] = tempDF
t_elapsed = time.time() - t_start
print('Elapsed time for',
      len(tempDF1.columns)*len(tempL1)*len(tempL2), 'OLS linear regressions (',
      len(tempDF1.columns), 'metrics x',
      len(tempL1), 'BMI classes x',
      len(tempL2), 'bBMIs):',
      round(t_elapsed//60), 'min', round(t_elapsed%60, 1), 'sec')

#Combine the per-metric result tables and index the rows by (BMI class, metric)
tempDF = pd.concat(list(tempD1.values()), axis=0).set_index(['BMIclass', 'Metric'])

#P-value adjustment (across BMI classes and metrics within each bBMI) by using Benjamini–Hochberg method
for bbmi in tempL2:
    prefix = 'vs'+bbmi+'class_'
    adj_pvals = multi.multipletests(tempDF[prefix+'Pval'], alpha=0.05, method='fdr_bh',
                                    is_sorted=False, returnsorted=False)[1]
    tempDF[prefix+'AdjPval_within2'] = adj_pvals

#P-value adjustment (across all tests) by using Benjamini–Hochberg method
##Long format: one row per (BMI class, metric, nominal P-value column)
pval_cols = [col_n for col_n in tempDF.columns if '_Pval' in col_n]
longDF = tempDF.reset_index().melt(id_vars=['BMIclass', 'Metric'], value_vars=pval_cols,
                                   var_name='bBMI', value_name='Pval')
longDF['AdjPval'] = multi.multipletests(longDF['Pval'], alpha=0.05, method='fdr_bh',
                                        is_sorted=False, returnsorted=False)[1]
##Back to wide format, with the columns renamed to the corresponding adjusted P-value columns
wideDF = longDF.pivot(index=['BMIclass', 'Metric'], columns='bBMI', values='AdjPval')
wideDF = wideDF.rename(columns=lambda col_n: col_n.replace('_Pval', '_AdjPval_all'))
##Replace the dummy values with the adjusted p-values (aligned on the row index)
for col_n in [col_n for col_n in tempDF.columns if '_AdjPval_all' in col_n]:
    tempDF[col_n] = wideDF[col_n]

#Sort the rows case-insensitively
tempDF = tempDF.sort_index(axis=0, ascending=True, key=lambda x:x.str.lower())
display(tempDF)

#Save
fileDir = './ExportData/'
ipynbName = '220905_Multiomics-BMI-NatMed1stRevision_Microbiome-DeltaBMI-wenceslaus_'
fileName = 'result-summary.tsv'
filePath = fileDir+ipynbName+fileName
tempDF.to_csv(filePath, sep='\t', index=True)

resDF = tempDF

### 2-2. Significantly different metrics by misclassification

In [None]:
def _display_coef_and_pvals(df):
    """Display the beta-coefficient and P-value columns of df, sorted by the nominal P-value
    for MetBMI (just for the display in Jupyter notebook), and return the displayed DataFrame."""
    coef_cols = df.columns[df.columns.str.contains('Bcoef$')].tolist()
    pval_cols = df.columns[df.columns.str.contains('Pval')].tolist()
    shownDF = df[coef_cols + pval_cols].sort_values(by='vsMetBMIclass_Pval', ascending=True)
    display(shownDF)
    return shownDF

tempDF = resDF
tempL1 = ['Normal', 'Obese']
tempL2 = ['MetBMI', 'ProtBMI', 'ChemBMI', 'CombiBMI']

#Significantly different metrics
print('Significantly different metrics by misclassification (FDR < 0.05)')
tempD = {}
for bmi_class in tempL1:
    tempDF1 = tempDF.loc[bmi_class]#MultiIndex
    print(' - '+bmi_class)
    sig_metrics = []
    for bbmi in tempL2:
        #Extract the metrics below the threshold for each kind of P-value:
        #adjustment across all tests (used for significance), and for reference,
        #adjustment within the metric, adjustment within the bBMI, and nominal P-value
        hits = {suffix: tempDF1.index[tempDF1['vs'+bbmi+'class_'+suffix]<0.05].tolist()
                for suffix in ['AdjPval_all', 'AdjPval_within1', 'AdjPval_within2', 'Pval']}
        sig_metrics.extend(hits['AdjPval_all'])
        
        print('   - '+bbmi+':', len(hits['AdjPval_all']),
              '(cf. within-adjustment 1:', len(hits['AdjPval_within1']),
              ', within-adjustment 2:', len(hits['AdjPval_within2']),
              ', nominal P-value:', len(hits['Pval']), ')')
    #Drop duplicates across bBMIs
    tempL = list(set(sig_metrics))
    
    print('   - Union:', len(tempL))
    tempD[bmi_class] = tempL
    
    tempDF1 = _display_coef_and_pvals(tempDF1.loc[tempL])

#Flatten and drop duplicates across BMI classes
union_metrics = list({row_n for sublist in tempD.values() for row_n in sublist})
print(' - Union:', len(union_metrics))
#Show the union metrics for both BMI classes
tempL = [(bmi_class, metric) for bmi_class in tempL1 for metric in union_metrics]
tempDF1 = _display_coef_and_pvals(tempDF.loc[tempL])

### 2-3. Visualization

In [None]:
#For each transformed alpha-diversity metric, draw one figure consisting of five box-plot panels:
#the overall BMI classes (left) and Matched vs. Mismatched within Normal/Obese for each bBMI,
#annotated with the P-values adjusted across all tests; each figure is saved as a TIFF file.
tempD1 = {'Observed_tf':'Observed ASVs',
          'Shannon_tf':'Shannon\'s index',
          'Chao1_tf':'Chao1 diversity'}#Figure title per metric
tempD2 = {'Underweight':'blue', 'Normal':'green', 'Overweight':'orange', 'Obese':'red'}#BMI class color
tempD3 = {'MetBMI':'b', 'ProtBMI':'r', 'ChemBMI':'g', 'CombiBMI':'m'}#bBMI color
tempD4 = {'Observed_tf':'tab:blue',
          'Shannon_tf':'tab:blue',
          'Chao1_tf':'tab:blue'}#Background color of the figure title per metric
tempL1 = ['Normal', 'Obese']
tempDF1 = divDF
tempDF2 = bmiDF
tempDF3 = resDF

for metric_i, metric in enumerate(tempD1.keys()):
    #Prepare DF
    tempS = tempDF1[metric]
    tempDF = pd.merge(tempS, tempDF2, left_index=True, right_index=True, how='left')
    ##Drop NaN in the metric values
    #tempDF = tempDF.dropna()
    ##Z-score transformation (based on the whole distribution)
    tempDF4 = tempDF.select_dtypes(include=[np.number])
    scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
    tempA = scaler.fit_transform(tempDF4)#Column direction
    tempDF4 = pd.DataFrame(data=tempA, index=tempDF4.index, columns=tempDF4.columns)
    ###Recover the categorical variables
    tempDF5 = tempDF.select_dtypes(exclude=[np.number])
    tempDF = pd.merge(tempDF4, tempDF5, left_index=True, right_index=True, how='left')

    #Check sample size
    print(tempD1[metric])
    print('N (total):', len(tempDF))
    print(' - BMI class:', tempDF['BMI_class'].value_counts().sort_index(ascending=True).to_dict())
    for bmi_class in tempL1:
        print('   - '+bmi_class+' BMI class')
        for bbmi in tempD3.keys():
            tempS = tempDF['vs_'+bbmi+'_class'].loc[tempDF['BMI_class']==bmi_class]
            print('     - vs. '+bbmi+' class:',
                  tempS.value_counts().sort_index(ascending=True).to_dict())

    #Visualization
    sns.set(style='ticks', font='Arial', context='talk')
    fig, axes = plt.subplots(nrows=1, ncols=1+len(tempD3),
                             figsize=(9, 3), sharex=False, sharey=True,
                             gridspec_kw={'width_ratios':[1, 1, 1, 1, 1]})
    axis_ymin = -3.5
    axis_ymax = 3.75
    ymin = -3
    ymax = 3
    yinter = 1
    margin = 0.49
    #Set shared axis range
    plt.setp(axes, ylim=(axis_ymin, axis_ymax), yticks=np.arange(ymin, ymax+yinter/10, yinter))
    tempL = []#For legend handles and labels
    for ax_i, ax in enumerate(axes.flat):
        if ax_i==0:#Overall
            sns.boxplot(data=tempDF, y=metric, x='BMI_class', order=tempD2.keys(),
                        hue='BMI_class', hue_order=tempD2.keys(), dodge=False, palette=tempD2,
                        showfliers=False,#flierprops={'marker':'o', 'markerfacecolor':'gray', 'alpha':0.4},
                        showcaps=True, notch=True, ax=ax)
            tempL.append(ax.get_legend_handles_labels())
        else:#Misclassification
            bbmi = list(tempD3.keys())[ax_i-1]
            tempDF4 = tempDF.loc[tempDF['BMI_class'].isin(tempL1)]
            tempD = {'Matched':'0.8', 'Mismatched':tempD3[bbmi]}
            sns.boxplot(data=tempDF4, y=metric, x='BMI_class', order=tempL1,
                        hue='vs_'+bbmi+'_class', hue_order=tempD.keys(), dodge=True, palette=tempD,
                        showfliers=False,#flierprops={'marker':'o', 'markerfacecolor':'gray', 'alpha':0.4},
                        showcaps=True, notch=True, ax=ax)
            tempL.append(ax.get_legend_handles_labels())
        #Axis settings
        if ax_i==0:
            plt.setp(ax, xlim=(0-margin, len(tempD2)-1+margin))#To eliminate excess white space
            ##The label typo ('Tarnsformed') was fixed because it appears on the exported figure
            plt.setp(ax, xlabel='', ylabel='Transformed value [a.u.]\n('+r'$Z$'+'-score)')
        else:
            plt.setp(ax, xlim=(0-margin, len(tempD)-1+margin))#To eliminate excess white space
            plt.setp(ax.get_yticklabels(), visible=False)
            plt.setp(ax, xlabel='', ylabel='')
        #ax.grid(axis='y', linestyle='--', color='black')
        sns.despine()
        plt.setp(ax.get_xticklabels(), rotation=70,
                 horizontalalignment='right', verticalalignment='center', rotation_mode='anchor')
        #Annotation
        lines = ax.get_lines()#Line2D: [[Q1, Q1-1.5IQR], [Q3, Q3+1.5IQR], [Q1, Q1], [Q3, Q3], [Med, Med], [flier]]
        if ax_i!=0:
            #P-value annotation
            lines_unit = 5 + int(False)#showfliers=False
            for class_i in range(len(tempL1)):
                ##The upper whisker is the second line of each box;
                ##the coordinates are read through the public Line2D accessors (not the private _x/_y)
                #Matched
                whisker_0 = lines[class_i*lines_unit*len(tempD) + lines_unit*0 + 1]
                xcoord_0 = whisker_0.get_xdata(orig=False)[1]#Q3+1.5IQR
                ycoord_0 = whisker_0.get_ydata(orig=False)[1]#Q3+1.5IQR
                #Mismatched
                whisker_1 = lines[class_i*lines_unit*len(tempD) + lines_unit*1 + 1]
                xcoord_1 = whisker_1.get_xdata(orig=False)[1]#Q3+1.5IQR
                ycoord_1 = whisker_1.get_ydata(orig=False)[1]#Q3+1.5IQR
                #Standard point for annotation
                xcoord = (xcoord_0+xcoord_1)/2
                ycoord = max(ycoord_0, ycoord_1)
                #Add annotation lines
                aline_offset = yinter/5
                aline_length = yinter/5 + aline_offset/2
                ax.plot([xcoord_0, xcoord_0, xcoord_1, xcoord_1],
                        [ycoord+aline_offset, ycoord+aline_length, ycoord+aline_length, ycoord+aline_offset],
                        lw=1.5, c='k')
                #Retrieve P-value (adjusted across all tests)
                bmi_class = tempL1[class_i]
                pval = tempDF3.loc[(bmi_class, metric.replace('_tf', '')), 'vs'+bbmi+'class_AdjPval_all']
                if pval<0.001:
                    label = '***'
                elif pval<0.01:
                    label = '**'
                elif pval<0.05:
                    label = '*'
                else:
                    pval_text = str(Decimal(pval).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP))
                    label = r'$P$'+' = '+pval_text
                #Add annotation text
                if label in ['***', '**', '*']:
                    text_offset = yinter/12
                    text_size = 'medium'
                else:
                    text_offset = yinter/3
                    text_size = 'x-small'
                ax.annotate(label, xy=(xcoord, ycoord+text_offset),
                            horizontalalignment='center', verticalalignment='bottom',
                            fontsize=text_size, color='k')
        #Facet settings
        if ax_i==0:
            ax.set_title('Overall', {'fontsize':'medium'})
        else:
            ax.set_title(bbmi, {'fontsize':'medium'})
            xoff = 0.025
            yoff = 0.01
            rect = plt.Rectangle((xoff, 1+yoff), 1-xoff, 0.15,#Manual adjustment
                                 transform=ax.transAxes, facecolor=tempD3[bbmi], alpha=0.3,
                                 clip_on=False, linewidth=0, zorder=0.5)
            ax.add_patch(rect)
        #Change the default boxplot settings
        for line in lines:
            line.set_color('k')
        #NOTE(review): depending on the matplotlib/seaborn versions, the boxes may be stored in
        #ax.patches rather than ax.artists, in which case this loop does nothing – TODO confirm
        for box in ax.artists:
            box.set_edgecolor('k')
        #Remove the default legend
        ax.get_legend().remove()
        #Save ax position for figure title
        if ax_i==0:
            ax_pos_l = ax.get_position().bounds
        elif ax_i==len(tempD3):
            ax_pos_r = ax.get_position().bounds
    #Add legend manually
    for legend_i in range(len(tempL)):
        h, l = tempL[legend_i]
        if legend_i==0:
            title_text = 'BMI class (overall)'
            position = (0.075, -0.3)
        else:
            bbmi = list(tempD3.keys())[legend_i-1]
            title_text = 'vs. '+bbmi+' class'
            ##Place the bBMI legends in a grid of two rows, filled column by column
            position = (0.35+0.25*((legend_i-1)//2), -0.3-0.35*((legend_i-1)%2))
        legend = fig.legend(handles=h, labels=l, fontsize='medium',
                            title=title_text, title_fontsize='medium',
                            bbox_to_anchor=position, loc='upper left',
                            labelspacing=0.25, handletextpad=0.5,
                            borderaxespad=1, frameon=False)
        plt.gca().add_artist(legend)
    #Add figure title (centered over the span from the leftmost to the rightmost panel)
    xcoord_0 = ax_pos_l[0]
    xcoord_1 = ax_pos_r[0]+ax_pos_r[2]
    ycoord = ax_pos_l[1]+ax_pos_l[3]
    yoff = 0.15
    if tempD4[metric] in ['tab:red', 'tab:blue']:
        text_color = 'white'
    else:
        text_color = 'black'
    fig.suptitle(tempD1[metric], x=(xcoord_0+xcoord_1)/2, y=ycoord+yoff,
                 fontsize='large', fontweight='bold', color=text_color,
                 horizontalalignment='center', verticalalignment='bottom')
    yoff = yoff - 0.015#Manual adjustment
    rect = plt.Rectangle((xcoord_0, ycoord+yoff), xcoord_1-xcoord_0, 0.125,#Manual adjustment
                         transform=fig.transFigure, facecolor=tempD4[metric],
                         clip_on=False, linewidth=0, zorder=0)
    fig.patches.extend([rect])
    ##Save
    fileDir = './ExportFigures/'
    ipynbName = '220905_Multiomics-BMI-NatMed1stRevision_Microbiome-DeltaBMI-wenceslaus_'
    fileName = metric.replace('_tf', '')+'.tif'
    plt.gcf().savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
                      pil_kwargs={'compression':'tiff_lzw'})
    plt.show()
    print('')

# — End of this notebook —