# Multiomics BMI Paper — Regression Analysis of Measured and Biological BMIs on Numeric Features

***by Kengo Watanabe***  

This Jupyter Notebook (with Python 3 kernel) regressed the measured and omics-inferred BMIs independently on each of the available numeric physiological features (in the baseline Arivale cohort).  

Input files:  
* Arivale baseline biological BMIs and covariates: 220803_Multiomics-BMI-NatMed1stRevision_DeltaBMI-misclassification_biologicalBMI-baseline-summary-BothSex.tsv  
* Arivale baseline numeric physiological features (preprocessed): 210106_Biological-BMI-paper_data-cleaning_OLS-regression_numDF.tsv  
* Numeric feature metadata: 210727_feature-label-correspondence.csv  

Output figures and tables:  
* Figure 1e  
* Table for Supplementary Data 4  

Original notebook (memo for my future tracing):  
* dalek:\[JupyterLab HOME\]/220621_Multiomics-BMI-NatMedRevision/220803_Multiomics-BMI-NatMed1stRevision_BMI-regression_NumericFeatures.ipynb  

In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
#For Arial font
#!conda install -c conda-forge -y mscorefonts
##-> The below was also needed in matplotlib 3.4.2
#import shutil
#import matplotlib
#shutil.rmtree(matplotlib.get_cachedir())
import warnings
warnings.filterwarnings('ignore')
from IPython.display import display
import time

from sklearn.preprocessing import StandardScaler
import statsmodels.formula.api as smf
from statsmodels.stats import multitest as multi

!conda list

# packages in environment at /opt/conda/envs/arivale-py3:
#
# Name                    Version                   Build  Channel
_libgcc_mutex             0.1                 conda_forge    conda-forge
_openmp_mutex             4.5                       1_gnu    conda-forge
analytics                 0.1                      pypi_0    pypi
argon2-cffi               21.1.0           py39h3811e60_0    conda-forge
arivale-data-interface    0.1.0                    pypi_0    pypi
async_generator           1.10                       py_0    conda-forge
atk-1.0                   2.36.0               h3371d22_4    conda-forge
attrs                     21.2.0             pyhd8ed1ab_0    conda-forge
backcall                  0.2.0              pyh9f0ad1d_0    conda-forge
backports                 1.0                        py_2    conda-forge
backports.functools_lru_cache 1.6.4              pyhd8ed1ab_0    conda-forge
biopython                 1.79             py39h3811e60_0    conda-forge
bleach 

## 1. Processing for OLS linear regression

### 1-1. Prepare datasets

In [None]:
#Load the cleaned baseline table of measured and biological (omics-inferred) BMIs.
#The participant ID is read as string and used as the row index.
bmi_path = ('./ExportData/'
            + '220803_Multiomics-BMI-NatMed1stRevision_DeltaBMI-misclassification_'
            + 'biologicalBMI-baseline-summary-BothSex.tsv')
bmiDF = (
    pd.read_csv(bmi_path, sep='\t', dtype={'public_client_id': str})
    .set_index('public_client_id')
)

#Strip the 'Base' tag from the column names so they are easier to handle in this notebook
bmiDF.columns = bmiDF.columns.str.replace('Base', '')

display(bmiDF)
#Total count of missing cells over the whole table
print('NaN in DF:', bmiDF.isnull().to_numpy().sum(axis=None))

In [None]:
#Load the cleaned baseline numeric features (participant ID as string row index)
num_path = ('../210104_Biological-BMI-paper/ExportData/'
            + '210106_Biological-BMI-paper_data-cleaning_OLS-regression_'
            + 'numDF.tsv')
numDF = (
    pd.read_csv(num_path, sep='\t', dtype={'public_client_id': str})
    .set_index('public_client_id')
    #Clean to handle easier in this notebook
    .rename(columns={'BaseAge':'Age'})
    #Race has NaN in this cohort; 'activities.calories' is the exceptional feature
    #that should have been eliminated during data cleaning
    .drop(columns=['Race', 'activities.calories'])
)

display(numDF)
#Total count of missing cells over the whole table
print('NaN in DF:', numDF.isnull().to_numpy().sum(axis=None))

In [None]:
#Load the correspondence table between original feature names and shortened figure labels
meta_path = '../210104_Biological-BMI-paper/ImportData/' + '210727_feature-label-correspondence.csv'
numDF_meta = (
    pd.read_csv(meta_path)
    .rename(columns={'OriginalName':'FeatureID', 'LabelForFigure':'FeatureLabel'})
    .set_index('FeatureID')
)

display(numDF_meta)
#Number of distinct labels (a missing label, if any, counts as one value)
print(' -> Unique label:', numDF_meta['FeatureLabel'].nunique(dropna=False))

In [None]:
#Covariates adjusted for in every regression: sex, age, and PC1-PC5
covarL = ['Sex', 'Age'] + ['PC'+str(pc_i) for pc_i in range(1, 6)]

### 1-2. Split DF for each regression, and then drop NaN

> Sample size differs between numeric features.  
> ***–> To maximize the sample size for each regression, dropping NaN is performed after splitting.***  

In [None]:
#Every column of numDF that is not a covariate is treated as a feature to test
featureL = list(numDF.drop(columns=covarL).columns)

#One DF per feature (feature column first, then the covariates);
#NaN rows are dropped per DF so that each regression keeps its own maximal sample size
DF_splitL = [numDF[[feature_n] + covarL].dropna() for feature_n in featureL]

print('The number of features:', len(DF_splitL))

In [None]:
#Show the first, second, and last per-feature DFs for confirmation
for example_i in (0, 1, -1):
    print(featureL[example_i])
    display(DF_splitL[example_i])

In [None]:
#Sample size (rows left after dropping NaN) of each per-feature DF, largest first
tempS = pd.Series([len(splitDF) for splitDF in DF_splitL], index=featureL)
tempS = tempS.sort_values(ascending=False)

display(tempS)
display(tempS.describe())

#Distribution of the sample sizes across features
sns.set(style='ticks', font='Arial', context='notebook')
plt.figure(figsize=(4, 3))
ax = sns.distplot(tempS)
sns.despine()
ax.set_xlabel('Sample size')
ax.set_ylabel('Density')
plt.show()

### 1-3. Eliminate features with small sample size

> No need for further elimination in this study cohort!  

### 1-4. Standardization of continuous numeric features and covariates

> Because all features are continuous numeric features this time, age is also standardized.

In [None]:
#Z-score every numeric column within each per-feature DF (fitted separately per DF),
#then re-attach the non-numeric columns untouched.
zscoredL = []
for splitDF in DF_splitL:
    #Numeric part -> Z-score transformation in the column direction
    numericDF = splitDF.select_dtypes(include=[np.number])
    zA = StandardScaler(copy=True, with_mean=True, with_std=True).fit_transform(numericDF)
    zDF = pd.DataFrame(data=zA, index=numericDF.index, columns=numericDF.columns)
    #Non-numeric part (categorical covariates) is recovered as is
    categoricalDF = splitDF.select_dtypes(exclude=[np.number])
    zscoredL.append(pd.merge(zDF, categoricalDF, left_index=True, right_index=True, how='left'))
DF_splitL = zscoredL#Update/overwrite

In [None]:
#Confirmation: collect the standardized feature column from each per-feature DF
tempL = [DF_splitL[feature_i].loc[:, featureL[feature_i]] for feature_i in range(len(featureL))]
display(pd.concat(tempL, axis=1).describe())#Length is different but enforce merging to see the summary

#Check distribution of some example numeric features (about four, evenly spaced over the list)
sns.set(style='ticks', font='Arial', context='notebook')
plt.figure(figsize=(4, 3))
#round(len/4) is 0 when there are fewer than 3 features, which would make range() raise;
#clamp the step to at least 1 so that small feature sets are still plotted
step = max(1, round(len(tempL)/4))
for feature_i in range(0, len(tempL), step):
    sns.distplot(tempL[feature_i], label=tempL[feature_i].name)
sns.despine()
plt.xlabel('Z-score')
plt.ylabel('Density')
plt.legend(bbox_to_anchor=(1, 0.5), loc='center left', borderaxespad=1)
plt.show()

### 1-5. One-hot encoding for categorical covariates

> category_encoders is a more useful way than using sklearn.preprocessing or pandas.get_dummies.  
> ***–> In statsmodels, categorical variables are automatically recognized!***  
> –> Hence, this step is not needed anymore.

In [None]:
#Show the first, second, and last standardized per-feature DFs for confirmation
for example_i in (0, 1, -1):
    print(featureL[example_i])
    display(DF_splitL[example_i])

## 2. Regression analysis

### 2-1. Perform OLS linear regression

> Model: (b)BMI ~ b0 + b1\*Feature + b2\*Sex + b3\*Age + b4\*AncestryPCs  
> Main aim: Independently find associated features with BMI or biological BMI while adjusting sex, age and ancestry PCs as covariates  

In [None]:
#Regress each log-transformed BMI type on every feature independently (one model per
#feature x BMI type) and collect the statistics of the 'Feature' term into olsDF.
# - Univariate model: log_BMI ~ Feature (only R2 is kept)
# - Full model: log_BMI ~ Feature + C(Sex) + Age + PC1-PC5
#P-values are BH-adjusted both within each BMI type and across all tests.
#The summary table is displayed and saved as a TSV file.
bmiL = ['BMI', 'MetBMI', 'ProtBMI', 'ChemBMI', 'CombiBMI']
full_formula = 'log_BMI ~ Feature + C(Sex) + Age + PC1 + PC2 + PC3 + PC4 + PC5'

#Initialize the result summary DF
olsDF = pd.DataFrame(index=pd.Index(featureL, name='FeatureID'))
olsDF = pd.merge(olsDF, numDF_meta['FeatureLabel'],
                 left_index=True, right_index=True, how='left')
olsDF['N'] = 1#Dummy int to keep the 2nd column, because it's same b/w BMI types in this cohort
olsDF['DoF'] = 1#Dummy int to keep the 3rd column, because it's same b/w BMI types in this cohort

#Perform OLS linear regression
for bmi in bmiL:
    recordL = []#One dict of statistics per feature
    nobsL = []#For sample size
    dofL = []#For residual degrees of freedom
    t_start = time.time()
    for feature, splitDF in zip(featureL, DF_splitL):
        #Prepare DF: attach the (unstandardized) dependent variable to the standardized predictors
        tempDF = pd.merge(splitDF, bmiDF['log_'+bmi], left_index=True, right_index=True, how='left')
        #Rename dependent/independent variables so that one formula serves all features
        tempDF = tempDF.rename(columns={'log_'+bmi:'log_BMI', feature:'Feature'})
        ##Add a constant for the intercept -> Similar to R, smf automatically add a constant

        #Fit univariate model
        uni_res = smf.ols('log_BMI ~ Feature', data=tempDF).fit()
        #Fit full model
        fit_res = smf.ols(full_formula, data=tempDF).fit()
        #95% CI of the beta-coefficient (computed once; to use during visualization)
        ciS = fit_res.conf_int(alpha=0.05).loc['Feature']
        recordL.append({bmi+'_UnivarR2':uni_res.rsquared*100,#R2 [%] in the univariate model
                        bmi+'_R2':fit_res.rsquared*100,#R2 [%]
                        bmi+'_Bcoef':fit_res.params['Feature'],
                        bmi+'_BcoefSE':fit_res.bse['Feature'],
                        bmi+'_BcoefCIlow':ciS[0],
                        bmi+'_BcoefCIhigh':ciS[1],
                        bmi+'_tStat':fit_res.tvalues['Feature'],
                        bmi+'_Pval':fit_res.pvalues['Feature']})

        #Save the common values b/w BMI types
        #The left merge can add rows with missing log_BMI, which the formula API drops
        #before fitting; take N from the fitted model so it stays consistent with DoF
        nobsL.append(int(fit_res.nobs))#Same b/w BMI types in this cohort
        dofL.append(int(fit_res.df_resid))#Same b/w BMI types in this cohort
    t_elapsed = time.time() - t_start
    print('Elapsed time for', len(featureL), 'OLS linear regressions of log_'+bmi+':',
          round(t_elapsed//60), 'min', round(t_elapsed%60, 1), 'sec')

    #Clean the results
    tempDF = pd.DataFrame(recordL, index=pd.Index(featureL, name='FeatureID'))
    ##P-value adjustment (within each BMI type) by using Benjamini–Hochberg method
    tempDF[bmi+'_AdjPval_within'] = multi.multipletests(tempDF[bmi+'_Pval'], alpha=0.05, method='fdr_bh',
                                                        is_sorted=False, returnsorted=False)[1]
    ##Add dummy column for now
    tempDF[bmi+'_AdjPval_all'] = 1.0#Dummy value

    #Merge the cleaned results
    olsDF = pd.merge(olsDF, tempDF, left_index=True, right_index=True)

#P-value adjustment (across all tests) by using Benjamini–Hochberg method
pvalL = [bmi+'_Pval' for bmi in bmiL]#Raw P-value columns only
tempDF = olsDF.reset_index().melt(var_name='BMItype', value_name='Pval', value_vars=pvalL,
                                  id_vars=['FeatureID'])
tempDF['AdjPval'] = multi.multipletests(tempDF['Pval'], alpha=0.05, method='fdr_bh',
                                        is_sorted=False, returnsorted=False)[1]
tempDF = tempDF.pivot(index='FeatureID', columns='BMItype', values='AdjPval')
tempDF.columns = tempDF.columns.str.replace('_Pval', '_AdjPval_all')
##Replace the dummy values with the adjusted p-values (aligned on FeatureID)
for col_n in [bmi+'_AdjPval_all' for bmi in bmiL]:
    olsDF[col_n] = tempDF[col_n]

#Clean the result summary table
##Replace the dummy int values with the values from the last BMI type of the for-loop;
##assign by FeatureID label rather than by row position
olsDF['N'] = pd.Series(nobsL, index=featureL)
olsDF['DoF'] = pd.Series(dofL, index=featureL)
olsDF = olsDF.sort_values(by=['BMI_Pval'], ascending=True)

display(olsDF)
#Save
fileDir = './ExportData/'
ipynbName = '220803_Multiomics-BMI-NatMed1stRevision_BMI-regression_NumericFeatures_'
fileName = 'result-summary.tsv'
olsDF.to_csv(fileDir+ipynbName+fileName, sep='\t', index=True)

### 2-2. Significantly associated features

In [None]:
#Exploratory summary of the features significantly associated with at least one BMI type:
#prints the counts per BMI type, displays the subset of olsDF, and draws two grouped
#horizontal bar plots (explained variance R2 and beta-coefficient with SE) for those features.
#Significantly associated features
print('Significantly associated features (FDR < 0.05):')
#BMI type -> matplotlib color; the key order also defines the hue order in the plots
tempD = {'BMI':'0.3', 'MetBMI':'b', 'ProtBMI':'r', 'ChemBMI':'g', 'CombiBMI':'m'}
tempL = []
for bmi in list(tempD.keys()):
    #Extract significantly associated features (P-values adjusted across all tests)
    tempDF = olsDF.loc[olsDF[bmi+'_AdjPval_all']<0.05]
    tempL.append(tempDF.index.tolist())
    
    #Cf. Adjustment within each BMI type
    tempDF1 = olsDF.loc[olsDF[bmi+'_AdjPval_within']<0.05]
    print(' - '+bmi+':', len(tempDF), '(cf.', len(tempDF1), 'by the within-adjustment)')
##Flatten and drop duplicates -> union of features significant for any BMI type
tempL = list(set(item for sublist in tempL for item in sublist))
tempDF = olsDF.loc[olsDF.index.isin(tempL)]
display(tempDF)

#Add sample size to feature label -> 'FeatureID (n = N)', with thousands separator
tempDF = tempDF.reset_index()
tempDF['N'] = [f'{item:,}' for item in tempDF['N'].tolist()]
tempDF['FeatureID'] = tempDF['FeatureID'].str.cat(tempDF['N'], sep=' ('+r'$n$'+' = ')
tempDF['FeatureID'] = tempDF['FeatureID'].str.cat(np.repeat(')', len(tempDF)), sep='')
tempDF = tempDF.set_index('FeatureID')

#Visualize R2
##Features are ordered by R2 of the measured BMI model (descending)
tempDF1 = tempDF[[item+'_R2' for item in list(tempD.keys())]].sort_values(by=['BMI_R2'], ascending=False)
tempL = tempDF1.index.tolist()#Save order
##Long format: one row per (feature, BMI type); rows are grouped by BMI type in tempD order
tempDF1 = tempDF1.reset_index().melt(var_name='BMItype', value_name='R2', id_vars=['FeatureID'])
tempDF1['BMItype'] = tempDF1['BMItype'].str.replace('_R2', '')
##Style and annotation info
tempL1 = []
tempL2 = []
for row_i in range(len(tempDF1)):
    bmi = tempDF1.iloc[row_i]['BMItype']
    feature = tempDF1.iloc[row_i]['FeatureID']
    #Association (sign of the beta-coefficient)
    if tempDF.loc[feature, bmi+'_Bcoef']>0:
        tempL1.append('Positive association')
    elif tempDF.loc[feature, bmi+'_Bcoef']<0:
        tempL1.append('Negative association')
    else:#just in case
        tempL1.append('No association')
    #Significance
    if tempDF.loc[feature, bmi+'_AdjPval_all']<0.05:
        tempL2.append('FDR<0.05')
    else:
        tempL2.append('n.s.')
tempDF1['Association'] = tempL1
tempDF1['Signif'] = tempL2
##Plot
sns.set(style='ticks', font='Arial', context='talk')
plt.figure(figsize=(7.5, 18))
p = sns.barplot(data=tempDF1, x='R2', y='FeatureID', order=tempL, hue='BMItype', hue_order=tempD.keys(),
                palette=tempD, dodge=True, edgecolor='black', linewidth=1)
p.grid(axis='x', linestyle='--', color='black')
#NOTE(review): pairing bars with rows by position assumes p.patches follows the row order
#of tempDF1 (BMI type by BMI type, features in the order of tempL) -- confirm for the seaborn version in use
for rect, row_i in zip(p.patches, range(len(tempDF1))):
    xcoord = rect.get_width()
    ycoord = rect.get_y() + rect.get_height()/2
    #Add annotation (only non-significant bars are labeled)
    label = tempDF1.iloc[row_i]['Signif']
    bmi = tempDF1.iloc[row_i]['BMItype']
    if label=='n.s.':
        text_offset = 50
        hue_order = list(tempD.keys()).index(bmi)
        #Label is placed at half or full offset depending on the parity of the hue position
        #(presumably to reduce overlap between labels of neighboring bars)
        if xcoord > 0:
            if hue_order%2 == 0:
                offset = +text_offset/2
            else:
                offset = +text_offset
            halign = 'left'
        else:
            if hue_order%2 == 0:
                offset = -text_offset/2
            else:
                offset = -text_offset
            halign = 'right'
        p.annotate(label, xy=(xcoord, ycoord), xytext=(offset, 0), textcoords='offset points',
                   horizontalalignment=halign, verticalalignment='center',
                   fontsize='x-small', color='black',
                   arrowprops={'arrowstyle':'-', 'color':tempD[bmi], 'linewidth':1,
                               'shrinkA':offset/25, 'shrinkB':offset/5})
sns.despine()
plt.xlabel('Ratio of explained variance [%]')
plt.ylabel('')
#Shade every other feature row
for ycoord in range(len(tempDF)):
    if ycoord%2 == 0:
        plt.axhspan(ymin=ycoord-0.5, ymax=ycoord+0.5, facecolor='k', alpha=0.2)
plt.margins(0.02, 0.005, tight=True)
plt.legend(loc='lower right')
plt.show()

#Visualize beta-coefficient
##Features are ordered by the beta-coefficient of the measured BMI model (descending)
tempDF1 = tempDF[[item+'_Bcoef' for item in list(tempD.keys())]].sort_values(by=['BMI_Bcoef'], ascending=False)
tempL = tempDF1.index.tolist()#Save order
tempDF1 = tempDF1.reset_index().melt(var_name='BMItype', value_name='Bcoef', id_vars=['FeatureID'])
tempDF1['BMItype'] = tempDF1['BMItype'].str.replace('_Bcoef', '')
##Annotation info
tempL1 = []
tempL2 = []
for row_i in range(len(tempDF1)):
    bmi = tempDF1.iloc[row_i]['BMItype']
    feature = tempDF1.iloc[row_i]['FeatureID']
    #SE of the beta-coefficient
    tempL1.append(tempDF.loc[feature, bmi+'_BcoefSE'])
    #Significance
    if tempDF.loc[feature, bmi+'_AdjPval_all']<0.05:
        tempL2.append('FDR<0.05')
    else:
        tempL2.append('n.s.')
tempDF1['BcoefSE'] = tempL1
tempDF1['Signif'] = tempL2
##Plot
sns.set(style='ticks', font='Arial', context='talk')
plt.figure(figsize=(7.5, 18))
p = sns.barplot(data=tempDF1, x='Bcoef', y='FeatureID', order=tempL, hue='BMItype', hue_order=tempD.keys(),
                palette=tempD, dodge=True, edgecolor='white', linewidth=0.5, saturation=1)
p.grid(axis='x', linestyle='--', color='black')
#NOTE(review): same positional pairing of p.patches with tempDF1 rows as in the R2 plot -- confirm
for rect, row_i in zip(p.patches, range(len(tempDF1))):
    xcoord = rect.get_width()
    ycoord = rect.get_y() + rect.get_height()/2
    #Add error bar: standard error of the beta-coefficient, drawn at the bar end
    sem = tempDF1.iloc[row_i]['BcoefSE']
    plt.errorbar(x=xcoord, y=ycoord, xerr=sem, fmt='none', ecolor='k', elinewidth=1, capsize=2)
    #Add annotation (only non-significant bars are labeled)
    label = tempDF1.iloc[row_i]['Signif']
    bmi = tempDF1.iloc[row_i]['BMItype']
    if label=='n.s.':
        text_offset = 50
        hue_order = list(tempD.keys()).index(bmi)
        #Anchor the label at the outer end of the error bar, on the side the bar points to
        if xcoord > 0:
            if hue_order%2 == 0:
                offset = +text_offset/2
            else:
                offset = +text_offset
            halign = 'left'
            xcoord = xcoord + sem
        else:
            if hue_order%2 == 0:
                offset = -text_offset/2
            else:
                offset = -text_offset
            halign = 'right'
            xcoord = xcoord - sem
        p.annotate(label, xy=(xcoord, ycoord), xytext=(offset, 0), textcoords='offset points',
                   horizontalalignment=halign, verticalalignment='center',
                   fontsize='x-small', color='black',
                   arrowprops={'arrowstyle':'-', 'color':tempD[bmi], 'linewidth':1,
                               'shrinkA':offset/25, 'shrinkB':offset/5})
sns.despine()
plt.xlabel(r'$\beta$'+'-coefficient')
plt.ylabel('')
#Shade every other feature row
for ycoord in range(len(tempDF)):
    if ycoord%2 == 0:
        plt.axhspan(ymin=ycoord-0.5, ymax=ycoord+0.5, facecolor='k', alpha=0.2)
plt.margins(0.02, 0.005, tight=True)
plt.legend(loc='lower right')
plt.show()

### 2-3. For paper figure

> Basically, P < 0.05 should be described. However, because the figure is tiny, it is abbreviated, and the exact P-values will be provided in a supplementary table.  

In [None]:
#Paper figure: forest plot (beta-coefficient with 95% CI) of the features significantly
#associated with at least one BMI type, split over two side-by-side panels, saved as a TIFF file.
#Significantly associated features
print('Significantly associated features (FDR < 0.05):')
#BMI type -> matplotlib color; the key order also defines the hue order in the plot
tempD = {'BMI':'0.3', 'MetBMI':'b', 'ProtBMI':'r', 'ChemBMI':'g', 'CombiBMI':'m'}
tempL = []
for bmi in list(tempD.keys()):
    #Extract significantly associated features (P-values adjusted across all tests)
    tempDF = olsDF.loc[olsDF[bmi+'_AdjPval_all']<0.05]
    tempL.append(tempDF.index.tolist())
    print(' - '+bmi+':', len(tempDF))
##Flatten and drop duplicates -> union of features significant for any BMI type
tempL = list(set(item for sublist in tempL for item in sublist))
tempDF = olsDF.loc[olsDF.index.isin(tempL)]

#Add sample size to feature label -> 'FeatureLabel\n(n = N)', with thousands separator
tempDF = tempDF.reset_index()
tempDF['N'] = [f'{item:,}' for item in tempDF['N'].tolist()]
tempDF['FeatureLabel'] = tempDF['FeatureLabel'].str.cat(tempDF['N'], sep='\n('+r'$n$'+' = ')
tempDF['FeatureLabel'] = tempDF['FeatureLabel'].str.cat(np.repeat(')', len(tempDF)), sep='')
tempDF = tempDF.set_index('FeatureLabel')

#Visualize beta-coefficient with forest plot
##Sort feature by the beta-coefficient of the measured BMI model (descending)
tempDF1 = tempDF[[item+'_Bcoef' for item in tempD.keys()]].sort_values(by=['BMI_Bcoef'], ascending=False)
tempL = tempDF1.index.tolist()#Save order
##Clean DF for plot: long format with one row per (feature, BMI type) and
##the columns Bcoef, BcoefCIlow, and BcoefCIhigh
tempDF1 = None
for suffix in ['_Bcoef', '_BcoefCIlow', '_BcoefCIhigh']:
    tempDF2 = tempDF.reset_index().melt(var_name='BMItype', value_name=suffix[1:], id_vars=['FeatureLabel'],
                                        value_vars=[item+suffix for item in tempD.keys()])
    tempDF2['BMItype'] = tempDF2['BMItype'].str.replace(suffix, '')
    if tempDF1 is None:
        tempDF1 = tempDF2
    else:
        tempDF1 = pd.merge(tempDF1, tempDF2, on=['FeatureLabel', 'BMItype'], how='left')
##Convert CI points to difference values (lengths of the lower/upper error bar)
tempDF1['BcoefCIlow'] = tempDF1['Bcoef'] - tempDF1['BcoefCIlow']
tempDF1['BcoefCIhigh'] = tempDF1['BcoefCIhigh'] - tempDF1['Bcoef']
##Significance annotation
signifL = []
for row_i in range(len(tempDF1)):
    bmi = tempDF1['BMItype'].iloc[row_i]
    feature = tempDF1['FeatureLabel'].iloc[row_i]
    adj_pval = tempDF.loc[feature, bmi+'_AdjPval_all']
    #Significance
    if adj_pval<0.001:
        signifL.append('***')
    elif adj_pval<0.01:
        signifL.append('**')
    elif adj_pval<0.05:
        signifL.append('*')
    else:
        signifL.append('n.s.')
tempDF1['Signif'] = signifL
##Color for edge
tempDF1['EdgeColor'] = tempDF1['BMItype'].map(tempD)
##Plot
ncols = 2
#Features per panel: round up so that the last feature is not dropped when the number of
#features is not a multiple of ncols (the last panel then simply holds fewer features)
n_per_ax = int(np.ceil(len(tempL)/ncols))
sns.set(style='ticks', font='Arial', context='talk')
fig, axes = plt.subplots(nrows=1, ncols=ncols, figsize=(14, 11), sharex=False, sharey=False)
for ax_i, ax in enumerate(axes.flat):
    tempL1 = tempL[n_per_ax*ax_i:n_per_ax*(ax_i+1)]
    if len(tempL1) == 0:
        #Nothing to draw in this panel (fewer features than panels)
        ax.set_axis_off()
        continue
    tempDF2 = tempDF1.loc[tempDF1['FeatureLabel'].isin(tempL1)].reset_index()
    #Point
    sns.pointplot(data=tempDF2, x='Bcoef', y='FeatureLabel', order=tempL1,
                  hue='BMItype', hue_order=tempD.keys(), palette=tempD,
                  dodge=0.7, join=False, ci=None, markers='o', scale=0.6, ax=ax)
    #Get coordinate of each point
    #NOTE(review): assumes ax.collections holds one collection per BMI type (in tempD order),
    #each listing the features in the order of tempL1 -- confirm for the seaborn version in use
    xcoordL = []
    ycoordL = []
    for coord in ax.collections:
        for x, y in coord.get_offsets():
            xcoordL.append(x)
            ycoordL.append(y)
    #Add errorbars and annotation
    for point_i in range(len(tempDF2)):
        #Be careful about order of ax.collections: tempL -> tempD.keys()
        label = tempL1[point_i%len(tempL1)]
        bmi = list(tempD.keys())[point_i//len(tempL1)]
        row_i = tempDF2.loc[(tempDF2['FeatureLabel']==label)&(tempDF2['BMItype']==bmi)].index.tolist()[0]
        #Add errorbars manually (95% CI as asymmetric lower/upper lengths)
        ax.errorbar(x=xcoordL[point_i], y=ycoordL[point_i],
                    xerr=[[tempDF2['BcoefCIlow'].iloc[row_i]], [tempDF2['BcoefCIhigh'].iloc[row_i]]],
                    fmt='', ecolor=tempDF2['EdgeColor'].iloc[row_i], elinewidth=4, capsize=0, capthick=4,
                    linestyle='', zorder=1)
        #Add significance (asterisks only; non-significant points are left unlabeled)
        signif = tempDF2['Signif'].iloc[row_i]
        if signif!='n.s.':
            text_offset = 5
            #Anchor the asterisks at the outer end of the CI, on the side of the estimate's sign
            if xcoordL[point_i] > 0:
                xoffset = +text_offset
                halign = 'left'
                xcoord = xcoordL[point_i] + tempDF2['BcoefCIhigh'].iloc[row_i]
            else:
                xoffset = -text_offset
                halign = 'right'
                xcoord = xcoordL[point_i] - tempDF2['BcoefCIlow'].iloc[row_i]
            ycoord = ycoordL[point_i]
            yoffset = -4#Because asterisk looks upper shift
            ax.annotate(signif, xy=(xcoord, ycoord), xytext=(xoffset, yoffset), textcoords='offset points',
                        horizontalalignment=halign, verticalalignment='center',
                        fontsize='medium', color=tempD[bmi], zorder=2)
    #Add reference line
    #p.grid(axis='x', linestyle='--', color='k')
    ax.axvline(x=0, **{'linestyle':'--', 'color':'k', 'zorder':0})
    #Add shading (every other feature row)
    for ycoord in range(len(tempL1)):
        if ycoord%2 == 0:
            ax.axhspan(ymin=ycoord-0.5, ymax=ycoord+0.5, facecolor='k', alpha=0.2, zorder=0)
    #Ax-dependent setting (x-ranges are set manually for each panel)
    if ax_i == 0:
        #Range
        ax.set(xlim=(-0.02, 0.22), xticks=np.arange(0.0, 0.21, 0.1))
        #Add legend
        ax.get_legend().remove()
    elif ax_i ==1:
        #Range
        ax.set(xlim=(-0.12, 0.12), xticks=np.arange(-0.1, 0.11, 0.1))
        #Add legend
        ax.legend(title='BMI type', loc='lower right', handlelength=1.0, handletextpad=0.5)
sns.despine()
#Same y-range (panel capacity) for all panels
plt.setp(axes, ylim=(n_per_ax-0.5, -0.5))#Otherwise, axes are extended
plt.setp(axes, xlabel='', ylabel='')
fig.tight_layout()
fig.text(x=0.6175, y=0.0175,#Manual adjustment
         s=r'$\beta$'+'-coefficient [log-scaled measured or omics-inferred BMI (kg m'+r'$^{-2}$'+') per s.d.]',
         fontsize='medium', verticalalignment='top', horizontalalignment='center')
##Save
fileDir = './ExportFigures/'
ipynbName = '220803_Multiomics-BMI-NatMed1stRevision_BMI-regression_NumericFeatures_'
fileName = 'bcoef.tif'
fig.savefig(fileDir+ipynbName+fileName, dpi=300, bbox_inches='tight', pad_inches=0.04,
            pil_kwargs={'compression':'tiff_lzw'})
plt.show()

# — End of this notebook —