In [None]:
# data storage and handling
import pyreadr
import pandas as pd
import numpy as np

# plotting
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Translate from Dutch
from googletrans import Translator
translator = Translator()

# Let pandas display up to 500 rows before truncating printed frames/series
pd.set_option('display.max_rows', 500)

### RECODE ETHNICITY

In [None]:
# Folder that holds the input files and receives the outputs.
# NOTE(review): left empty here, so every f'{datadir}/...' path below resolves
# to the filesystem root ('/file'); set it before running.
datadir = "" # <== w - JAMA Psych rev 1

# Read in the original dataset and the parental country-of-birth variables.
# pyreadr.read_r returns a dict-like of data frames; an .rds file holds a single
# object stored under the key None.
o = pyreadr.read_r(f'{datadir}/PCM_allvars.rds')[None]  # Dataset with ELS variables
e = pd.read_spss(f'{datadir}/PARENTALBIRTHCOUNTRY_15102018.sav') # Ethnicity data

# Left-join on 'IDM' so every row of `o` is kept.
# NOTE(review): if 'IDM' is not unique in `e`, this join duplicates rows of `o` —
# confirm with the printed shape below.
d = o.merge(e, how='left',on='IDM')

print(d.shape)
# (d.shape[0] - d.isna().sum()) # observation count

In [None]:
# Follow the same approach as the original paper: the child takes the mother's
# country of birth, unless the mother is Dutch (or unknown) and the father has a
# known, specific country of birth. If both parents are born abroad, the
# mother's country is used.
def _child_birth_country(mom, dad):
    """Derive the child's country label from the parents' countries of birth.

    Parameters
    ----------
    mom, dad : str or NaN
        Country-of-birth labels (Dutch) of mother and father.

    Returns
    -------
    str or NaN
        NaN when both labels are missing; 'Nederland' when both are Dutch;
        the father's label when the mother's is missing/Dutch and the father's
        is neither missing nor 'Anders, zie tekstveld'; otherwise the mother's
        label (which may itself be NaN).
    """
    if pd.isna(mom) and pd.isna(dad):
        return np.nan
    if mom == dad == 'Nederland':
        return 'Nederland'
    # Plain boolean logic: the previous `&` / `~` on Python bools only worked
    # through integer bit-masking and `~bool` is deprecated since Python 3.12.
    mom_uninformative = pd.isna(mom) or mom == 'Nederland'
    dad_uninformative = pd.isna(dad) or dad == 'Anders, zie tekstveld'
    if mom_uninformative and not dad_uninformative:
        return dad
    return mom


d['child_ethn'] = [_child_birth_country(mom, dad)
                   for mom, dad in zip(d['GBLMOE_C'], d['GBLVAD_C'])]

# Recode east/west and "old" Germany as Germany and "old" Russia as Russia
d.loc[d['child_ethn'].isin(['Duitsland (oud)','Bondsrepubliek Duitsland','Duitse Democr. Rep']),'child_ethn']='Duitsland'
d.loc[d['child_ethn']=='Rusland (oud)','child_ethn'] = 'Rusland'

# Group countries according to the groups defined in a dictionary
def reassign_child(var_name, ethn_dict):
    """Add a grouped-ethnicity column to the global frame `d` and print a summary.

    Parameters
    ----------
    var_name : str
        Name of the column written to `d`.
    ethn_dict : dict[str, list[str]]
        Maps each group label to the list of `child_ethn` country labels it
        contains. If a country is listed under several groups, the first group
        (in dictionary order) wins.

    Raises
    ------
    KeyError
        If `d['child_ethn']` contains a country that is listed in no group.
        (Previously this surfaced as a bare ``IndexError`` that did not say
        which country was missing; both are ``LookupError`` subclasses.)

    Side effects
    ------------
    Writes `d[var_name]` (group label, or NaN where `child_ethn` is not a
    string) and prints one "<group> <count> (<percent>%)" line per group,
    missing values included.
    """
    # Invert {group: [countries]} once so each row is a single dict lookup
    # instead of a scan over every group; setdefault keeps the first group.
    country_to_group = {}
    for group, members in ethn_dict.items():
        for country in members:
            country_to_group.setdefault(country, group)

    countries = d['child_ethn']
    unmapped = sorted({c for c in countries
                       if isinstance(c, str) and c not in country_to_group})
    if unmapped:
        raise KeyError(f"countries not assigned to any group for '{var_name}': {unmapped}")

    d[var_name] = [country_to_group[c] if isinstance(c, str) else np.nan
                   for c in countries]

    summ = d[var_name].value_counts(dropna=False)
    total = summ.sum()
    for pos in range(summ.shape[0]):
        name = str(summ.index[pos])
        count = summ.iloc[pos]
        # Pad with tabs so the counts line up for short, medium and long labels
        sep = '\t\t' if len(name)> 15 else '\t\t\t' if len(name) > 7 else '\t\t\t\t'
        print(name+sep+str(count)+' ('+str(round(count/total*100,1))+'%)')


In [None]:
# ORIGINAL (GENR) ETHNICITY ENCODING
# Maps each group label (key) to the Dutch country labels (values) that
# reassign_child() looks up in d['child_ethn'].
# NOTE(review): 'Oekra‹ne' and 'São Tom‚ and Principe' look mis-encoded; presumably
# they reproduce the labels exactly as they appear in the source data — confirm
# before "fixing" them, since the lookup is an exact string match.
reass_old = {'Dutch':['Nederland'],
           'Turkish':['Turkije'],
        'Surinamese':['Suriname'],
          'Moroccan':['Marokko'],
      'Cape Verdean':['Kaapverdische Eilanden'],
   'Dutch Antillean':['Nederlandse Antillen'],
            'Europe':['Duitsland','Belgie','Frankrijk','Zwitserland','Oostenrijk','Luxemburg','Monaco',
                      'Grootbrittannie','Ierland','Zweden','Noorwegen','Denemarken','Finland','Man','IJsland',
                      'Estland','Letland','Litouwen',
                      'Portugal','Spanje','Italie','Griekenland','Canarische eilanden','Madeira-eilanden',
                      'Polen','Tsjechie','Oekra‹ne','Rusland','Wit-Rusland','Tsjetsjenië','Moldavie','Hongarije',
                      'Bulgarije','Roemenie','Slowakije','Slovenie','Joegoslavie','Bosnie-Herzegovina','Kroatie',
                      'Servië-Montenegro','Kosovo','Macedonie','Albanie'], 
  'Asia and Oceania':['China','Hongkong','Taiwan','Macau','Zuid-Korea','Korea','Japan','Kazachstan','Turkmenistan',
                      'Afghanistan','Pakistan','India','Sri Lanka','Nepal','Bangladesh',
                      'Thailand','Vietnam','Maleisie','Singapore','Oost-Timor','Filippijnen','Indonesie',
                      'Australie','Nieuw-Zeeland','Ned. Nieuwguinea','Vanuatu'],                        
'Africa and Middle East':['Algerije','Tunesie','Egypte','Soedan','Eritrea',
                      'Mauritanie','Mali','Senegal','Gambia','Guinee Bissau','Guinee','Sierra Leone','Liberia',
                      'Ivoorkust','Ghana','Togo','Benin','Nigeria','Kameroen','Gabon','São Tom‚ and Principe',
                      'Kongo','Kongo Kinshasa','Frans Kongo','Zaire','Dem. Rep. Congo','Burundi','Rwanda','Uganda',
                      'Ethiopie','Somalie','Kenia','Tanzania','Malawi','Mozambique','Zimbabwe','Mauritius','Angola',
                      'Zuid-Afrika','Afrika',
                      'Syrie','Irak','Iran','Georgie','Armenie','Azerbeidzjan','Libanon','Israel','Palestina',
                      'Saoedi-Arabie','Koeweit','Bahrein','Adjman','Jemen'],
     'North America':['Ver. Staten v. Amerika','Canada'],
     'Latin America':['Mexico','Belize','El Salvador','Costa Rica','Cuba','Haiti','Dominicaanse Republiek',
                      'Guadeloupe','Martinique','Trinidad en Tobago',
                      'Ecuador','Colombia','Venezuela','Guyana','Peru','Brazilie','Bolivia','Paraguay',
                      'Chili','Argentinie']}

# Write the grouped column d['ethn_genr'] and print the group counts
reassign_child('ethn_genr', reass_old)

In [None]:
# Group labels present in d['ethn_genr'], most frequent first
d['ethn_genr'].value_counts().index

In [None]:
# Define the alternative reassignment: group label (key) -> Dutch country labels
# (values) looked up in d['child_ethn'] by reassign_child().
# The trailing "(e.g. ...)" comments give example countries for each group.
reass_new = { # EUROPE ------------------------------------------------------------------------------------------
             'Dutch':['Nederland'],
    'Western Europe':['Duitsland','Belgie','Frankrijk','Zwitserland','Oostenrijk','Luxemburg','Monaco',
                      'Grootbrittannie','Ierland','Zweden','Noorwegen','Denemarken','Finland','Man','IJsland',
                      'Estland','Letland','Litouwen',
                      'Portugal','Spanje','Italie','Griekenland','Canarische eilanden','Madeira-eilanden'], 
# Unused alternative: a finer Western / Northern / Southern Europe split (kept for reference)
#   'Western Europe':['Nederland','Duitsland','Belgie','Frankrijk','Zwitserland','Oostenrijk','Luxemburg','Monaco'],
#  'Northern Europe':['Grootbrittannie','Ierland','Zweden','Noorwegen','Denemarken','Finland','Man','IJsland',
#                       'Estland','Letland','Litouwen'], 
#  'Southern Europe':['Portugal','Spanje','Italie','Griekenland','Canarische eilanden','Madeira-eilanden'], 
    'Eastern Europe':['Polen','Tsjechie','Oekra‹ne','Rusland','Wit-Rusland','Tsjetsjenië','Moldavie','Hongarije',
                      'Bulgarije','Roemenie','Slowakije','Slovenie','Joegoslavie','Bosnie-Herzegovina','Kroatie',
                      'Servië-Montenegro','Kosovo','Macedonie','Albanie'], # (e.g. Hungary, Poland, Russia)
           # AFRICA ---------------------------------------------------------------------------------------
      'North Africa':['Marokko','Algerije','Tunesie','Egypte','Soedan','Eritrea', # (e.g. Egypt, Morocco, Sudan)
                      'Kaapverdische Eilanden'],
'Sub-Saharan Africa':['Mauritanie','Mali','Senegal','Gambia','Guinee Bissau','Guinee','Sierra Leone','Liberia',
                      'Ivoorkust','Ghana','Togo','Benin','Nigeria','Kameroen','Gabon','São Tom‚ and Principe',
                      'Kongo','Kongo Kinshasa','Frans Kongo','Zaire','Dem. Rep. Congo','Burundi','Rwanda','Uganda',
                      'Ethiopie','Somalie','Kenia','Tanzania','Malawi','Mozambique','Zimbabwe','Mauritius','Angola',
                      'Zuid-Afrika','Afrika'], #  (e.g. Kenya, Nigeria, South Africa)
            # ASIA ----------------------------------------------------------------------------------------
 'West Asia / Middle East':['Turkije','Syrie','Irak','Iran', # (e.g. Iran, Israel, Saudi Arabia)
                            'Georgie','Armenie','Azerbeidzjan','Libanon','Israel','Palestina',
                            'Saoedi-Arabie','Koeweit','Bahrein','Adjman','Jemen'], 
'South and Southeast Asia':['Afghanistan','Pakistan','India','Sri Lanka','Nepal','Bangladesh',#  (e.g. India, Indonesia, Singapore)
                            'Thailand','Vietnam','Maleisie','Singapore','Indonesie','Oost-Timor','Filippijnen'],
   'East and Central Asia':['China','Hongkong','Taiwan','Macau','Zuid-Korea','Korea','Japan',
                            'Kazachstan','Turkmenistan'], #  (e.g. China, Japan, Uzbekistan)
         # OCEANIA ---------------------------------------------------------------------------------------
 'Pacific / Oceania':['Australie','Nieuw-Zeeland','Ned. Nieuwguinea','Vanuatu'], # (e.g. Australia, Papua New Guinea, Fiji)
         # AMERICA ---------------------------------------------------------------------------------------
                'North America':['Ver. Staten v. Amerika','Canada'], # (Canada, United States)
'Central America and Caribbean':['Mexico','Belize','El Salvador','Costa Rica',#  (e.g. Jamaica, Mexico, Panama)
                                 'Cuba','Haiti','Dominicaanse Republiek','Nederlandse Antillen',
                                 'Guadeloupe','Martinique','Trinidad en Tobago'], 
                'South America':['Ecuador','Colombia','Venezuela','Guyana','Suriname',# (e.g. Brazil, Chile, Colombia)
                                 'Peru','Brazilie','Bolivia','Paraguay','Chili','Argentinie']}
# Perform the reassignment: writes d['ethn_jcap'] and prints the group counts
reassign_child('ethn_jcap', reass_new)

In [None]:
# Migration background, coded from the parents' countries of birth:
#   ''  both parents' countries missing
#   0   both parents born in the Netherlands
#   1   exactly one parent recorded as born in the Netherlands
#   2   neither parent recorded as born in the Netherlands
def _migration_code(mom, dad):
    """Return the migration-background code for one mother/father pair."""
    if pd.isna(mom) and pd.isna(dad):
        return ''
    if mom == dad == 'Nederland':
        return 0
    if mom == 'Nederland' or dad == 'Nederland':
        return 1
    return 2


d['child_migr'] = [_migration_code(mom, dad)
                   for mom, dad in zip(d['GBLMOE_C'], d['GBLVAD_C'])]
d['child_migr'].value_counts(dropna=False)

In [None]:
# Distinct (non-missing) Dutch country labels found in child_ethn, alphabetical
dutch_bp = sorted(d['child_ethn'].value_counts().index)

# Some countries cannot be translated properly: map the translator's output
# (key) to the English name that should be stored instead (value)
bad_trans = {
    'Adjman': 'United Arab Emirates',
    'Bahrain': 'Saudi Arabia',
    'Chili': 'Chile',
    'Frans Guyana': 'French Guyana',
    'Frans Kongo': 'French Congo',
    'Georgie': 'Georgia',
    'Great Britain': 'United Kingdom',
    'Irak': 'Iraq',
    'Ivory Coast': "Côte d'Ivoire",
    'Kongo': 'Congo',
    'Congo Kinshasa': 'Congo',
    'Luxemburg': 'Luxembourg',
    'Macau': 'China',
    'fruit salads': 'North Macedonia',
    'Man': 'United Kingdom',
    'Ned. Nieuwguinea': 'Dutch New Guinea',
    'The Netherlands': 'Netherlands',
    'The Savior': 'El Salvador',
    'Okra‹ne': 'Ukraine',
    'Chechnya': 'Russia',
    'Far. States of. America': 'United States of America',
}

# Translate each label once, then apply the manual corrections
countries = {}
for bp in dutch_bp:
    eng_bp = translator.translate(bp).text
    # if eng_bp not in list(world.name):
    eng_bp = bad_trans.get(eng_bp, eng_bp)
    countries[bp] = eng_bp

# English country name per row; rows without a string label stay missing
d['child_ethn_eng'] = [np.nan if not isinstance(x, str) else countries[x]
                       for x in d['child_ethn']]

In [None]:
# SAVE DATASET
# Write the child identifier and the recoded ethnicity columns to CSV.
# to_csv keeps its default index=True here, so the row index is written as an
# extra unnamed first column.
d[['IDC','child_ethn','child_ethn_eng','child_migr','ethn_genr','ethn_jcap']].to_csv(f'{datadir}/Ethn_rec.csv')

In [None]:
def count_percat(ethn_dict):
    """Print, for every group in `ethn_dict`, the per-country counts in `d`.

    For each (group label -> list of country labels) entry, prints the group
    label, then the value counts of `d['child_ethn']` restricted to the
    countries of that group, then a blank separator.
    """
    for group, members in ethn_dict.items():
        in_group = d['child_ethn'].isin(members)
        print(group)
        print(d.loc[in_group, 'child_ethn'].value_counts())
        print('\n')
        
# count_percat(reass_old)
# count_percat(reass_new)

In [None]:
def wherefrom(group):
    """Tabulate mother/father country combinations for one `ethn_cont` group.

    Selects the rows of the global frame `d` whose 'ethn_cont' equals `group`,
    counts each (GBLMOE_C, GBLVAD_C) combination (missing values included) and
    returns the counts as a DataFrame sorted by the mother's country.
    """
    parents = d.loc[d['ethn_cont']==group, ['GBLMOE_C','GBLVAD_C']]
    combos = pd.DataFrame(parents.value_counts(dropna=False))
    return combos.sort_values('GBLMOE_C')

def wherefrom2(group):
    """Tabulate mother/father country combinations for one `child_ethn` value.

    Selects the rows of the global frame `d` whose 'child_ethn' equals `group`,
    counts each (GBLMOE_C, GBLVAD_C) combination (missing values included),
    prints the total number of rows counted and returns the counts as a
    DataFrame sorted by the mother's country.
    """
    parents = d.loc[d['child_ethn']==group, ['GBLMOE_C','GBLVAD_C']]
    w = pd.DataFrame(parents.value_counts(dropna=False)).sort_values('GBLMOE_C')
    # Take the single count column by position: it is labelled 0 before
    # pandas 2.0 and 'count' from 2.0 on, so `w[0]` breaks on newer versions.
    print(w.iloc[:, 0].sum())
    return w

# wherefrom('Dutch')
# wherefrom('European')
# wherefrom('Turkish')
# wherefrom('Surinamese')
# wherefrom('Maroccan')
# wherefrom('Dutch Antilles')
# wherefrom('African')
# wherefrom('American, non western')
# wherefrom('American, western')
# wherefrom('Indonesian')
# wherefrom('Oceanie')
# wherefrom('Asian, western')
# wherefrom('Asian, non western')

# wherefrom2('Kaapverdische Eilanden')

### Results 

In [None]:
# Read in data and results
# NOTE: this rebinds the global `d` (until now the merged recoding dataset) to
# the analysis sample; every function below reads this new `d`.
d = pd.read_csv(f'{datadir}/sample_ethn.csv')

def read_res_sheets(file, respath=datadir):
    """Load every sheet of an Excel results workbook into a global DataFrame.

    Parameters
    ----------
    file : str
        Workbook file name, relative to `respath`.
    respath : str
        Folder containing the workbook. The default is the value `datadir`
        had when this function was defined.

    Side effects
    ------------
    For each sheet, binds a module-level variable named after the sheet to the
    sheet's contents, indexed by its 'term' column. Existing globals with the
    same name are overwritten.
    """
    # Context manager so the workbook handle is closed even if parsing fails
    with pd.ExcelFile(f'{respath}/{file}') as xl:
        for sheet in xl.sheet_names:
            globals()[sheet] = xl.parse(sheet).set_index('term') # convert to dataframe

# Creates one global DataFrame per sheet of the workbook (named after the sheet)
read_res_sheets('ETHN_Results.xlsx')

## FIGURES (REVISION 2)

In [None]:
# Font sizes (points) shared by the figure helpers below
f1=35  # labels, ticks, titles
f2=31  # legends and in-plot annotations

In [None]:
def pie_ethn(ax, eth_code, order_dict, pop_last=False, n_total=4268):
    """Draw a donut chart of group sizes for one ethnicity coding.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on.
    eth_code : str
        Column of the global frame `d` holding the group labels.
    order_dict : dict[str, color]
        Colour for every group label found in `d[eth_code]`.
    pop_last : bool
        If True, a newline is appended to the last wedge label.
    n_total : int
        Denominator of the percentages. Defaults to 4268, the value that was
        previously hard-coded (presumably the sample size — TODO confirm).
    """
    counts = d[eth_code].value_counts()
    names = list(counts.index)
    # Positional access via to_numpy(): `counts[i]` with an integer on a
    # label-indexed Series is a deprecated positional fallback in pandas.
    sizes = counts.to_numpy()
    pcts = [float(round(n / n_total * 100, 1)) for n in sizes]

    lbls_pie = ["{} ({}%)".format(name, pct) for name, pct in zip(names, pcts)]
    if pop_last and lbls_pie:
        lbls_pie[-1] = lbls_pie[-1]+'\n'

    # No autopct is passed, so ax.pie returns (wedges, label texts)
    wedges, _ = ax.pie(counts, colors=[order_dict[name] for name in names], labels=lbls_pie,
                       labeldistance=1.05, textprops=dict(fontsize=f1),
                       wedgeprops=dict(width=0.3), startangle=3)

    lbls_leg = ["{} \nN={} ({}%)".format(name, n, pct)
                for name, n, pct in zip(names, sizes, pcts)]
    ax.legend(wedges, lbls_leg, # title="Ethnicity",
              loc="center left", fontsize=f1,
              bbox_to_anchor=(-0.4, 0, 0, 1))
# ==================================================================================================   
def els_byeth(ax, eth_code, order_dict, x, label):
    """Plot the distribution of one score per ethnic group (points + boxes).

    Draws, on `ax`, a horizontal strip plot of column `x` of the global frame
    `d` by group `eth_code`, overlays box plots coloured by `order_dict`
    (group label -> colour, also defining the group order), adds a dashed
    vertical line at 0 and uses `label` as the panel title.
    """
    shared = dict(ax=ax, x=x, y=eth_code, data=d, order=order_dict.keys())

    # individual observations
    sns.stripplot(color='black', marker='s', alpha=0.2, jitter=0.1, size=5, **shared)
    # one box per group, outlier markers hidden
    sns.boxplot(orient='h', palette=order_dict.values(), width=.7,
                flierprops={'marker': ''}, boxprops={'alpha':.95}, **shared)

    # dashed reference line at x = 0
    ax.axvline(x = 0, color = 'grey', ls='--', alpha=.8, dashes=(15, 5))
    ax.set_xlim(-1.6,4)
    ax.set_ylabel('')
    ax.set_xlabel(' ', fontsize=30)
    ax.set_title(label, fontsize=f1, fontweight='bold', pad=15)
    ax.tick_params(axis='both', which='major', labelsize=f1)
# ==================================================================================================   
def pcm_byeth(ax, eth_code, order_dict):
    """Stacked horizontal bars: share of each outcome group per ethnic group.

    Cross-tabulates 'risk_groups_perc' by `eth_code` in the global frame `d`,
    converts each ethnic group's row to percentages and plots the rows in the
    order of `order_dict`'s keys on `ax`.
    """
    clrs = {"Healthy":'seagreen',"High internalizing":'royalblue',"High adiposity":'orange',"Comorbid":'indianred'}
    out_order = ['healthy','internalizing_only','cardiometabolic_only','multimorbid']

    # rows = ethnic groups, columns = outcome groups (counts)
    dg = d.groupby(['risk_groups_perc', eth_code]).size().unstack(0)
    row_totals = dg.sum(axis=1)
    df_rel = dg[out_order].div(row_totals, axis=0)*100

    ordered = df_rel.loc[order_dict.keys()]
    ax = ordered.plot.barh(stacked=True, ax=ax, color=clrs.values())
    ax.invert_yaxis()
    ax.set_ylabel('')
    ax.set_xlabel(' ', fontsize=30)
    ax.set_xlim(-1,101)
    ax.tick_params(axis='both', which='major', labelsize=f1)
    # relabel the current x ticks as whole percentages
    ax.set_xticklabels([f'{round(tick)}%' for tick in ax.get_xticks()])

    ax.legend(clrs.keys(), loc="center right", fontsize=f2, bbox_to_anchor=(0, 1.06, 1.05, 0),
              ncol=4,columnspacing=0.5)


In [None]:
def regr_eths(out, ax, ins=(9,), maxor=19):
    """Forest plot of pre-/postnatal stress estimates per ethnic group.

    Parameters
    ----------
    out : str
        Outcome code. 'com' plots odds ratios from the global result frames
        `pre_com` / `pos_com` (rows with y.level 'H:multim'); any other value
        plots the 'estimate' column of the globals 'pre_<out>' / 'pos_<out>'.
    ax : matplotlib Axes
        Axes to draw on.
    ins : iterable of int
        Row positions annotated with 'Insufficient data' (only for 'com').
    maxor : number
        Upper x-limit for the odds-ratio panel.

    Reads the globals `eth_ordr` (group colours, by position), `f1` and `f2`.
    """
    if out=='com':
        md1 = pre_com.loc[pre_com['y.level']=='H:multim',].loc['prenatal_stress_z']
        md2 = pos_com.loc[pos_com['y.level']=='H:multim',].loc['postnatal_stress_z']
        xmin = -0.5; xmax= maxor; ref = 1; est = 'OR'
        xlab = 'OR estimate (ref = healthy)'
    else:
        md1 = globals()['pre_'+out].loc['prenatal_stress_z']
        md2 = globals()['pos_'+out].loc['postnatal_stress_z']
        xmin = -0.399; xmax= 0.7; ref = 0; est = 'estimate'
        xlab = r'Standardized $\beta $ estimate'

    # The result frames are indexed by 'term' (repeated strings), so rows are
    # addressed by position with .iloc; plain `series[n]` relied on a
    # positional fallback that pandas has deprecated.
    def cis(j, o, m = md1):
        """Error-bar half-widths for row j: asymmetric for ORs, 1.96*SE otherwise."""
        if o == 'com':
            point = m['OR'].iloc[j]
            return [[point - m['lci'].iloc[j]], [m['uci'].iloc[j] - point]]
        return [m['std.error'].iloc[j]*1.96]

    group_colors = list(eth_ordr.values())

    for n in range(len(md1)):
        ax.errorbar([md1[est].iloc[n]], n, xerr = cis(n, out), color='grey', alpha=0.7,
                    label = 'Prenatal stress',
                    fmt = 'o', ms=16,  elinewidth = 3, capsize = 14)
        ax.errorbar([md2[est].iloc[n]], n, xerr = cis(n, out, md2), color='black', alpha=0.7,
                    label = 'Postnatal stress',
                    fmt = 's', ms=16, elinewidth = 3, capsize = 14)
        # coloured square at the left edge identifying the ethnic group
        ax.errorbar(xmin, [md1['ethn_group'].iloc[n]], color = group_colors[n],
                    fmt = 's', ms=35, alpha=.8)

    # reference line: OR = 1 or beta = 0
    ax.axvline(x = ref, color = 'k', ls='--', alpha=.8, dashes=(10, 5))
    ax.invert_yaxis()
    ax.set_xlim(xmin,xmax)
    ax.tick_params(axis='both', which='major', labelsize=f1)
    ax.set_xlabel(xlab, fontsize=f1, labelpad=10)
    tit = 'Internalizing' if out=='int' else 'Adiposity' if out=='fat' else 'Comorbidity'
    ax.set_title('\n'+tit, fontsize=f1+5, fontweight='bold', pad=12)

    if out=='com':
        for i in ins:
            ax.text(1.5, i+0.2, 'Insufficient data', fontsize=f2, fontstyle='italic', color='red')
        ax.legend(['','Prenatal stress','Postnatal stress'], frameon=False,
                  ncol=1, loc="upper right", fontsize=f1, bbox_to_anchor=(1, 1.1));

In [None]:
# Figure for the original (GENR) coding: column name and group -> colour map.
# The key order of eth_ordr fixes the row order in panels B and, by position,
# the colour squares drawn by regr_eths in panel C.
eth_code = 'ethn_genr'      
eth_ordr = { 'Dutch':'darkblue',
             'Europe':'lightskyblue',
             'Moroccan':'pink',
             'Turkish':'crimson',
             'Cape Verdean':'sienna',
             'Africa and Middle East':'darkorange', 
             'Surinamese':'forestgreen',
             'Dutch Antillean':'gold',
             'Latin America':'lightgreen',
             'North America':'blueviolet',
             'Asia and Oceania':'turquoise'}

# Three stacked sub-figures: A (donut), B (descriptives), C (regression results)
f = plt.figure(figsize=(40, 40), constrained_layout = True)
subfigs = f.subfigures(3, 1, hspace=0, height_ratios=[4, 2.2, 2.5])

# A: group sizes
axs0 = subfigs[0].subplots(1, 1)
pie_ethn(axs0, eth_code, eth_ordr)

# B: outcome-group shares and ELS score distributions per ethnic group
axs1 = subfigs[1].subplots(1, 3, sharey=True)
pcm_byeth(axs1[0], eth_code, eth_ordr)
els_byeth(axs1[1], eth_code, eth_ordr, 'prenatal_stress_z','Prenatal ELS (z-score)')
els_byeth(axs1[2], eth_code, eth_ordr, 'postnatal_stress_z','Postnatal ELS (z-score)')

# C: per-group estimates, on a shaded background
axs2 = subfigs[2].subplots(1, 3, sharey=True)
subfigs[2].patch.set_facecolor('whitesmoke')
subfigs[2].suptitle('\nStatistical analyses', fontsize=f1+5, fontstyle='italic')

regr_eths('int',axs2[0])
regr_eths('fat',axs2[1])
regr_eths('com',axs2[2])

# Panel letters, positioned in sub-figure coordinates
subfigs[0].text(0.005, 0.90, 'A.', fontsize=70, fontweight='bold');
subfigs[1].text(0.002, 1.03, 'B.', fontsize=70, fontweight='bold');
subfigs[2].text(0.005, 0.91, 'C.', fontsize=70, fontweight='bold');

# Written to the current working directory (not to datadir)
f.savefig('Desc_ethn_genr.pdf', dpi=500, bbox_inches='tight')

In [None]:
# Load the supplementary results; sheets with the same names as in
# ETHN_Results.xlsx overwrite the globals created earlier.
read_res_sheets('ETHN_Results_supp.xlsx')

In [None]:
# Blank out odds ratios above 100 in both comorbidity result tables so those
# points are not drawn
for res in (pre_com, pos_com):
    res.loc[res['OR']>100, 'OR'] = np.nan

In [None]:
# Figure for the alternative (JCAP) coding: column name and group -> colour map.
# The key order of eth_ordr fixes the row order in panels B and, by position,
# the colour squares drawn by regr_eths in panel C.
eth_code = 'ethn_jcap'
eth_ordr = { 'Dutch':'darkblue',
             'Western Europe':'lightskyblue',
             'Eastern Europe':'mediumpurple',
             'North Africa':'sienna',
             'Sub-Saharan Africa':'darkorange',
             'West Asia / Middle East':'crimson',
             'East and Central Asia':'turquoise',
             'South and Southeast Asia':'teal',
             'North America':'pink',
             'Central America and Caribbean':'gold',
             'South America':'lightgreen',
             'Pacific / Oceania':'silver' }

# Three stacked sub-figures: A (donut), B (descriptives), C (regression results)
f = plt.figure(figsize=(40, 40), constrained_layout = True)
subfigs = f.subfigures(3, 1, hspace=0, height_ratios=[4, 2.2, 2.5])

# A: group sizes (pop_last appends a newline to the last wedge label)
axs0 = subfigs[0].subplots(1, 1)
pie_ethn(axs0, eth_code, eth_ordr, pop_last=True)

# B: outcome-group shares and ELS score distributions per ethnic group
axs1 = subfigs[1].subplots(1, 3, sharey=True)
pcm_byeth(axs1[0], eth_code, eth_ordr)
els_byeth(axs1[1], eth_code, eth_ordr, 'prenatal_stress_z','Prenatal ELS (z-score)')
els_byeth(axs1[2], eth_code, eth_ordr, 'postnatal_stress_z','Postnatal ELS (z-score)')

# C: per-group estimates, on a shaded background
# NOTE(review): the title uses fontsize=35 here but f1+5 in the ethn_genr
# figure — confirm whether the difference is intended.
axs2 = subfigs[2].subplots(1, 3, sharey=True)
subfigs[2].patch.set_facecolor('whitesmoke')
subfigs[2].suptitle('\nStatistical analyses', fontsize=35, fontstyle='italic')

regr_eths('int',axs2[0])
regr_eths('fat',axs2[1])
# rows at these positions get an 'Insufficient data' annotation
regr_eths('com',axs2[2], ins=[2,6,8,11])

# Panel letters, positioned in sub-figure coordinates
subfigs[0].text(0.005, 0.90, 'A.', fontsize=70, fontweight='bold');
subfigs[1].text(0.002, 1.03, 'B.', fontsize=70, fontweight='bold');
subfigs[2].text(0.005, 0.91, 'C.', fontsize=70, fontweight='bold');

# Written to the current working directory (not to datadir)
f.savefig('Desc_ethn_jcap.pdf', dpi=500, bbox_inches='tight')

In [None]:
# NOTE: rebinds the global `d` again, now to the file per_imp/imp20.csv;
# the plotting helpers defined below read this frame.
d = pd.read_csv(f'{datadir}/per_imp/imp20.csv')

# Font sizes for this figure; these overwrite the global f1/f2 set earlier, so
# re-running the earlier figure cells afterwards will use these larger values.
f1 = 24*2
f2 = 20*2
def plot_eth(outc, ax):
    """Plot pre-/postnatal stress estimates for the two ethnicity strata.

    Parameters
    ----------
    outc : str
        'Comorbidity' plots odds ratios from the global `eth_int_comorb`
        (rows whose 'y.level' contains 'Comorbidity'); any other value plots
        the 'estimate' column of the global `eth_int_single` for the rows
        whose 'model' contains `outc`.
    ax : matplotlib Axes
        Axes to draw on.

    The selected rows are assumed to come in the order (prenatal, prenatal,
    postnatal, postnatal) with the 'non-White' row first in each pair, which
    is how colours and labels are assigned by position — TODO confirm against
    the result sheets.
    """
    colors=['mediumpurple','lightpink']*2
    pred = ['Prenatal stress']*2+['Postnatal stress']*2
    lbls = ['non-White','White']*2

    if outc == 'Comorbidity':
        xlims = [0.9,3.5]
        xticks = [1,1.5,2,2.5,3,3.5]
        ref = 1
        est = 'OR'
        xlab = 'OR estimate (reference = healthy)'
        md = eth_int_comorb.loc[['prenatal_stress_z','postnatal_stress_z']]
        # Explicit copy: new columns are added below, which on a slice
        # triggers SettingWithCopyWarning.
        md = md.loc[md['y.level'].str.contains('Comorbidity'),].copy()
        # Distances from the OR to each confidence limit.
        # NOTE(review): each side is shortened by 0.08; the reason for this
        # offset is not visible here — confirm it is intended.
        md['LCI'] = md['OR'] - (md['lci']+0.08)
        md['UCI'] = (md['uci']-0.08) - md['OR']
    else:
        xlims = [-0.01,0.4]
        xticks = [0,0.1,0.2,0.3,0.4]
        ref = 0
        est = 'estimate'
        xlab = r'Standardized $\beta $ estimate'
        md = eth_int_single.loc[['prenatal_stress_z','postnatal_stress_z']]
        md = md.loc[md['model'].str.contains(outc),]

    # Rows are addressed by position (.iloc): the frame is indexed by repeated
    # term strings, and `series[j]` relied on a deprecated positional fallback.
    def cis(j, o):
        """Error-bar half-widths for row j: asymmetric for ORs, 1.96*SE otherwise."""
        if o == 'Comorbidity':
            return [[md['LCI'].iloc[j]], [md['UCI'].iloc[j]]]
        return [md['std.error'].iloc[j]*1.96]

    for n in range(len(md)):
        ax.errorbar([md[est].iloc[n]], [pred[n]], xerr = cis(n, outc), color=colors[n], alpha=0.8,
                    label = lbls[n], ms=11, fmt = 's', elinewidth = 2.5, capsize = 10)

    ax.set_ylim(-1,2)
    ax.invert_yaxis()
    ax.set_xticks(xticks)
    ax.tick_params(axis='both', which='major', labelsize=f2)
    # add a vertical dotted line indicating beta = 0 or OR = 1.
    ax.axvline(x = ref, ymin = -1, ymax = 4, color = 'grey', ls = '--', lw = 1)
    ax.set_xlim(xlims)
    # Add x-label
    ax.set_xlabel(xlab, fontsize=f2-4, labelpad=10)
    ax.set_title(outc, fontsize=f2, fontweight='bold', pad=15)
    #ax.legend(fontsize=18, loc='best', ncol=1)
    
# ====================================================================================================================
def plot_elsbyeth(y, ax, label):
    """Density plot of score `y` by the binary 'ethnicity' column of `d`.

    Draws one vertical line at the mean of `y` for each ethnicity value
    (1 in light pink, 0 in purple), overlays filled kernel-density curves per
    ethnicity value and adds a 'White' / 'non-White' legend.

    Parameters
    ----------
    y : str
        Column of the global frame `d` to plot.
    ax : matplotlib Axes
        Axes to draw on.
    label : str
        Panel title.
    """
    ax.axvline(x=d.loc[d.ethnicity==1, y].mean(), color='lightpink', lw=4, alpha=0.3)
    ax.axvline(x=d.loc[d.ethnicity==0, y].mean(), color='mediumpurple', lw=4, alpha=0.3)
    # `fill=` replaces the deprecated `shade=` keyword of seaborn.kdeplot
    sns.kdeplot(data=d, x=y, hue='ethnicity', fill=True, ax=ax, palette=['mediumpurple','lightpink'])
    ax.set_ylabel('Density', fontsize=20)
    ax.set_title(label, fontsize=f2, fontweight='bold', pad=10)
    ax.set_xlabel(' ', fontsize=f2, fontweight='bold')
    ax.set_xlim([-2,5])
    ax.tick_params(axis='both', which='major', labelsize=20)

    leg = ax.legend(['White','non-White'], fontsize = f2, loc='upper right', ncol=1)
    # Legend.legendHandles was renamed legend_handles (matplotlib 3.7) and the
    # old name later removed; support both.
    leg_handles = getattr(leg, 'legend_handles', None)
    if leg_handles is None:
        leg_handles = leg.legendHandles
    # make the legend swatches opaque even though the plotted artists are not
    for lh in leg_handles:
        lh.set_alpha(1)

        
# Category order for the x axis of plot_ethn (values of d['ethn_cont']) ...
eth_order = ['American, western','Asian, western', 'Dutch', 'European',
         'African', 'American, non western','Asian, non western', 'Cape Verdian','Dutch Antilles', 'Indonesian',
         'Maroccan','Oceanie','Surinamese','Turkish']
# ... and the tick labels shown for them, in the same order (line breaks added
# to the longer names)
eth_labels = ['American,\nwestern','Asian,\nwestern', 'Dutch', 'European',
         'African', 'American,\nnon western','Asian,\nnon western', 'Cape Verdian','Dutch\nAntilles', 'Indonesian',
         'Maroccan','Oceanie','Surinamese','Turkish']

def plot_ethn(y, ax, label):
    """Box plots of score `y` per 'ethn_cont' category, split by outcome group.

    Parameters
    ----------
    y : str
        Column of the global frame `d` to plot on the y axis.
    ax : matplotlib Axes
        Axes to draw on.
    label : str
        y-axis label.

    Uses the globals `eth_order` (category order) and `eth_labels` (tick
    labels). Two coloured horizontal bars are drawn along the bottom
    (light pink over the first categories, purple over the rest).
    """
    lbls=['Healthy','High internalizing','High adiposity', 'Comorbidity']
    ax.axhline(y=0, color='grey', lw=4, alpha=0.3)
    #ax.axvline(x=3.5, color='grey', lw=4, alpha=0.3)
    sns.boxplot(x='ethn_cont', y=y, hue='risk_groups_perc', data=d, ax=ax, order=eth_order,
                boxprops={'alpha': 0.5}, width=0.6,
                palette=["C2", "C0", "C1", "C3"], showfliers=False)

    #ax.grid(axis='y')
    # xmin/xmax are fractions of the axes width
    ax.axhline(y=-2.4, xmin=0.01, xmax=0.28, color='lightpink', lw=4)
    ax.axhline(y=-2.4, xmin=0.29, xmax=0.99, color='mediumpurple', lw=4)
    ax.set_xlabel('')
    ax.set_ylabel(label, fontsize=30, fontweight='bold')
    ax.set_ylim([-2.5,6.5])
    ax.set_xticklabels(eth_labels, fontsize=30)
    ax.tick_params(axis='y', which='major', labelsize=20)

    handles,_ = ax.get_legend_handles_labels()
    leg = ax.legend(handles, lbls, fontsize = f2, loc='upper right', ncol=4)
    # Legend.legendHandles was renamed legend_handles (matplotlib 3.7) and the
    # old name later removed; support both.
    leg_handles = getattr(leg, 'legend_handles', None)
    if leg_handles is None:
        leg_handles = leg.legendHandles
    # match the legend swatches to the semi-transparent boxes
    for lh in leg_handles:
        lh.set_alpha(0.5)

# ====================================================================================================================
#f, axs = plt.subplots(1, 3, figsize=(20, 1.8), constrained_layout = True)

# Two stacked sub-figures: a thin top row with three estimate panels and a
# tall bottom block with four descriptive panels
f = plt.figure(figsize=(40, 35), constrained_layout = True)
subfigs = f.subfigures(2, 1, hspace=0.03, height_ratios=[0.7, 4])
axs0 = subfigs[0].subplots(1, 3)
axs1 = subfigs[1].subplots(4, 1)

# Top row: one panel per outcome
plot_eth('Internalizing', axs0[0])
plot_eth('Adiposity', axs0[1])
plot_eth('Comorbidity', axs0[2])

# add legend: only the first two handles are used, since the labels repeat
# for the postnatal pair
handles, labels = axs0[2].get_legend_handles_labels()
axs0[2].legend(handles[0:2], labels[0:2], fontsize=f2, bbox_to_anchor=(0, 1.3), loc='lower left', ncol=2)

#axs0.subplots_adjust(wspace=0.7)
def add_letters(axs, left=-0.45, up=1.1, from_letter=0, s=f1): # add letters to each subplot
    """Label every Axes in `axs` with consecutive capital letters ('A.', 'B.', ...).

    Parameters
    ----------
    axs : numpy array of Axes
        Axes to label, visited in `axs.flat` order.
    left, up : float
        Text position in axes coordinates.
    from_letter : int
        Alphabet offset of the first label (0 -> 'A', 3 -> 'D').
    s : number
        Font size; the default is the value `f1` had when this function was
        defined.
    """
    # `string` is not imported anywhere in the notebook, so import it here
    import string

    for n, aa in enumerate(axs.flat, start=from_letter):
        aa.text(left, up, string.ascii_uppercase[n]+'.', transform=aa.transAxes, size=s, weight='bold')

# Letters A-C on the top row, D-G on the four bottom panels
add_letters(axs0, left=-0.50, up=1.1)
add_letters(axs1, left=-0.06, up=0.9, from_letter=3)

# Bottom block: for each ELS score, a density panel followed by a box-plot panel
plot_elsbyeth('prenatal_stress_z', ax=axs1[0],label='\n\nPrenatal ELS (z-score)')
plot_ethn('prenatal_stress_z', ax=axs1[1],label='Prenatal ELS (z-score)')
plot_elsbyeth('postnatal_stress_z', ax=axs1[2],label='\n\nPostnatal ELS (z-score)')
plot_ethn('postnatal_stress_z', ax=axs1[3],label='Postnatal ELS (z-score)')
#plot_elsbyeth('intern_score_13_z', ax=axs[4],label='Internalizing (z-score)')
#plot_elsbyeth('tot_fat_percent_13_z', ax=axs[5],label='Adiposity (z-score)')


# f.savefig(output_path+'/eF5.pdf', format='pdf', bbox_inches='tight', dpi=500)
#f.savefig('/Users/Serena/Desktop/ReviewJAMA/ethn_followup.pdf', format='pdf', bbox_inches='tight', dpi=500)


## OTHER FIGURES

In [None]:
# MAP -------------------------------------------------------------------------------------------------------------
import geopandas 
from googletrans import Translator

# Country polygons used as the base map; the code below relies on its 'name',
# 'continent' and 'geometry' columns.
# NOTE(review): geopandas.datasets was deprecated in geopandas 0.14 and removed
# in 1.0; on newer versions the Natural Earth file must be loaded from a local
# path or URL instead.
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))

# Re-created here so this section can run without the recoding section above
translator = Translator()

In [None]:
def translate_and_map(var):
    """Translate the country labels in `d[var]` and add per-country counts to `world`.

    Parameters
    ----------
    var : str
        Column of the global frame `d` with Dutch country labels. 'GBLMOE_C'
        and 'GBLVAD_C' produce columns prefixed 'mom' / 'dad'; any other
        column name is prefixed 'child'.

    Side effects
    ------------
    * `d['<prefix>_bp_rec']`: English country name per row (NaN where the
      label is not a string).
    * `world['<prefix>_cont']`: number of rows of `d` per country in `world`
      (0 where the country does not occur).
    * `world['<prefix>_cate']`: that count binned into labelled categories
      (counts of 0 fall outside the first bin and become NaN).
    * when `var == 'child_ethn'`, also `world['continent_rec']`: the most
      frequent `d['child_ethn_rec']` value among the rows of each country.

    Makes one translator request per distinct label.
    """
    # Corrections applied to the translator's output (key) so that the stored
    # name (value) can be matched against world['name']; places that are not
    # separate entries there are mapped to another country.
    bad_trans = {
        'Other, see text field': np.nan,
        'Adjman': 'United Arab Emirates',
        'Africa': 'South Africa',
        'Bahrain': 'Saudi Arabia',
        'Federal Republic of Germany': 'Germany',
        'Bosnia-Herzegovina': 'Bosnia and Herz.',
        'Canary Islands': 'Spain',
        'Chili': 'Chile',
        'Dominican Republic': 'Dominican Rep.',
        'German Democrat. Rep': 'Germany',
        'Germany (old)': 'Germany',
        'Frans Guyana': 'Guyana',
        'Frans Kongo': 'Congo',
        'French West Africa': 'Senegal',
        'Georgie': 'Georgia',
        'Great Britain': 'United Kingdom',
        'Guadeloupe': 'Puerto Rico',
        'Guinea Bissau': 'Guinea-Bissau',
        'Hongkong': 'China',
        'Irak': 'Iraq',
        'Ivory Coast': "Côte d'Ivoire",
        'Yugoslavia': 'Croatia',
        'Capeverdian islands': 'Morocco',
        'Kongo': 'Congo',
        'Congo Kinshasa': 'Congo',
        'Korea': 'South Korea',
        'Luxemburg': 'Luxembourg',
        'Macau': 'China',
        'fruit salads': 'North Macedonia',
        'Man': 'United Kingdom',
        'Madeira Islands': 'Morocco',
        'Martinique': 'Puerto Rico',
        'Mauritius': 'Madagascar',
        'Monaco': 'France',
        'Ned. Nieuwguinea': 'Guinea',
        'The Netherlands': 'Netherlands',
        'The Savior': 'El Salvador',
        'Netherlands Antilles': 'Dominican Rep.',
        'Okra‹ne': 'Ukraine',
        'East Timor': 'Indonesia',
        'Russia (old)': 'Russia',
        'Serbia-Montenegro': 'Serbia',
        'Singapore': 'Malaysia',
        'Sao Tom‚ and Principe': 'Eq. Guinea',
        'Czech Republic': 'Czechia',
        'Chechnya': 'Russia',
        'Far. States of. America': 'United States of America',
        'Zaire': 'Dem. Rep. Congo',
    }

    # Translate every distinct label once, then apply the corrections
    dutch_bp = sorted(d[var].value_counts().index)
    countries = {}
    for bp in dutch_bp:
        eng_bp = translator.translate(bp).text
        # if eng_bp not in list(world.name):
        countries[bp] = bad_trans.get(eng_bp, eng_bp)

    name = {'GBLMOE_C': 'mom', 'GBLVAD_C': 'dad'}.get(var, 'child')
    rec_col = name+'_bp_rec'

    d[rec_col] = [np.nan if not isinstance(x, str) else countries[x] for x in d[var]]
    counts = d[rec_col].value_counts()

    if var == 'child_ethn':
        # most frequent recoded group among the children of each country
        world['continent_rec'] = [d.loc[d['child_bp_rec']==x, 'child_ethn_rec'].value_counts().index[0]
                                  if x in counts.index else np.nan for x in world['name']]

    world[name+'_cont'] = [counts[x] if x in counts.index else 0 for x in world['name']]

    # NOTE(review): the labels do not match the bin edges exactly ('35-100'
    # labels the (30, 100] bin, '>4.000' the (900, 6000] bin); presumably they
    # were chosen from the observed counts — confirm.
    bin_edges = [0,15,30,100,200,300,500,700,900,6000]
    bin_labels = ['<15','16-30','35-100','100-200','200-300','300-500',
                  '500-700','700-900','>4.000']
    world[name+'_cate'] = pd.cut(world[name+'_cont'], bins=bin_edges, labels=bin_labels)


In [None]:
# Adds the mom_*, dad_* and child_* count/category columns to `world`
# (and continent_rec on the last call). Each call queries the translator.
translate_and_map('GBLMOE_C')
translate_and_map('GBLVAD_C')
translate_and_map('child_ethn')

In [None]:
# Keep only the countries with at least one child. Take an explicit copy
# because world1 is modified afterwards (the French geometry is replaced);
# assigning into a filtered slice triggers SettingWithCopyWarning.
world1 = world[world['child_cont']>0].copy()

In [None]:
# Exclude overseas territories from the French geometry, keeping the mainland
from shapely.geometry import Polygon
europe = world[world.continent == 'Europe']

# Look France up by name rather than by a hard-coded row label, and use the
# geometry API instead of parsing the WKT text of the shape.
france_geom = europe.loc[europe['name'] == 'France', 'geometry'].iloc[0]
# A MultiPolygon exposes its parts through .geoms; a plain Polygon is its own
# single part.
france_parts = list(getattr(france_geom, 'geoms', [france_geom]))
# The mainland is the largest part (the previous code took the second polygon
# of the WKT string, which relied on the part order in the dataset).
mainland = max(france_parts, key=lambda part: part.area)
France_mainland = Polygon(list(mainland.exterior.coords))

world1.loc[world1['name']=='France','geometry'] = France_mainland

In [None]:
def make_map(colors,n=9):
    """Build a colormap that interpolates linearly through `colors`.

    Parameters
    ----------
    colors : list
        Colour specifications (names or hex strings), from low to high.
    n : int
        Number of discrete levels in the colormap.

    Returns
    -------
    matplotlib.colors.LinearSegmentedColormap
    """
    # Imported here because the notebook only binds `plt`
    # (`import matplotlib.pyplot as plt`); the bare name `matplotlib` is never
    # defined, so `matplotlib.colors...` raised NameError.
    from matplotlib.colors import LinearSegmentedColormap
    return LinearSegmentedColormap.from_list("", colors, N=n)
    
# One light-to-dark colormap per world region. The trailing comments name the
# intended hue; the parenthesised hex pairs are earlier alternatives.
# Europe (west, south and north share the same blue ramp)
w_euro = make_map(['#a6bddb','#0c2c84']) # blue
e_euro = make_map(['#D2CFE7','#8856a7']) # ('#F3ECFF','#5C00F9') # purple
s_euro = make_map(['#a6bddb','#0c2c84']) # ('#e0f3db','#43a2ca') # green-blue
n_euro = make_map(['#a6bddb','#0c2c84']) # ('#e0ecf4','#8856a7') # purple

# Africa
n_afri = make_map(['#FFF0F4','#99000d']) # ('#fec44f','#cc4c02') # pink darkred
s_afri = make_map(['#FFEFE1','#99000d']) # orange dark red

# Asia
w_asia = make_map(['#FFFCC2','#F0C802']) # yellow-gold
s_asia = make_map(['#FFE4C4','brown'])
e_asia = make_map(['#e5f5f9','turquoise'])

# Americas
s_amer = make_map(['#e5f5f9','#2ca25f']) # green
c_amer = make_map(['#FFFCE9','#DAC54D']) # yellow-ocra
n_amer = make_map(['#FEFBFF','pink']) # violet

# Oceania
oceani = make_map(['#ECEBEB','grey']) # grey-black

# The plotting cell indexes this list by the position of each group in the
# alphabetically sorted values of world1['continent_rec'].
# NOTE(review): the order here must match that sorted order exactly — confirm
# against the actual group labels.
cmaps = [c_amer,e_asia,e_euro,n_afri,n_amer,n_euro,oceani,s_amer,s_asia,s_euro,s_afri,w_asia,w_euro]

In [None]:
# World map of the children's countries, each recoded group drawn with its own
# colormap and shaded by the binned count (child_cate)
fig, ax = plt.subplots(1, 1, figsize=(20,20))

# cmaps[i] is matched to groups by their position in alphabetical order
for i, continent in enumerate(sorted(world1['continent_rec'].value_counts().index)):
    world1.loc[world1['continent_rec']==continent].plot(column='child_cate', ax=ax, 
                                                        edgecolor="white",linewidth=1,
                                                        legend=False, cmap=cmaps[i])

ax.set_axis_off()
# fixed longitude/latitude window
ax.set_ylim(-56,77); ax.set_xlim(-170,200);
fig.tight_layout()

In [None]:
# Same map with a single colour scale for all countries, plus a legend
fig, ax = plt.subplots(1, 1, figsize=(17,17))

world1.plot(column='child_cate', ax=ax, edgecolor="white",linewidth=1, legend=True,
            cmap=make_map(['#ECECEC','#C5E0FF','#95E180','#FFE800','#B00000'],10))
ax.set_axis_off()
# fixed longitude/latitude window
ax.set_ylim(-56,77); ax.set_xlim(-170,200);
fig.tight_layout()

# Written to the current working directory
fig.savefig('./map.pdf', dpi=500, bbox_inches='tight')

In [None]:
# Maps of the mothers' (top, reds) and fathers' (bottom, blues) countries of
# birth, shaded by the binned counts
fig, (ax1,ax2) = plt.subplots(2, 1, figsize=(20,20))

# The former `cax=cax` argument is dropped: `cax` is never defined in this
# notebook, so the cell raised NameError. The plotted columns are categorical,
# so the legend is a regular legend drawn on each axes, not a colorbar.
world1.plot(column='mom_cate', ax=ax1, legend=True, cmap='Reds')
world1.plot(column='dad_cate', ax=ax2, legend=True, cmap='Blues')

for ax in [ax1,ax2]:
    ax.set_axis_off()
    # fixed longitude/latitude window
    ax.set_ylim(-56,80); ax.set_xlim(-170,180);

fig.tight_layout()