In [None]:
import pandas as pd
import numpy as np
from numpy import sqrt
from matplotlib import pyplot as plt
import seaborn as sns
import os


import scipy.stats as scipystats
from scipy.stats import pearsonr

import sklearn
from sklearn.metrics import mean_squared_error
from sklearn.metrics import confusion_matrix, mean_squared_error


import xgboost as xgb


import statsmodels
import statsmodels.formula.api as smf
import statsmodels.api as sm
import statsmodels.stats as stats 
import statsmodels.stats.stattools as stools
from statsmodels.graphics.regressionplots import *
from tabulate import tabulate
from prettytable import PrettyTable

In [None]:
# Global seaborn/matplotlib axes style: white background with a light grid,
# all four spines drawn, tick marks switched off on every side.
# ('lines.solid_capstyle' is deliberately not set here.)
sns.set_style(style={
    # backgrounds
    'figure.facecolor': 'white',
    'axes.facecolor': 'white',
    # axes frame, labels and text colours
    'axes.edgecolor': '.15',
    'axes.labelcolor': '.15',
    'text.color': '.15',
    'axes.spines.left': True,
    'axes.spines.bottom': True,
    'axes.spines.right': True,
    'axes.spines.top': True,
    # grid, drawn below the data
    'axes.grid': True,
    'axes.axisbelow': True,
    'grid.color': '.8',
    'grid.linestyle': '-',
    # ticks: coloured and outward-facing, but no tick marks drawn
    'xtick.color': '.15',
    'ytick.color': '.15',
    'xtick.direction': 'out',
    'ytick.direction': 'out',
    'xtick.bottom': False,
    'xtick.top': False,
    'ytick.left': False,
    'ytick.right': False,
    # patches and images
    'patch.edgecolor': 'w',
    'patch.force_edgecolor': True,
    'image.cmap': 'rocket',
    # fonts
    'font.family': ['sans-serif'],
    'font.sans-serif': [
        'Arial',
        'DejaVu Sans',
        'Liberation Sans',
        'Bitstream Vera Sans',
        'sans-serif',
    ],
})

In [None]:
# Global plotting context: font sizes plus line, tick and patch dimensions.
sns.set_context(context={
    # font sizes
    'font.size': 17.0,
    'axes.labelsize': 'medium',
    'axes.titlesize': 'large',
    'xtick.labelsize': 'medium',
    'ytick.labelsize': 'medium',
    'legend.fontsize': 'medium',
    'legend.title_fontsize': None,
    # line widths and marker size
    'axes.linewidth': 0.8,
    'grid.linewidth': 0.8,
    'lines.linewidth': 1.5,
    'lines.markersize': 6.0,
    'patch.linewidth': 1.0,
    # tick geometry
    'xtick.major.width': 0.8,
    'ytick.major.width': 0.8,
    'xtick.minor.width': 0.6,
    'ytick.minor.width': 0.6,
    'xtick.major.size': 3.5,
    'ytick.major.size': 3.5,
    'xtick.minor.size': 2.0,
    'ytick.minor.size': 2.0,
})

In [None]:
# Output directory for figures, including the trailing separator so file
# names can simply be appended. Built with os.path.join so the separator
# matches the host OS instead of a hard-coded Windows backslash.
plotdir = os.path.join('Plots', '')

## Skip to chapter "Load combined dataset" 

In [None]:
# Per-subject table; the cells below read its 'subjectkey',
# 'interview_age_M1', 'BAG' and 'prob post' columns.
# os.path.join keeps the path valid on non-Windows systems as well.
BAGandMenarche = pd.read_csv(os.path.join('processedData', 'BAGandMenarche.csv'))

In [None]:
# Regress the brain age gap (BAG) on age (interview_age_M1); the residuals of
# this fit are stored as 'BAG_resids' in the following cell.
X = BAGandMenarche['interview_age_M1']
y = BAGandMenarche['BAG']

# sm.OLS does not add an intercept by itself
X = sm.add_constant(X)

model = sm.OLS(y, X)
results = model.fit()

# Compact coefficient table instead of the full results.summary()
table = PrettyTable()
table.field_names = ["Variable", "Coefficient", "Std. Error", "t-value", "p-value"]

# Look every statistic up by parameter *label*. Integer keys on a
# string-indexed Series (results.bse[0], ...) are treated as positions only
# through a deprecated pandas fallback.
for name in results.params.index:
    table.add_row([
        f" {name}",
        "{:.4f}".format(results.params[name]),
        "{:.4f}".format(results.bse[name]),
        "{:.4f}".format(results.tvalues[name]),
        "{:.4f}".format(results.pvalues[name]),
    ])

print(table)

In [None]:
# Store the residuals of the most recent `results` fit (BAG ~ age when cells are run in order)
BAGandMenarche['BAG_resids'] = results.resid

In [None]:
# Whitespace-delimited MRI info table. The separator is a raw string because
# '\s' inside a plain string literal is an invalid escape sequence (a
# warning on current Python versions); os.path.join keeps the path portable.
scannerdata = pd.read_csv(os.path.join('ABCDTabular', 'abcd_mri01.txt'), sep=r'\s+')
# Drop the row labelled 0 — presumably the column-description row that follows
# the header in this file; TODO confirm.
scannerdata = scannerdata.drop(index=0)

In [None]:
# Keep only the MRI records of subjects that appear in the BAG/menarche table
scanred = scannerdata.loc[scannerdata['subjectkey'].isin(BAGandMenarche['subjectkey'])]

In [None]:
# Restrict to the 2-year follow-up event
scanred = scanred[scanred['eventname'] == '2_year_follow_up_y_arm_1']

In [None]:
# Subject id plus scanner serial number; .copy() detaches the result from scanred
scanner = scanred[['subjectkey', 'mri_info_deviceserialnumber']].copy()

In [None]:
# Attach the scanner serial number to each subject.
# NOTE(review): an outer join duplicates subjects if `scanner` has more than one row per subjectkey — confirm the keys are unique.
BAGandMenarchescanner = BAGandMenarche.merge(scanner, how = 'outer', on = 'subjectkey')

In [None]:
# One-hot encode the scanner; drop_first=True leaves the first level out as reference category
BAGandMenDummies = pd.get_dummies(BAGandMenarchescanner, columns=['mri_info_deviceserialnumber'], drop_first=True, dtype = int)

In [None]:
# get_dummies replaced the serial-number column by its dummies; merge it back so a single categorical scanner column is available too
BAGandMenDummies = BAGandMenDummies.merge(scanner, how = 'outer', on = 'subjectkey')

In [None]:
# Short column names used by all later cells (in-place: the frame is changed for every cell that runs afterwards)
BAGandMenDummies.rename(columns={'mri_info_deviceserialnumber': 'scanner','interview_age_M1': 'age'}, inplace = True)

In [None]:
# Persist the combined table (file name unchanged); os.path.join keeps the
# path valid on non-Windows systems as well.
BAGandMenDummies.to_csv(os.path.join('processedData', 'BainAgePlusScannerFemale2301.csv'), index = False)

In [None]:
# Predictor list for the nuisance models: every scanner dummy column
# (names containing 'mri_info_deviceserialnumber') followed by 'age'.
additionalcolumns = ['age']
dummies1 = list(BAGandMenDummies.filter(regex=".*mri_info_deviceserialnumber.*").columns) + additionalcolumns

In [None]:
# Nuisance model for BAG: BAG ~ const + scanner dummies + age
y = BAGandMenDummies['BAG']
X = sm.add_constant(BAGandMenDummies[dummies1])

model = sm.OLS(y, X)
results1 = model.fit()

print(results1.summary())

In [None]:
# BAG residuals of the age + scanner model (results1)
BAGandMenDummies['BAGresidAgeScanner'] = results1.resid

In [None]:
# Rename the class-probability column; later cells and formula strings refer to it as probsPost
BAGandMenDummies.rename(columns={'prob post': "probsPost"}, inplace = True)

In [None]:
# Same nuisance model with the class probabilities as outcome:
# probsPost ~ const + scanner dummies + age
y = BAGandMenDummies['probsPost']
X = sm.add_constant(BAGandMenDummies[dummies1])

model = sm.OLS(y, X)
results2 = model.fit()

print(results2.summary())

In [None]:
# Class-probability residuals of the age + scanner model (results2)
BAGandMenDummies['ProbsPostresidAgeScanner'] = results2.resid

In [None]:
# Relate the two age- and scanner-adjusted residual series:
# BAGresidAgeScanner ~ const + ProbsPostresidAgeScanner
y = BAGandMenDummies['BAGresidAgeScanner']
X = sm.add_constant(BAGandMenDummies['ProbsPostresidAgeScanner'])

model = sm.OLS(y, X)
results = model.fit()

print(results.summary())

In [None]:
# Menarche questionnaire data; os.path.join keeps the path valid on
# non-Windows systems as well.
menarcheAge = pd.read_csv(os.path.join('processedData', 'relevantMenarcheData2year.csv'))

In [None]:
# Inspect the available columns
menarcheAge.columns

In [None]:
# Menarche items pds_f5_y / pds_f6_y for the subjects in the analysis frame
menarcheAgeRed = menarcheAge[menarcheAge['subjectkey'].isin(BAGandMenDummies['subjectkey'])][['subjectkey', 'pds_f5_y', 'pds_f6_y']]

In [None]:
# NOTE(review): outer join — subjects are duplicated if menarcheAgeRed has several rows per subjectkey; confirm the keys are unique
BAGandMenDummiesExtended = BAGandMenDummies.merge(menarcheAgeRed, how = 'outer', on = 'subjectkey')

In [None]:
# Number of rows per pds_f5_y response
countdf = BAGandMenDummiesExtended.groupby('pds_f5_y').size().reset_index(name = 'count')

In [None]:
countdf

In [None]:
# Mean age per pds_f5_y response; /12 presumably converts months to years — TODO confirm the unit of 'age'
BAGandMenDummiesExtended.groupby('pds_f5_y')['age'].mean()/12

In [None]:
# Standard deviation of age per pds_f5_y response, on the same /12 scale
BAGandMenDummiesExtended.groupby('pds_f5_y')['age'].std()/12

In [None]:
# Split by the pds_f5_y response: 4 -> "Post" frame, 1 -> "Pre" frame.
# .copy() makes each subset an independent DataFrame, so the in-place dropna
# applied to BAGandMenDummiesPost afterwards neither raises pandas'
# SettingWithCopyWarning nor operates on a temporary slice.
BAGandMenDummiesPost = BAGandMenDummiesExtended[BAGandMenDummiesExtended['pds_f5_y'] == 4].copy()

BAGandMenDummiesPre = BAGandMenDummiesExtended[BAGandMenDummiesExtended['pds_f5_y'] == 1].copy()

In [None]:
# Number of rows in the "Post" frame without a pds_f6_y value
BAGandMenDummiesPost['pds_f6_y'].isna().sum()

In [None]:
# Keep only rows with a pds_f6_y value. The name is re-bound instead of using
# dropna(inplace=True): the frame was produced by boolean filtering, and
# in-place modification of such a slice triggers SettingWithCopyWarning.
BAGandMenDummiesPost = BAGandMenDummiesPost.dropna(subset=['pds_f6_y'])

In [None]:
# Distribution of pds_f6_y in the "Post" frame: number of rows per distinct value
ageAtMenDist = BAGandMenDummiesPost.groupby(['pds_f6_y']).size().reset_index(name='count')

In [None]:
ageAtMenDist

# Models using the statsmodels formula API

In [None]:
import statsmodels.formula.api as smf

In [None]:
# Working frame for the formula models and the bootstrap below: outcome, predictors and covariates only
tempDf = BAGandMenDummiesPost[['probsPost', 'BAG', 'pds_f6_y', 'age', 'scanner']]

In [None]:
# Class probabilities explained by pds_f6_y, adjusted for age and scanner (scanner treated as categorical via C())
classprobsPDSf6 = 'probsPost ~ pds_f6_y + age + C(scanner)'

model2 = smf.ols(formula=classprobsPDSf6, data=tempDf).fit()

In [None]:
summary2 = model2.summary()

In [None]:
# Report: formula, number of observations, model / residual degrees of freedom, R^2 and the coefficient table
print(classprobsPDSf6)
print(int(model2.nobs))
print(model2.df_model, model2.df_resid)
print('R2: ', model2.rsquared)#
print(summary2.tables[1])

In [None]:
# Same design with BAG as the outcome
classprobsPDSf62 = 'BAG ~ pds_f6_y + age + C(scanner)'

model3 = smf.ols(formula=classprobsPDSf62, data=tempDf).fit()

In [None]:
summary3 = model3.summary()

In [None]:
# Report: formula, number of observations, R^2 and the coefficient table
print(classprobsPDSf62)
print(int(model3.nobs))
print('R2: ', model3.rsquared)#
print(summary3.tables[1])

In [None]:
# Reverse direction: pds_f6_y explained by class probabilities, BAG and age (no scanner term in this formula)
classprobsPDSf63 = 'pds_f6_y ~ probsPost + BAG + age'

model4 = smf.ols(formula=classprobsPDSf63, data=tempDf).fit()

In [None]:
summary4 = model4.summary()

In [None]:
# Report: formula, number of observations, R^2 and the coefficient table
print(classprobsPDSf63)
print(int(model4.nobs))
print('R2: ', model4.rsquared)#
print(summary4.tables[1])

In [None]:
# Residual-on-residual model on the full frame (BAGandMenDummies, not tempDf).
# NOTE(review): re-binds model4 / summary4 from the cells above — the earlier fit is no longer reachable after this cell.
ProbsPostBAG = 'BAGresidAgeScanner ~ ProbsPostresidAgeScanner'

model4 = smf.ols(formula=ProbsPostBAG, data=BAGandMenDummies).fit()

In [None]:
summary4 = model4.summary()

In [None]:
# Report: formula, number of observations, R^2, model / residual degrees of freedom and the coefficient table
print(ProbsPostBAG)
print(int(model4.nobs))
print('R2: ', model4.rsquared)#
print(model4.df_model, model4.df_resid)
print(summary4.tables[1])

In [None]:
# Bootstrap of the probsPost ~ pds_f6_y regression: refit the model on
# n_bootstraps resamples of tempDf (rows drawn with replacement, same size as
# tempDf — 215 rows according to the original note) and collect the statistics
# of the pds_f6_y term from every fit.

classprobsPDSf6 = 'probsPost ~ pds_f6_y + age + C(scanner)'

# Statistics are read by parameter label. The former positional lookups
# ([-2] for pds_f6_y, [0] for the intercept) depended on the column order of
# the design matrix and on pandas' deprecated integer-key fallback for
# string-indexed Series.
menarche_term = 'pds_f6_y'
intercept_term = 'Intercept'

n_bootstraps = 1000
bootstrapped_tvalues = []
bootstrapped_pvalues = []
bootstrapped_coefs = []
bootstrapped_coefs_intercept = []
bootstrapped_confIntlow = []
bootstrapped_confInthigh = []

for i in range(n_bootstraps):

    # random_state=i: every resample, and therefore the whole run, is reproducible
    tempDf_sampled = tempDf.sample(frac = 1, replace = True, random_state = i, axis = 0)

    model = smf.ols(formula=classprobsPDSf6, data=tempDf_sampled).fit()

    # conf_int() is computed once per fit; columns 0 / 1 hold the lower / upper bound
    conf_int = model.conf_int()

    bootstrapped_tvalues.append(model.tvalues[menarche_term])
    bootstrapped_pvalues.append(model.pvalues[menarche_term])
    bootstrapped_coefs.append(model.params[menarche_term])
    bootstrapped_coefs_intercept.append(model.params[intercept_term])
    bootstrapped_confIntlow.append(conf_int.loc[menarche_term, 0])
    bootstrapped_confInthigh.append(conf_int.loc[menarche_term, 1])

# Averages across resamples. Note that the two "conf int" lines are means of
# the per-fit model-based bounds, not percentiles of the bootstrap distribution.
print('mean p: ', np.mean(bootstrapped_pvalues))
print('mean t: ', np.mean(bootstrapped_tvalues))
print('mean coef: ', np.mean(bootstrapped_coefs))
print('mean coef intercept: ', np.mean(bootstrapped_coefs_intercept))
print('mean 0.025 conf int: ', np.mean(bootstrapped_confIntlow))
print('mean 0.975 conf int: ', np.mean(bootstrapped_confInthigh))

In [None]:
# Side-by-side table: pds_f6_y statistics from the original-sample fit
# (model2) and the means over the bootstrap fits.
table = PrettyTable()
table.field_names = ["Classprobabilities ~ Age at Menarche", "t-value", "p-value", "coef", "coef intercept", '[.025', '0.975]']

# Label-based lookups replace the positional [-2] / [0] indexing, which
# relied on the parameter order and on deprecated integer keys for a
# string-indexed Series.
model2_conf_int = model2.conf_int()  # columns 0 / 1: lower / upper bound

row1 = [
    'original sample',
    "{:.4f}".format(model2.tvalues['pds_f6_y']),
    "{:.4f}".format(model2.pvalues['pds_f6_y']),
    "{:.4f}".format(model2.params['pds_f6_y']),
    "{:.4f}".format(model2.params['Intercept']),
    "{:.4f}".format(model2_conf_int.loc['pds_f6_y', 0]),
    "{:.4f}".format(model2_conf_int.loc['pds_f6_y', 1])
]

table.add_row(row1)

row2= [
    'bootstrapped (means)',
    "{:.4f}".format(np.mean(bootstrapped_tvalues)),
    "{:.4f}".format(np.mean(bootstrapped_pvalues)),
    "{:.4f}".format(np.mean(bootstrapped_coefs)),
    "{:.4f}".format(np.mean(bootstrapped_coefs_intercept)),
    "{:.4f}".format(np.mean(bootstrapped_confIntlow)),
    "{:.4f}".format(np.mean(bootstrapped_confInthigh))
]

table.add_row(row2)

print(table)

In [None]:
# Bootstrap distribution of the pds_f6_y coefficient.
# Red line: estimate from the original sample (model2), looked up by label
# rather than by position. Dashed lines: 2.5th and 97.5th percentiles of the
# bootstrap distribution.

fig, ax = plt.subplots()

ax.hist(bootstrapped_coefs, bins = 25, color = '#ADD9E4')

ax.set_xlabel('coefficient')

ax.set_ylabel('frequency')

ax.axvline(model2.params['pds_f6_y'], color = 'red')
ax.axvline(np.percentile(bootstrapped_coefs, 2.5), color = 'black', linestyle = '--')
ax.axvline(np.percentile(bootstrapped_coefs, 97.5), color = 'black', linestyle = '--')

In [None]:
# Bootstrap distribution of the pds_f6_y t-value.
# Red line: t-value from the original sample (model2), looked up by label
# rather than by position. Dashed lines: 2.5th and 97.5th percentiles of the
# bootstrap distribution.

fig, ax = plt.subplots()

ax.hist(bootstrapped_tvalues, bins = 30, color = '#ADD9E4')

ax.set_xlabel('t-value')

ax.set_ylabel('frequency')

ax.axvline(model2.tvalues['pds_f6_y'], color = 'red')
ax.axvline(np.percentile(bootstrapped_tvalues, 2.5), color = 'black', linestyle = '--')
ax.axvline(np.percentile(bootstrapped_tvalues, 97.5), color = 'black', linestyle = '--')

#plt.savefig(plotdir + 'BootstrappedTvalues.png', dpi = 1000)

In [None]:
# pds_f6_y ~ const + class-probability residuals + BAG residuals ("Post" frame only)
y = BAGandMenDummiesPost['pds_f6_y']
X = sm.add_constant(BAGandMenDummiesPost[['ProbsPostresidAgeScanner', 'BAGresidAgeScanner']])

model = sm.OLS(y, X)
results = model.fit()

print(results.summary())

In [None]:
# Formula-API version of the model in the previous cell: pds_f6_y on both residual series
pdsProbsPostBAG = 'pds_f6_y ~ BAGresidAgeScanner + ProbsPostresidAgeScanner'

model5 = smf.ols(formula=pdsProbsPostBAG, data=BAGandMenDummiesPost).fit()

In [None]:
summary5 = model5.summary()

In [None]:
# Report: formula, number of observations, R^2, model / residual degrees of freedom and the coefficient table
print(pdsProbsPostBAG)
print(int(model5.nobs))
print('R2: ', model5.rsquared)#
print(model5.df_model, model5.df_resid)
print(summary5.tables[1])

In [None]:
# Histogram of the raw BAG values
hist = sns.histplot(BAGandMenarche['BAG'])
hist.set_xlabel('BAG')
hist.set_title('Brain Age Gap Distribution in ABCD Menarche Subjects')
#plt.savefig('BAGhistFemaleModel.png', dpi = 1000)

In [None]:
# Class probabilities vs. BAG_resids; uses BAGandMenarche, where the column is still called 'prob post'.
# /12 presumably converts months to years — TODO confirm the unit of BAG.
ax = sns.regplot(x = BAGandMenarche['prob post'], y = BAGandMenarche['BAG_resids']/12)
#plt.savefig('RegplotBAGresidsMenarche_Mixed.png', dpi = 1000)

In [None]:
# Column list: every scanner dummy column of the "Post" frame followed by
# 'BAG_resids', 'age' and 'pds_f6_y'.
additionalcolumns = ['BAG_resids','age', 'pds_f6_y']
dummies = list(BAGandMenDummiesPost.filter(regex=".*mri_info_deviceserialnumber.*").columns) + additionalcolumns

In [None]:
# pds_f6_y vs. class-probability residuals with a fitted line and 95% band.
# x_jitter spreads the points horizontally for display only.
# NOTE(review): no seed is passed, so jitter and the band presumably vary slightly between runs — confirm whether that matters for the figure.
ax = sns.regplot(x = BAGandMenDummiesPost['pds_f6_y'], y = BAGandMenDummiesPost['ProbsPostresidAgeScanner'], x_ci='ci', x_jitter = 0.2,
            scatter=True, fit_reg=True, ci=95, line_kws={'color': '#eb0735'})

ax.set_xlabel('Age at Menarche')
ax.set_ylabel('Menarche Classprobabilities controlled \n for Age and Scanner')
#plt.savefig('RegplotAgeMenarcheClassprobs_Female.png', dpi = 1000)

# Include puberty categories in the analysis

In [None]:
# Puberty categories. Loaded relative to the working directory, like the
# other 'processedData' tables in this notebook, instead of from an absolute
# user-specific path that only exists on one machine.
pubcats = pd.read_csv(os.path.join('processedData', 'PubertyCats.csv'))

In [None]:
# Caregiver puberty categories. Loaded relative to the working directory, like
# the other 'processedData' tables in this notebook, instead of from an
# absolute user-specific path that only exists on one machine.
pubcatsCaregiver = pd.read_csv(os.path.join('processedData', 'PubertyCatsCaregiver.csv'))

In [None]:
# Rows of pubcats for the subjects in the analysis frame
pubcatsTest = pubcats[pubcats['subjectkey'].isin(BAGandMenDummies['subjectkey'])]

In [None]:
# Same selection for the caregiver table
pubcatsTestCaregiver = pubcatsCaregiver[pubcatsCaregiver['subjectkey'].isin(BAGandMenDummies['subjectkey'])]

In [None]:
# Keep the subject id and the puberty score columns; .copy() detaches the result from pubcatsTest
pubcatT = pubcatsTest[['subjectkey', 'gonadal', 'adrenal', 'PDS_mean', 'PDS_cat_score', 'PDS_category']].copy()

In [None]:
pubcatTC = pubcatsTestCaregiver[['subjectkey', 'gonadal', 'adrenal', 'PDS_mean', 'PDS_cat_score', 'PDS_category']].copy()

In [None]:
# Suffix the caregiver columns with 'C' so they do not clash with the columns of pubcatT in the merge below
pubcatTC.rename(columns = {'gonadal' : 'gonadalC', 'adrenal' : 'adrenalC', 'PDS_mean' : 'PDS_meanC', 
                           'PDS_cat_score' : 'PDS_cat_scoreC', 'PDS_category' : 'PDS_categoryC'}, inplace = True)

In [None]:
# Combine both puberty tables and attach them to the analysis frame.
# NOTE(review): outer joins — rows are duplicated if either table has several rows per subjectkey; confirm the keys are unique.
MenPubBAG = BAGandMenDummiesExtended.merge(pubcatT.merge(pubcatTC, on = 'subjectkey', how = 'outer'),
                                                         on = 'subjectkey', how = 'outer')

In [None]:
def switch_case(pubcat):
    """Translate a puberty-category label into its ordinal code.

    Returns 1 for 'prepubertal', 2 for 'early pubertal', 3 for 'midpubertal',
    4 for 'late pubertal' and 5 for 'postpubertal'. Any other value,
    including the empty string, yields None.
    """
    ordered_labels = (
        'prepubertal',
        'early pubertal',
        'midpubertal',
        'late pubertal',
        'postpubertal',
    )
    # Plain equality comparison, label by label; the code is the 1-based position.
    for code, label in enumerate(ordered_labels, start=1):
        if pubcat == label:
            return code
    return None
    

In [None]:
# Apply the function to create 'Status' column
MenPubBAG['PDS_category_num'] = MenPubBAG['PDS_category'].apply(switch_case)
MenPubBAG['PDS_category_numC'] = MenPubBAG['PDS_categoryC'].apply(switch_case)

MenPubBAG.head()

In [None]:
# Missing values per column (to_string prints every column instead of a truncated view)
print(MenPubBAG.isna().sum().to_string())

In [None]:
# BAG explained by the numeric puberty category, adjusted for age and scanner; rows with missing values are dropped
pubcatsBAG2 = 'BAG ~ PDS_category_num + age + C(scanner)'

model51 = smf.ols(formula=pubcatsBAG2, data=MenPubBAG, missing = 'drop').fit()

In [None]:
summary51 = model51.summary()

In [None]:
# Report: formula, number of observations, R^2 and the coefficient table
print(pubcatsBAG2)
print(int(model51.nobs))
print('R2: ', model51.rsquared)#
print(summary51.tables[1])

In [None]:
# Caregiver puberty category explained by both residual series
pubcatsBAGProbs_C = 'PDS_category_numC ~ BAGresidAgeScanner + ProbsPostresidAgeScanner'

model101 = smf.ols(formula=pubcatsBAGProbs_C, data=MenPubBAG, missing = 'drop').fit()

In [None]:
summary101 = model101.summary()

In [None]:
print(pubcatsBAGProbs_C)
print(int(model101.nobs))
print('R2: ', model101.rsquared)#
print(summary101.tables[1])

In [None]:
# Same model for the non-caregiver puberty category
pubcatsBAGProbs_Y = 'PDS_category_num ~ BAGresidAgeScanner + ProbsPostresidAgeScanner'

model102 = smf.ols(formula=pubcatsBAGProbs_Y, data=MenPubBAG, missing = 'drop').fit()

In [None]:
# NOTE(review): the name summary102 is re-bound by a later cell (modelDemog2 summary)
summary102 = model102.summary()

In [None]:
print(pubcatsBAGProbs_Y)
print(int(model102.nobs))
print('R2: ', model102.rsquared)#
print(summary102.tables[1])

In [None]:
# BAG explained by the caregiver puberty category, adjusted for age and scanner
pubcatsBAG_C2 = 'BAG ~ PDS_category_numC + age + C(scanner)'

model61 = smf.ols(formula=pubcatsBAG_C2, data=MenPubBAG, missing = 'drop').fit()

In [None]:
summary61 = model61.summary()

In [None]:
# Report: formula, number of observations, R^2 and the coefficient table
print(pubcatsBAG_C2)
print(int(model61.nobs))
print('R2: ', model61.rsquared)
print(summary61.tables[1])

In [None]:
# Class probabilities explained by the puberty category, adjusted for age and scanner
pubcatsProbsPost2 = 'probsPost ~ PDS_category_num + age + C(scanner)'

model71 = smf.ols(formula=pubcatsProbsPost2, data=MenPubBAG, missing = 'drop').fit()

In [None]:
summary71 = model71.summary()

In [None]:
# Report additionally includes the model / residual degrees of freedom
print(pubcatsProbsPost2)
print(int(model71.nobs))
print(model71.df_model, model71.df_resid)
print('R2: ', model71.rsquared)
print(summary71.tables[1])

In [None]:
# Same model with the caregiver puberty category
pubcatsProbsPost2_C = 'probsPost ~ PDS_category_numC + age + C(scanner)'

model81 = smf.ols(formula=pubcatsProbsPost2_C, data=MenPubBAG, missing = 'drop').fit()

In [None]:
summary81 = model81.summary()

In [None]:
print(pubcatsProbsPost2_C)
print(int(model81.nobs))
print('R2: ', model81.rsquared)#
print(summary81.tables[1])

In [None]:
# Final column names used by the remaining cells. In-place: after this cell, MenPubBAG no longer has
# 'pds_f6_y', 'PDS_category_num' or 'PDS_category_numC', so the model cells above cannot be re-run without rebuilding MenPubBAG.
# NOTE(review): 'interview_age_M1' was already renamed to 'age' on BAGandMenDummies, so that key presumably matches nothing here — confirm.
MenPubBAG.rename(columns = {'pds_f6_y':'ageMenarche', 'interview_age_M1':'age','PDS_category_num':'PMDS_Youth',
                            'PDS_category_numC':'PMDS_Caregiver'}, inplace = True)

# Controlling the association between class probabilities and age at menarche for SES, BMI and race

In [None]:
# Table providing the 'ses', 'bmi' and 'ethno' columns used below. Loaded
# relative to the working directory, like the other 'processedData' tables in
# this notebook, instead of from an absolute user-specific path.
ethno = pd.read_csv(os.path.join('processedData', 'SESBMIrace.csv'))

In [None]:
# Rows of the SES/BMI/race table for the subjects in MenPubBAG
ethno_reduced = ethno[ethno['subjectkey'].isin(MenPubBAG['subjectkey'])]

In [None]:
# NOTE(review): outer join — rows are duplicated if ethno_reduced has several rows per subjectkey; confirm the keys are unique
BAGMenEth = pd.merge(ethno_reduced, MenPubBAG, how = 'outer', on = 'subjectkey')

In [None]:
BAGMenEth.sort_values(by = 'subjectkey', inplace = True)

In [None]:
# Fresh 0..n-1 index after sorting; the old index is discarded
BAGMenEth.reset_index(drop = True, inplace = True)

In [None]:
# NOTE(review): only has an effect if the SES/BMI/race table brought in an 'interview_age_M1' column; in that case the frame ends up with two 'age' columns — confirm.
BAGMenEth.rename(columns = {'interview_age_M1':'age'}, inplace = True)

In [None]:
# Class-probability residuals explained by BAG residuals, adjusted for BMI, SES and ethnicity (categorical via C()).
# Needs the original 'ethno' column, i.e. must run before the get_dummies cell below replaces it.
temp100 = 'ProbsPostresidAgeScanner ~ BAGresidAgeScanner + bmi + ses + C(ethno)'

modelDemog1 = smf.ols(formula=temp100, data=BAGMenEth).fit()

In [None]:
summary100 = modelDemog1.summary()

In [None]:
# Report: formula, number of observations, model / residual degrees of freedom, R^2 and the coefficient table
print(temp100)
print(int(modelDemog1.nobs))
print(modelDemog1.df_model, modelDemog1.df_resid)
print('R2: ', modelDemog1.rsquared)#
print(summary100.tables[1])

In [None]:
# Keep a copy of the categorical column, because get_dummies in the next cell replaces 'ethno' by its dummy columns
BAGMenEth['ethnoTemp'] = BAGMenEth['ethno'].copy()

In [None]:
# One dummy column per ethno level (no drop_first, so every level gets a column).
# Not re-runnable as is: a second run fails because 'ethno' no longer exists.
BAGMenEth = pd.get_dummies(BAGMenEth, columns=['ethno'], dtype = int)

In [None]:
# ProbsPostresidAgeScanner ~ const + ethnicity dummies + bmi + ses + BAGresidAgeScanner
#
# The ethno dummies were created without drop_first, so across rows with a
# known ethnicity they sum to one and are perfectly collinear with the
# constant added below (dummy-variable trap). The first level is therefore
# left out as reference category, which is the coding C(ethno) applies in the
# formula-based models of this notebook.
dummiesRace = BAGMenEth.filter(regex=".*ethno_.*").columns.to_list()[1:]
additionalcolumns = ['bmi','ses','BAGresidAgeScanner']
dummiesRace.extend(additionalcolumns)

# Rows without an ethnicity have all dummies equal to 0 and would silently be
# counted as the reference level; exclude them, as the formula models do.
ethno_known = BAGMenEth['ethnoTemp'].notna()

X = BAGMenEth.loc[ethno_known, dummiesRace]
y = BAGMenEth.loc[ethno_known, 'ProbsPostresidAgeScanner']

X = sm.add_constant(X)

model = sm.OLS(y,X, missing = 'drop')
results = model.fit()

print(results.summary())

In [None]:
# probsPost ~ const + ethnicity dummies + age + bmi + ses + ageMenarche + scanner dummies
#
# The ethno dummies were created without drop_first, so across rows with a
# known ethnicity they sum to one and are perfectly collinear with the
# constant added below (dummy-variable trap). The first level is therefore
# left out as reference category, as C(ethnoTemp) does in the formula-based
# models. The scanner dummies already lack their first level.
dummiesRace = BAGMenEth.filter(regex=".*ethno_.*").columns.to_list()[1:]
dummies2 = BAGMenEth.filter(regex=".*mri_info_deviceserialnumber.*").columns.to_list()
additionalcolumns = ['age','bmi','ses','ageMenarche']
dummiesRace.extend(additionalcolumns)
dummiesRace.extend(dummies2)

# Rows without an ethnicity have all dummies equal to 0 and would silently be
# counted as the reference level; exclude them, as the formula models do.
ethno_known = BAGMenEth['ethnoTemp'].notna()

X = BAGMenEth.loc[ethno_known, dummiesRace]
y = BAGMenEth.loc[ethno_known, 'probsPost']

X = sm.add_constant(X)

model = sm.OLS(y,X, missing = 'drop')
results = model.fit()

print(results.summary())

In [None]:
# probsPost ~ const + ethnicity dummies + age + bmi + ses + PDS_cat_score + scanner dummies
#
# The ethno dummies were created without drop_first, so across rows with a
# known ethnicity they sum to one and are perfectly collinear with the
# constant added below (dummy-variable trap). The first level is therefore
# left out as reference category, as C(ethnoTemp) does in the formula-based
# models. The scanner dummies already lack their first level.
dummiesRace = BAGMenEth.filter(regex=".*ethno_.*").columns.to_list()[1:]
dummies2 = BAGMenEth.filter(regex=".*mri_info_deviceserialnumber.*").columns.to_list()
additionalcolumns = ['age','bmi','ses','PDS_cat_score']
dummiesRace.extend(additionalcolumns)
dummiesRace.extend(dummies2)

# Rows without an ethnicity have all dummies equal to 0 and would silently be
# counted as the reference level; exclude them, as the formula models do.
ethno_known = BAGMenEth['ethnoTemp'].notna()

X = BAGMenEth.loc[ethno_known, dummiesRace]
y = BAGMenEth.loc[ethno_known, 'probsPost']

X = sm.add_constant(X)

model = sm.OLS(y,X, missing = 'drop')
results = model.fit()

print(results.summary())

In [None]:
# probsPost ~ const + ethnicity dummies + age + bmi + ses + PDS_cat_scoreC + scanner dummies
#
# The ethno dummies were created without drop_first, so across rows with a
# known ethnicity they sum to one and are perfectly collinear with the
# constant added below (dummy-variable trap). The first level is therefore
# left out as reference category, as C(ethnoTemp) does in the formula-based
# models. The scanner dummies already lack their first level.
dummiesRace = BAGMenEth.filter(regex=".*ethno_.*").columns.to_list()[1:]
dummies2 = BAGMenEth.filter(regex=".*mri_info_deviceserialnumber.*").columns.to_list()
additionalcolumns = ['age','bmi','ses','PDS_cat_scoreC']
dummiesRace.extend(additionalcolumns)
dummiesRace.extend(dummies2)

# Rows without an ethnicity have all dummies equal to 0 and would silently be
# counted as the reference level; exclude them, as the formula models do.
ethno_known = BAGMenEth['ethnoTemp'].notna()

X = BAGMenEth.loc[ethno_known, dummiesRace]
y = BAGMenEth.loc[ethno_known, 'probsPost']

X = sm.add_constant(X)

model = sm.OLS(y,X, missing = 'drop')
results = model.fit()

print(results.summary())

In [None]:
# Class probabilities explained by age at menarche, adjusted for age, BMI, SES, ethnicity and scanner (both categorical via C())
temp102 = 'probsPost ~ ageMenarche + age + bmi + ses + C(ethnoTemp) + C(scanner)'

modelDemog2 = smf.ols(formula=temp102, data=BAGMenEth, missing = 'drop').fit()

In [None]:
# NOTE(review): re-binds summary102, which earlier held the summary of model102
summary102 = modelDemog2.summary()

In [None]:
# Report: formula, number of observations, model / residual degrees of freedom, R^2 and the coefficient table
print(temp102)
print(int(modelDemog2.nobs))
print(modelDemog2.df_model, modelDemog2.df_resid)
print('R2: ', modelDemog2.rsquared)#
print(summary102.tables[1])

In [None]:
# Same covariates with PDS_cat_score as the predictor of interest
temp103 = 'probsPost ~ PDS_cat_score + age + bmi + ses + C(ethnoTemp) + C(scanner)'

modelDemog3 = smf.ols(formula=temp103, data=BAGMenEth, missing = 'drop').fit()

In [None]:
summary103 = modelDemog3.summary()

In [None]:
print(temp103)
print(int(modelDemog3.nobs))
print(modelDemog3.df_model, modelDemog3.df_resid)
print('R2: ', modelDemog3.rsquared)#
print(summary103.tables[1])

In [None]:
# Same covariates with the caregiver score PDS_cat_scoreC.
# NOTE(review): re-binds temp103 / modelDemog3 / summary103 from the previous model — the earlier fit is no longer reachable after this cell.
temp103 = 'probsPost ~ PDS_cat_scoreC + age + bmi + ses + C(ethnoTemp) + C(scanner)'

modelDemog3 = smf.ols(formula=temp103, data=BAGMenEth, missing = 'drop').fit()

In [None]:
summary103 = modelDemog3.summary()

In [None]:
print(temp103)
print(int(modelDemog3.nobs))
print(modelDemog3.df_model, modelDemog3.df_resid)
print('R2: ', modelDemog3.rsquared)#
print(summary103.tables[1])

# Plots

In [None]:
# Figure 3 — a) predicted age vs. age, b) BAG residuals vs. class-probability
# residuals (both residualised for age and scanner).
#
# Spacing is left to constrained_layout. The former plt.subplots_adjust(...)
# call was removed: manual subplot adjustment is incompatible with constrained
# layout (current Matplotlib skips the call and emits a warning).
fig, (ax1,ax2) = plt.subplots(1,2, figsize = [11,5], constrained_layout = True)

# /12 presumably converts months to years — TODO confirm the unit of both columns
x1 = BAGandMenDummies['age']/12
y1 = BAGandMenDummies['predictedAge']/12

x2 = BAGandMenDummies['ProbsPostresidAgeScanner']
y2 = BAGandMenDummies['BAGresidAgeScanner']

# Identical marker / regression-line styling for both panels
scatter_style = {'color': '#469B8A', 'alpha': 1, 's': 15}
line_style = {'color': '#B0324C','linewidth': 3}

# Panel a)
sns.regplot(x = x1, y = y1, ax = ax1,
            scatter_kws=dict(scatter_style), line_kws=dict(line_style))

ax1.text(10.6, 13.7, "a)", fontsize=15)  # panel label, in data coordinates
ax1.set_xlabel('Age')
ax1.set_ylabel('Predicted Age')

# Panel b)
sns.regplot(x = x2, y = y2, ax = ax2,
            scatter_kws=dict(scatter_style), line_kws=dict(line_style))

ax2.text(-0.65, 27, "b)", fontsize=15)  # panel label, in data coordinates
ax2.set_xlabel('Class Probabilities \n (residualised for age & scanner)')
ax2.set_ylabel('BAGs \n (residualised for age & scanner)')

# Saved relative to the working directory (like Figure 2) instead of to an
# absolute user-specific path that only exists on one machine.
plt.savefig(os.path.join('Plots', 'Figure3.png'), dpi = 1000)

In [None]:
# Figure 2 — a) class-probability densities by `Actual`, b) class-probability
# residuals vs. age at menarche, c) class-probability residuals by pubertal status.
fig = plt.figure(constrained_layout=True, figsize = (13,12))

# 2 x 2 grid: panel a) spans the whole top row, b) and c) share the bottom row
gs = fig.add_gridspec(2, 2)


# --- a) -----------------------------------------------------------------
ax3 = fig.add_subplot(gs[0, :])

dens = sns.kdeplot(
   data=BAGandMenDummies, x="probsPost", hue="Actual",
   cut = 0.2, fill=True, multiple = 'layer', palette="rocket",
   alpha=0.4, linewidth=3, hue_order=[1,0], ax=ax3,
    legend = False
)

ax3.set_xlabel('Class probabilities - menarche')
# NOTE(review): the labels are assigned to the drawn curves by position, not by
# hue level — verify that 'pre' / 'post' end up on the intended curves.
ax3.legend(title = 'Menarche',labels = ['pre','post'], loc = 'upper center')
ax3.text(-0.06, 0.9, "a)", fontsize=17)


# --- b) -----------------------------------------------------------------
ax1 = fig.add_subplot(gs[1, 0])

sns.regplot(x = BAGandMenDummiesPost['pds_f6_y'], y = BAGandMenDummiesPost['ProbsPostresidAgeScanner'], x_ci='ci', x_jitter = 0.2,
            scatter=True, fit_reg=True, ci=95, scatter_kws = {'color': '#A9638D', 'alpha': 1},
            line_kws={'color': '#C74255', 'linewidth':3}, ax = ax1)

ax1.set_xlabel('Age at menarche')
ax1.set_ylabel('Class probabilities \n (age + scanner residualised)')
ax1.text(6.67, 0.63, "b)", fontsize=17)


# --- c) -----------------------------------------------------------------
ax2 = fig.add_subplot(gs[1, 1])

# Both variables are taken from MenPubBAG. Previously x came from MenPubBAG
# and y from BAGandMenDummies, so values were paired purely by row index,
# which is only correct if the two frames happen to share the same row order.
# The 'violinprops' keyword was dropped: it is not a violinplot parameter, and
# the transparency is set by plt.setp below.
sns.violinplot(data = MenPubBAG, x = 'PMDS_Youth', y = 'ProbsPostresidAgeScanner',
               palette = 'rocket_r', ax = ax2)
plt.setp(ax2.collections, alpha=.8)

ax2.set_xlabel('Pubertal status')
ax2.set_ylabel('Class probabilities \n (age + scanner residualised)')
# NOTE(review): the five labels are applied in tick order; if one status level
# is absent from the data the labels shift — confirm all five levels occur.
ax2.set_xticklabels(['prepubertal', 'early pubertal', 'midpubertal','late pubertal', 'postpubertal'], 
                    fontsize = 15, rotation = 45)
ax2.text(-0.56, 1, "c)", fontsize=17)


# os.path.join keeps the output path valid on non-Windows systems as well
plt.savefig(os.path.join('Plots', 'Figure2.pdf'), dpi = 1000)