In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from scipy.stats import ttest_ind
import seaborn as sns
# Embed fonts in saved PDFs as Type 42 (TrueType) rather than matplotlib's default Type 3.
mpl.rcParams.update({'pdf.fonttype': 42})

In [None]:
# Load the PROMIS scores of the propensity-matched samples.
regress = pd.read_csv(filepath_or_buffer='PROMIS_scores_gen_matched_v2.csv')

In [None]:
# Text version of the CeD indicator: 0 -> 'control', 1 -> 'CeD'.
regress['CeD2'] = regress['CeD'].replace({0: 'control', 1: 'CeD'})

In [None]:
# Copies of the three PROMIS scores rounded to one decimal, stored under
# display-style column labels.
# NOTE(review): 'Phyisical' looks like a typo for 'Physical'; the label is kept
# as-is because other cells may reference the column by this exact name.
regress['PROMIS: Phyisical health'] = regress['PROMIS-PH'].round(1)
regress['PROMIS: Mental health'] = regress['PROMIS-MH'].round(1)
regress['PROMIS: Life Quality'] = regress['PROMIS-total'].round(1)

In [None]:
# Split the samples on the CeD indicator: 1 -> CeD group, 0 -> control group.
ced = regress.loc[regress['CeD'].eq(1)]
control = regress.loc[regress['CeD'].eq(0)]

In [None]:
# Independent two-sample t-tests (CeD vs. control), one per PROMIS score.
# Missing scores are dropped within each group before testing.
phys_ttest = ttest_ind(
    ced['PROMIS-PH'].dropna(),
    control['PROMIS-PH'].dropna(),
)
mental_ttest = ttest_ind(
    ced['PROMIS-MH'].dropna(),
    control['PROMIS-MH'].dropna(),
)
total_ttest = ttest_ind(
    ced['PROMIS-total'].dropna(),
    control['PROMIS-total'].dropna(),
)

In [None]:
# Difference in mean overall PROMIS score, control minus CeD
# (positive when the control group's mean is higher).
meandiff = (
    control.loc[:, 'PROMIS-total'].mean()
    - ced.loc[:, 'PROMIS-total'].mean()
)

In [None]:
# Density plot of the overall PROMIS score for the CeD and control groups,
# annotated with the mean difference and the t-test p-value, saved as a PDF.
fig, ax = plt.subplots(figsize=(8, 6))
sns.kdeplot(ced['PROMIS-total'], label='CeD', ax=ax)
sns.kdeplot(control['PROMIS-total'], label='non-CeD', ax=ax)
ax.set_xlabel('PROMIS: Life Quality')
ax.set_title('Distribution of overall PROMIS scores')
ax.legend()

# Text annotations; x/y are data coordinates (score, density).
ax.text(x=8, y=0.033, s=f"mean difference: {round(meandiff,2)}", fontsize=12)
# Scientific notation keeps the p-value legible at any magnitude. Rounding to a
# fixed number of decimals prints 0.0 once the p-value falls below that
# precision and a long tail of digits otherwise.
ax.text(x=8, y=0.03, s=f"p-value: {float(total_ttest.pvalue):.2e}", fontsize=12)

fig.savefig("PROMIS_density.pdf", format="pdf")

In [None]:
# Fraction (0-1, despite the "_pct" name) of CeD participants with
# PROMIS-PH < 42 or PROMIS-MH < 40, among those with at least one of the two
# scores present.
poor_fair_ced_pct = (
    int((ced['PROMIS-PH'].lt(42) | ced['PROMIS-MH'].lt(40)).sum())
    / int(ced[['PROMIS-PH', 'PROMIS-MH']].notna().any(axis=1).sum())
)

In [None]:
# Fraction (0-1, despite the "_pct" name) of control participants with
# PROMIS-PH < 42 or PROMIS-MH < 40, among those with at least one of the two
# scores present.
poor_fair_ctrl_pct = (
    int((control['PROMIS-PH'].lt(42) | control['PROMIS-MH'].lt(40)).sum())
    / int(control[['PROMIS-PH', 'PROMIS-MH']].notna().any(axis=1).sum())
)

In [None]:
# Number of participants per group with at least one of PROMIS-PH / PROMIS-MH present.
ced_total = int(ced[['PROMIS-PH', 'PROMIS-MH']].notna().any(axis=1).sum())
ctrl_total = int(control[['PROMIS-PH', 'PROMIS-MH']].notna().any(axis=1).sum())

In [None]:
# Chi-square test and odds ratio for poor/fair life quality, CeD vs. control.
from scipy.stats import chi2_contingency

# Poor/fair is defined as a mental-health score < 40 or a physical-health score < 42.
poor_fair_ced_ct = len(ced[(ced['PROMIS-PH'] < 42) | (ced['PROMIS-MH'] < 40)])
poor_fair_ctrl_ct = len(control[(control['PROMIS-PH'] < 42) | (control['PROMIS-MH'] < 40)])
# Remaining participants with a score; ced_total / ctrl_total are computed in an earlier cell.
normal_ced_ct = ced_total - poor_fair_ced_ct
normal_ctrl_ct = ctrl_total - poor_fair_ctrl_ct

# 2x2 contingency table: rows = life-quality category, columns = group.
qual = pd.DataFrame(
    {
        'ced': [poor_fair_ced_ct, normal_ced_ct],
        'control': [poor_fair_ctrl_ct, normal_ctrl_ct],
    },
    index=['poor/fair', 'not poor/fair'],
)

# Per the SciPy docs, Yates' continuity correction is applied by default for a 2x2 table.
res = chi2_contingency(qual)
# Read the statistic and p-value by position: this works both when SciPy returns
# a result object and when it returns a plain (chi2, p, dof, expected) tuple,
# whereas the .statistic / .pvalue attributes exist only on the result object.
chisq_stat, p_value = res[0], res[1]

# Odds ratio of poor/fair life quality, CeD relative to control. A zero
# denominator cell yields inf (or nan when the numerator is zero as well)
# instead of raising ZeroDivisionError, so the table is still produced.
or_numerator = poor_fair_ced_ct * normal_ctrl_ct
or_denominator = poor_fair_ctrl_ct * normal_ced_ct
if or_denominator:
    odds_ratio = or_numerator / or_denominator
elif or_numerator:
    odds_ratio = float('inf')
else:
    odds_ratio = float('nan')

# Scalars are broadcast, so both rows of the table carry the same test results.
qual['chisq'] = chisq_stat
qual['p-value'] = p_value
qual['OR'] = odds_ratio

In [None]:
# Write the poor/fair life-quality table to CSV.
qual.to_csv(path_or_buf='life_quality_table.csv')