In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
from IPython.display import display
from scipy.stats import chi2_contingency
from os.path import exists
from missforest.missforest import MissForest

# ensure reproducibility
np.random.seed(123)


In [2]:
pkl_file = './df.pkl'
df = pd.read_pickle(pkl_file)
df.shape

(608, 255)

In [3]:
col_sleep = [
  'BL_ODI_sleeping',
  '3m_ODI_sleeping',
  '12m_ODI_sleeping',
  '24m_ODI_sleeping',
  '36m_ODI_sleeping'
]

df.loc[:, col_sleep].describe()

Unnamed: 0,BL_ODI_sleeping,3m_ODI_sleeping,12m_ODI_sleeping,24m_ODI_sleeping,36m_ODI_sleeping
count,602.0,537.0,471.0,460.0,157.0
mean,1.965116,0.945996,0.868365,-42.593478,1.0
std,1.309296,1.147218,1.133474,658.655091,1.182132
min,0.0,0.0,0.0,-9999.0,0.0
25%,1.0,0.0,0.0,0.0,0.0
50%,2.0,1.0,0.0,1.0,1.0
75%,3.0,1.0,1.0,1.0,1.0
max,5.0,5.0,5.0,5.0,5.0


In [4]:
df['>24m_ODI_sleeping'] = np.where(
  df.loc[:, col_sleep[-2]].notna(), 
  df.loc[:, col_sleep[-2]], 
  df.loc[:, col_sleep[-1]]
)

In [5]:
df.loc[:, col_sleep + ['>24m_ODI_sleeping']].describe()

Unnamed: 0,BL_ODI_sleeping,3m_ODI_sleeping,12m_ODI_sleeping,24m_ODI_sleeping,36m_ODI_sleeping,>24m_ODI_sleeping
count,602.0,537.0,471.0,460.0,157.0,531.0
mean,1.965116,0.945996,0.868365,-42.593478,1.0,-36.725047
std,1.309296,1.147218,1.133474,658.655091,1.182132,613.134471
min,0.0,0.0,0.0,-9999.0,0.0,-9999.0
25%,1.0,0.0,0.0,0.0,0.0,0.0
50%,2.0,1.0,0.0,1.0,1.0,1.0
75%,3.0,1.0,1.0,1.0,1.0,1.0
max,5.0,5.0,5.0,5.0,5.0,5.0


In [6]:
col_sleep = [
  'BL_ODI_sleeping',
  '3m_ODI_sleeping',
  '12m_ODI_sleeping',
  '>24m_ODI_sleeping'
]
df_sleep = df.dropna(subset=col_sleep[-1])
df_sleep = df_sleep.drop(df_sleep[df_sleep[col_sleep[-1]] < 0].index)
df_sleep.shape

(529, 256)

In [7]:
pkl_file = './df_sleep.pkl'

na_cols = ['3m_ODI_sleeping','12m_ODI_sleeping']

if exists(pkl_file):
  df_mf = pd.read_pickle(pkl_file)
else:
  mf = MissForest()
  df_na_scores_filled = mf.fit_transform(df_sleep[na_cols].copy())
  df_mf = df_sleep.copy()
  df_mf[na_cols] = df_na_scores_filled
  df_mf[col_sleep] = df_mf[col_sleep].round().astype('Int64')
  df_mf.to_pickle(pkl_file)

print('Any null values:', df_mf[col_sleep].isnull().values.any())
display(df_mf[col_sleep].head())

Any null values: True


Unnamed: 0,BL_ODI_sleeping,3m_ODI_sleeping,12m_ODI_sleeping,>24m_ODI_sleeping
0,1,0,0,0
1,0,0,0,0
2,2,0,1,0
9,1,1,1,1
10,1,1,0,0


In [8]:
bool_impaired_BL = df_sleep['BL_ODI_sleeping'] > 0
bool_normal_BL = df_sleep['BL_ODI_sleeping'] == 0

In [9]:
def value_to_pct(col):
  tmp = col.value_counts()
  for i in range(6):
    try:
      tmp.loc[i]
    except KeyError:
      tmp.loc[i] = 0.0
  values = tmp.astype(str)
  pct = col.value_counts(normalize=True).mul(100).round(1).astype(str) + '%'
  return (values + ' (' + pct + ')')

def df_odi_setup(df):
  df_sleep_all = pd.DataFrame({
    'Baseline ODI Item 7': df['BL_ODI_sleeping'].value_counts(),
    '3 mo ODI Item 7': df['3m_ODI_sleeping'].value_counts(),
    '12 mo ODI Item 7': df['12m_ODI_sleeping'].value_counts(),
    '>24 mo ODI Item 7': df['>24m_ODI_sleeping'].value_counts()
  })
  df_sleep_impaired = pd.DataFrame({
    'BL_ODI_sleeping': df[bool_impaired_BL]['BL_ODI_sleeping'].value_counts(),
    '3m_ODI_sleeping': df[bool_impaired_BL]['3m_ODI_sleeping'].value_counts(),
    '12m_ODI_sleeping': df[bool_impaired_BL]['12m_ODI_sleeping'].value_counts(),
    '>24m_ODI_sleeping': df[bool_impaired_BL]['>24m_ODI_sleeping'].value_counts()
  })
  df_sleep_normal = pd.DataFrame({
    'BL_ODI_sleeping': df[bool_normal_BL]['BL_ODI_sleeping'].value_counts(),
    '3m_ODI_sleeping': df[bool_normal_BL]['3m_ODI_sleeping'].value_counts(),
    '12m_ODI_sleeping': df[bool_normal_BL]['12m_ODI_sleeping'].value_counts(),
    '>24m_ODI_sleeping': df[bool_normal_BL]['>24m_ODI_sleeping'].value_counts()
  })

  df_sleep_all_pct = pd.DataFrame({
    'Baseline': value_to_pct(df['BL_ODI_sleeping']),
    '3 mo': value_to_pct(df['3m_ODI_sleeping']),
    '12 mo': value_to_pct(df['12m_ODI_sleeping']),
    '>24 mo': value_to_pct(df['>24m_ODI_sleeping']),
  }).fillna('0 (0%)')
  df_sleep_impaired_pct = pd.DataFrame({
    'Baseline': value_to_pct(df[bool_impaired_BL]['BL_ODI_sleeping']),
    '3 mo': value_to_pct(df[bool_impaired_BL]['3m_ODI_sleeping']),
    '12 mo': value_to_pct(df[bool_impaired_BL]['12m_ODI_sleeping']),
    '>24 mo': value_to_pct(df[bool_impaired_BL]['>24m_ODI_sleeping']),
  }).fillna('0 (0%)')
  df_sleep_normal_pct = pd.DataFrame({
    'Baseline': value_to_pct(df[bool_normal_BL]['BL_ODI_sleeping']),
    '3 mo': value_to_pct(df[bool_normal_BL]['3m_ODI_sleeping']),
    '12 mo': value_to_pct(df[bool_normal_BL]['12m_ODI_sleeping']),
    '>24 mo': value_to_pct(df[bool_normal_BL]['>24m_ODI_sleeping']),
  }).fillna('0 (0%)')

  df_sleep_all_pct.index.name = 'ODI Item 7 Score'
  df_sleep_impaired_pct.index.name = 'ODI Item 7 Score'
  df_sleep_normal_pct.index.name = 'ODI Item 7 Score'

  return (
    (df_sleep_all, df_sleep_impaired, df_sleep_normal), 
    (df_sleep_all_pct, df_sleep_impaired_pct, df_sleep_normal_pct)
  )

In [10]:
(
    (df_sleep_all, df_sleep_impaired, df_sleep_normal), 
    (df_sleep_all_pct, df_sleep_impaired_pct, df_sleep_normal_pct)
) = df_odi_setup(df_sleep)

In [11]:
print('All Patients')
display(df_sleep_all_pct)
print('Patients with Impairment at Baseline')
display(df_sleep_impaired_pct)
print('Patients with No Impairment at Baseline')
display(df_sleep_normal_pct)

All Patients


Unnamed: 0_level_0,Baseline,3 mo,12 mo,>24 mo
ODI Item 7 Score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,71 (13.5%),226 (47.4%),219 (50.5%),238 (45.0%)
1.0,157 (29.8%),141 (29.6%),121 (27.9%),168 (31.8%)
2.0,130 (24.7%),53 (11.1%),44 (10.1%),63 (11.9%)
3.0,102 (19.4%),38 (8.0%),34 (7.8%),41 (7.8%)
4.0,49 (9.3%),16 (3.4%),12 (2.8%),15 (2.8%)
5.0,17 (3.2%),3 (0.6%),4 (0.9%),4 (0.8%)


Patients with Impairment at Baseline


Unnamed: 0_level_0,Baseline,3 mo,12 mo,>24 mo
ODI Item 7 Score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,0 (0%),180 (43.6%),173 (46.3%),184 (40.4%)
1.0,157.0 (34.5%),128 (31.0%),112 (29.9%),154 (33.8%)
2.0,130.0 (28.6%),52 (12.6%),41 (11.0%),59 (13.0%)
3.0,102.0 (22.4%),35 (8.5%),33 (8.8%),40 (8.8%)
4.0,49.0 (10.8%),15 (3.6%),11 (2.9%),14 (3.1%)
5.0,17.0 (3.7%),3 (0.7%),4 (1.1%),4 (0.9%)


Patients with No Impairment at Baseline


Unnamed: 0_level_0,Baseline,3 mo,12 mo,>24 mo
ODI Item 7 Score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,71.0 (100.0%),46.0 (71.9%),46.0 (76.7%),51.0 (71.8%)
1.0,0 (0%),13.0 (20.3%),9.0 (15.0%),14.0 (19.7%)
2.0,0 (0%),1.0 (1.6%),3.0 (5.0%),4.0 (5.6%)
3.0,0 (0%),3.0 (4.7%),1.0 (1.7%),1.0 (1.4%)
4.0,0 (0%),1.0 (1.6%),1.0 (1.7%),1.0 (1.4%)
5.0,0 (0%),0 (0%),0 (0%),0 (0%)


In [12]:
with pd.ExcelWriter("./figures/figure1.xlsx") as writer:
  df_sleep_all_pct.to_excel(writer, sheet_name='All')
  df_sleep_impaired_pct.to_excel(writer, sheet_name='Impairment at Baseline')
  df_sleep_normal_pct.to_excel(writer, sheet_name='No Impairment at Baseline')

In [13]:
chisqt = pd.crosstab(df_sleep['3m_ODI_sleeping'] == 0, df_sleep['3m_ODI_sleeping'])
value = np.array([chisqt.iloc[0].values, chisqt.iloc[1].values])
chi2, p, dof, _ = chi2_contingency(value)
if p < 0.001:
  p = "<0.001"
else:
  p = '{:.3f}'.format(p)
print('p =', p)
print('chi2 =', chi2)
print('DOF =', dof)

p = <0.001
chi2 = 476.99999999999994
DOF = 5


In [14]:
chisqt = pd.crosstab(df_sleep['>24m_ODI_sleeping'] == 0, df_sleep['>24m_ODI_sleeping'])
value = np.array([chisqt.iloc[0].values, chisqt.iloc[1].values])
chi2, p, dof, _ = chi2_contingency(value)
if p < 0.001:
  p = "<0.001"
else:
  p = '{:.3f}'.format(p)
print('p =', p)
print('chi2 =', chi2)
print('DOF =', dof)

p = <0.001
chi2 = 529.0
DOF = 5


In [15]:
df_sleep.columns.to_list()

['id',
 'site',
 'site_id',
 'date_sx',
 'date_sx_A',
 'age',
 'sex',
 'principal_spondy_dx',
 'grade_listhesis',
 'height',
 'weight',
 'insurance',
 'prev_sx',
 'smoker',
 'diabetes',
 'cad',
 'anxiety',
 'depression',
 'osteoporosis',
 'main_symptom',
 'motor_deficit',
 'ambulation',
 'symptom_duration',
 'bmi',
 'ethniticity',
 'education',
 'workers_comp',
 'employment',
 'unemployed_status',
 'asa_grade',
 'surg_approach',
 'laminectomy_performed',
 'laminectomy_levels',
 'arthrodesis_performed',
 'arthrodesis_levels',
 'interbody_graft',
 'MIS_decompression',
 'MIS_percutaneous_pedicle_screws',
 'MIS_pedicle_screws',
 'cortical_screws',
 'MIS_interbody',
 'blood_loss',
 'length_of_surgery',
 'los',
 'place_discharged_to',
 'place_discharged_to_facility',
 '3m_pt_satisfaction',
 '12m_pt_satisfaction',
 '24m_pt_satisfaction',
 '36m_pt_satisfaction',
 'latest_pt_satisfaction',
 '3m_return_to_work',
 '12m_return_to_work',
 '24m_return_to_work',
 '36m_return_to_work',
 'latest_return

In [16]:
df_bl_impaired = df_sleep.loc[df_sleep['BL_ODI_sleeping'] > 0]
df_bl_impaired.shape

(455, 256)

In [17]:
np.array(np.unique(np.where(df_bl_impaired['BL_ODI_sleeping'] > df_bl_impaired['>24m_ODI_sleeping'], 1, 0), return_counts=True)).T

array([[  0, 136],
       [  1, 319]])

# Multivariable analysis

In [18]:
multivar_cols = [
  'age',
  'bmi',
  'main_symptom',
  'motor_deficit',
  'asa_grade',
  'BL_ODI',
  'MIS_decompression',
  'MIS_percutaneous_pedicle_screws',
  'MIS_pedicle_screws',
  'MIS_interbody',
  'BL_ODI_sleeping',
  '>24m_ODI_sleeping'
]

df_multivar = df_sleep.loc[df_sleep['BL_ODI_sleeping'] > 0, multivar_cols].copy().dropna()

main_symptom_map = {
  1: 'back dominant',
  2: 'leg dominant',
  3: 'back-leg'
}
df_multivar['leg_dominant'] = (df_multivar['main_symptom'] == 2).astype(int)
df_multivar['motor_deficit'] = df_multivar['motor_deficit'].astype(int)
df_multivar['asa_grade_12_vs_34'] = df_multivar['asa_grade'].apply(lambda x: 0 if x < 3 else 1)

df_multivar_mis_bool = df_multivar.loc[:, [
  'MIS_decompression',
  'MIS_percutaneous_pedicle_screws',
  'MIS_pedicle_screws',
  'MIS_interbody'
]].any(axis=1)
df_multivar['MIS'] = df_multivar_mis_bool.astype(int)
df_multivar['improved_sleep'] = (df_multivar['>24m_ODI_sleeping'] < df_multivar['BL_ODI_sleeping']).astype(int)

multivar_cols2 = [
  'age',
  'bmi',
  'leg_dominant',
  'motor_deficit',
  'asa_grade_12_vs_34',
  'BL_ODI',
  'MIS',
  'improved_sleep'
]
display(df_multivar.loc[:, multivar_cols2].head(3))
print(df_multivar.shape)

Unnamed: 0,age,bmi,leg_dominant,motor_deficit,asa_grade_12_vs_34,BL_ODI,MIS,improved_sleep
0,68.35,30.52,0,1,0,20.0,1,1
2,71.28,28.99,1,0,0,44.444444,0,1
9,84.34,25.2,0,0,1,37.5,1,0


(438, 16)


In [19]:
df_multivar[df_multivar['MIS'].astype(bool) & ~df_multivar['improved_sleep'].astype(bool)].shape

(43, 16)

In [20]:
import statsmodels.formula.api as smf

res = smf.glm("""improved_sleep ~ 
                  bmi +
                  leg_dominant +
                  motor_deficit +
                  asa_grade_12_vs_34 +
                  BL_ODI +
                  MIS
                  """, data=df_multivar).fit()
res.summary()

0,1,2,3
Dep. Variable:,improved_sleep,No. Observations:,438.0
Model:,GLM,Df Residuals:,431.0
Model Family:,Gaussian,Df Model:,6.0
Link Function:,Identity,Scale:,0.20308
Method:,IRLS,Log-Likelihood:,-268.85
Date:,"Tue, 26 Dec 2023",Deviance:,87.527
Time:,13:43:41,Pearson chi2:,87.5
No. Iterations:,3,Pseudo R-squ. (CS):,0.0429
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.5932,0.121,4.898,0.000,0.356,0.831
bmi,-0.0033,0.004,-0.884,0.377,-0.010,0.004
leg_dominant,0.0984,0.055,1.782,0.075,-0.010,0.207
motor_deficit,0.0615,0.053,1.168,0.243,-0.042,0.165
asa_grade_12_vs_34,-0.0782,0.047,-1.659,0.097,-0.170,0.014
BL_ODI,0.0036,0.001,2.526,0.012,0.001,0.006
MIS,0.0835,0.044,1.890,0.059,-0.003,0.170


In [21]:
def print_OR(results):
  conf = results.conf_int()
  conf['OR'] = results.params
  conf.columns = ['5%', '95%', 'OR']
  df_pp = np.exp(conf[['OR', '5%', '95%']])
  df_pp['p'] = results.pvalues
  df_pp = df_pp.round(2)
  df_pp['p'] = df_pp['p'].apply(lambda x: f'{x}**' if x < 0.05 else x)
  return df_pp.iloc[1:]

In [22]:
print_OR(res)

Unnamed: 0,OR,5%,95%,p
bmi,1.0,0.99,1.0,0.38
leg_dominant,1.1,0.99,1.23,0.07
motor_deficit,1.06,0.96,1.18,0.24
asa_grade_12_vs_34,0.92,0.84,1.01,0.1
BL_ODI,1.0,1.0,1.01,0.01**
MIS,1.09,1.0,1.19,0.06


# Subgroup Multivariable Analysis

In [23]:
import statsmodels.formula.api as smf

res_lt_65 = smf.glm("""improved_sleep ~ 
                  bmi +
                  leg_dominant +
                  motor_deficit +
                  asa_grade_12_vs_34 +
                  BL_ODI +
                  MIS
                  """, data=df_multivar, subset=(df_multivar['age'] < 65)).fit()
res_lt_65.summary()

0,1,2,3
Dep. Variable:,improved_sleep,No. Observations:,254.0
Model:,GLM,Df Residuals:,247.0
Model Family:,Gaussian,Df Model:,6.0
Link Function:,Identity,Scale:,0.21135
Method:,IRLS,Log-Likelihood:,-159.47
Date:,"Tue, 26 Dec 2023",Deviance:,52.204
Time:,13:43:41,Pearson chi2:,52.2
No. Iterations:,3,Pseudo R-squ. (CS):,0.06652
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.5322,0.159,3.355,0.001,0.221,0.843
bmi,-0.0022,0.004,-0.481,0.631,-0.011,0.007
leg_dominant,0.1330,0.089,1.499,0.134,-0.041,0.307
motor_deficit,0.1246,0.070,1.787,0.074,-0.012,0.261
asa_grade_12_vs_34,-0.1570,0.064,-2.435,0.015,-0.283,-0.031
BL_ODI,0.0041,0.002,2.280,0.023,0.001,0.008
MIS,0.0483,0.060,0.800,0.424,-0.070,0.166


In [24]:
print_OR(res_lt_65)

Unnamed: 0,OR,5%,95%,p
bmi,1.0,0.99,1.01,0.63
leg_dominant,1.14,0.96,1.36,0.13
motor_deficit,1.13,0.99,1.3,0.07
asa_grade_12_vs_34,0.85,0.75,0.97,0.01**
BL_ODI,1.0,1.0,1.01,0.02**
MIS,1.05,0.93,1.18,0.42


In [25]:
res_gte_65 = smf.glm("""improved_sleep ~ 
                  bmi +
                  leg_dominant +
                  motor_deficit +
                  asa_grade_12_vs_34 +
                  BL_ODI +
                  MIS
                  """, data=df_multivar, subset=(df_multivar['age'] >= 65)).fit()
res_gte_65.summary()

0,1,2,3
Dep. Variable:,improved_sleep,No. Observations:,184.0
Model:,GLM,Df Residuals:,177.0
Model Family:,Gaussian,Df Model:,6.0
Link Function:,Identity,Scale:,0.18851
Method:,IRLS,Log-Likelihood:,-104.01
Date:,"Tue, 26 Dec 2023",Deviance:,33.367
Time:,13:43:41,Pearson chi2:,33.4
No. Iterations:,3,Pseudo R-squ. (CS):,0.04656
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.4703,0.210,2.238,0.025,0.058,0.882
bmi,0.0003,0.007,0.048,0.962,-0.014,0.014
leg_dominant,0.0500,0.072,0.696,0.486,-0.091,0.191
motor_deficit,-0.0309,0.080,-0.386,0.699,-0.188,0.126
asa_grade_12_vs_34,-0.0010,0.069,-0.014,0.989,-0.136,0.134
BL_ODI,0.0045,0.002,1.824,0.068,-0.000,0.009
MIS,0.1273,0.065,1.943,0.052,-0.001,0.256


In [26]:
print_OR(res_gte_65)

Unnamed: 0,OR,5%,95%,p
bmi,1.0,0.99,1.01,0.96
leg_dominant,1.05,0.91,1.21,0.49
motor_deficit,0.97,0.83,1.13,0.7
asa_grade_12_vs_34,1.0,0.87,1.14,0.99
BL_ODI,1.0,1.0,1.01,0.07
MIS,1.14,1.0,1.29,0.05
