In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
from IPython.display import display
from scipy.stats import chi2_contingency
from os.path import exists
from missforest.miss_forest import MissForest

# ensure reproducibility
np.random.seed(123)


In [2]:
pkl_file = './df.pkl'
df = pd.read_pickle(pkl_file)
df.shape

(608, 255)

In [3]:
col_sleep = [
  'BL_ODI_sleeping',
  '3m_ODI_sleeping',
  '12m_ODI_sleeping',
  '24m_ODI_sleeping',
  '36m_ODI_sleeping'
]

df.loc[:, col_sleep].describe()

Unnamed: 0,BL_ODI_sleeping,3m_ODI_sleeping,12m_ODI_sleeping,24m_ODI_sleeping,36m_ODI_sleeping
count,602.0,537.0,471.0,460.0,157.0
mean,1.965116,0.945996,0.868365,-42.593478,1.0
std,1.309296,1.147218,1.133474,658.655091,1.182132
min,0.0,0.0,0.0,-9999.0,0.0
25%,1.0,0.0,0.0,0.0,0.0
50%,2.0,1.0,0.0,1.0,1.0
75%,3.0,1.0,1.0,1.0,1.0
max,5.0,5.0,5.0,5.0,5.0


In [4]:
df['>24m_ODI_sleeping'] = np.where(
  df.loc[:, col_sleep[-2]].notna(), 
  df.loc[:, col_sleep[-2]], 
  df.loc[:, col_sleep[-1]]
)

In [5]:
df.loc[:, col_sleep + ['>24m_ODI_sleeping']].describe()

Unnamed: 0,BL_ODI_sleeping,3m_ODI_sleeping,12m_ODI_sleeping,24m_ODI_sleeping,36m_ODI_sleeping,>24m_ODI_sleeping
count,602.0,537.0,471.0,460.0,157.0,531.0
mean,1.965116,0.945996,0.868365,-42.593478,1.0,-36.725047
std,1.309296,1.147218,1.133474,658.655091,1.182132,613.134471
min,0.0,0.0,0.0,-9999.0,0.0,-9999.0
25%,1.0,0.0,0.0,0.0,0.0,0.0
50%,2.0,1.0,0.0,1.0,1.0,1.0
75%,3.0,1.0,1.0,1.0,1.0,1.0
max,5.0,5.0,5.0,5.0,5.0,5.0


In [6]:
col_sleep = [
  'BL_ODI_sleeping',
  '3m_ODI_sleeping',
  '12m_ODI_sleeping',
  '>24m_ODI_sleeping'
]
df_sleep = df.dropna(subset=col_sleep[-1])
df_sleep = df_sleep.drop(df_sleep[df_sleep[col_sleep[-1]] < 0].index)
df_sleep.shape

(529, 256)

In [7]:
pkl_file = './df_sleep.pkl'

na_cols = ['3m_ODI_sleeping','12m_ODI_sleeping']

if exists(pkl_file):
  df_mf = pd.read_pickle(pkl_file)
else:
  mf = MissForest()
  df_na_scores_filled = mf.fit_transform(df_sleep[na_cols].copy())
  df_mf = df_sleep.copy()
  df_mf[na_cols] = df_na_scores_filled
  df_mf[col_sleep] = df_mf[col_sleep].round().astype('Int64')
  df_mf.to_pickle(pkl_file)

print('Any null values:', df_mf[col_sleep].isnull().values.any())
display(df_mf[col_sleep].head())

Any null values: True


Unnamed: 0,BL_ODI_sleeping,3m_ODI_sleeping,12m_ODI_sleeping,>24m_ODI_sleeping
0,1,0,0,0
1,0,0,0,0
2,2,0,1,0
9,1,1,1,1
10,1,1,0,0


In [8]:
bool_impaired_BL = df_sleep['BL_ODI_sleeping'] > 0
bool_normal_BL = df_sleep['BL_ODI_sleeping'] == 0

In [9]:
def value_to_pct(col):
  tmp = col.value_counts()
  for i in range(6):
    try:
      tmp.loc[i]
    except KeyError:
      tmp.loc[i] = 0.0
  values = tmp.astype(str)
  pct = col.value_counts(normalize=True).mul(100).round(1).astype(str) + '%'
  return (values + ' (' + pct + ')')

def df_odi_setup(df):
  df_sleep_all = pd.DataFrame({
    'Baseline ODI Item 7': df['BL_ODI_sleeping'].value_counts(),
    '3 mo ODI Item 7': df['3m_ODI_sleeping'].value_counts(),
    '12 mo ODI Item 7': df['12m_ODI_sleeping'].value_counts(),
    '>24 mo ODI Item 7': df['>24m_ODI_sleeping'].value_counts()
  })
  df_sleep_impaired = pd.DataFrame({
    'BL_ODI_sleeping': df[bool_impaired_BL]['BL_ODI_sleeping'].value_counts(),
    '3m_ODI_sleeping': df[bool_impaired_BL]['3m_ODI_sleeping'].value_counts(),
    '12m_ODI_sleeping': df[bool_impaired_BL]['12m_ODI_sleeping'].value_counts(),
    '>24m_ODI_sleeping': df[bool_impaired_BL]['>24m_ODI_sleeping'].value_counts()
  })
  df_sleep_normal = pd.DataFrame({
    'BL_ODI_sleeping': df[bool_normal_BL]['BL_ODI_sleeping'].value_counts(),
    '3m_ODI_sleeping': df[bool_normal_BL]['3m_ODI_sleeping'].value_counts(),
    '12m_ODI_sleeping': df[bool_normal_BL]['12m_ODI_sleeping'].value_counts(),
    '>24m_ODI_sleeping': df[bool_normal_BL]['>24m_ODI_sleeping'].value_counts()
  })

  df_sleep_all_pct = pd.DataFrame({
    'Baseline': value_to_pct(df['BL_ODI_sleeping']),
    '3 mo': value_to_pct(df['3m_ODI_sleeping']),
    '12 mo': value_to_pct(df['12m_ODI_sleeping']),
    '>24 mo': value_to_pct(df['>24m_ODI_sleeping']),
  }).fillna('0 (0%)')
  df_sleep_impaired_pct = pd.DataFrame({
    'Baseline': value_to_pct(df[bool_impaired_BL]['BL_ODI_sleeping']),
    '3 mo': value_to_pct(df[bool_impaired_BL]['3m_ODI_sleeping']),
    '12 mo': value_to_pct(df[bool_impaired_BL]['12m_ODI_sleeping']),
    '>24 mo': value_to_pct(df[bool_impaired_BL]['>24m_ODI_sleeping']),
  }).fillna('0 (0%)')
  df_sleep_normal_pct = pd.DataFrame({
    'Baseline': value_to_pct(df[bool_normal_BL]['BL_ODI_sleeping']),
    '3 mo': value_to_pct(df[bool_normal_BL]['3m_ODI_sleeping']),
    '12 mo': value_to_pct(df[bool_normal_BL]['12m_ODI_sleeping']),
    '>24 mo': value_to_pct(df[bool_normal_BL]['>24m_ODI_sleeping']),
  }).fillna('0 (0%)')

  df_sleep_all_pct.index.name = 'ODI Item 7 Score'
  df_sleep_impaired_pct.index.name = 'ODI Item 7 Score'
  df_sleep_normal_pct.index.name = 'ODI Item 7 Score'

  return (
    (df_sleep_all, df_sleep_impaired, df_sleep_normal), 
    (df_sleep_all_pct, df_sleep_impaired_pct, df_sleep_normal_pct)
  )

In [10]:
(
    (df_sleep_all, df_sleep_impaired, df_sleep_normal), 
    (df_sleep_all_pct, df_sleep_impaired_pct, df_sleep_normal_pct)
) = df_odi_setup(df_sleep)

In [11]:
print('All Patients')
display(df_sleep_all_pct)
print('Patients with Impairment at Baseline')
display(df_sleep_impaired_pct)
print('Patients with No Impairment at Baseline')
display(df_sleep_normal_pct)

All Patients


Unnamed: 0_level_0,Baseline,3 mo,12 mo,>24 mo
ODI Item 7 Score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,71 (13.5%),226 (47.4%),219 (50.5%),238 (45.0%)
1.0,157 (29.8%),141 (29.6%),121 (27.9%),168 (31.8%)
2.0,130 (24.7%),53 (11.1%),44 (10.1%),63 (11.9%)
3.0,102 (19.4%),38 (8.0%),34 (7.8%),41 (7.8%)
4.0,49 (9.3%),16 (3.4%),12 (2.8%),15 (2.8%)
5.0,17 (3.2%),3 (0.6%),4 (0.9%),4 (0.8%)


Patients with Impairment at Baseline


Unnamed: 0_level_0,Baseline,3 mo,12 mo,>24 mo
ODI Item 7 Score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,0 (0%),180 (43.6%),173 (46.3%),184 (40.4%)
1.0,157.0 (34.5%),128 (31.0%),112 (29.9%),154 (33.8%)
2.0,130.0 (28.6%),52 (12.6%),41 (11.0%),59 (13.0%)
3.0,102.0 (22.4%),35 (8.5%),33 (8.8%),40 (8.8%)
4.0,49.0 (10.8%),15 (3.6%),11 (2.9%),14 (3.1%)
5.0,17.0 (3.7%),3 (0.7%),4 (1.1%),4 (0.9%)


Patients with No Impairment at Baseline


Unnamed: 0_level_0,Baseline,3 mo,12 mo,>24 mo
ODI Item 7 Score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,71.0 (100.0%),46.0 (71.9%),46.0 (76.7%),51.0 (71.8%)
1.0,0 (0%),13.0 (20.3%),9.0 (15.0%),14.0 (19.7%)
2.0,0 (0%),1.0 (1.6%),3.0 (5.0%),4.0 (5.6%)
3.0,0 (0%),3.0 (4.7%),1.0 (1.7%),1.0 (1.4%)
4.0,0 (0%),1.0 (1.6%),1.0 (1.7%),1.0 (1.4%)
5.0,0 (0%),0 (0%),0 (0%),0 (0%)


In [12]:
with pd.ExcelWriter("./figures/figure1.xlsx") as writer:
  df_sleep_all_pct.to_excel(writer, sheet_name='All')
  df_sleep_impaired_pct.to_excel(writer, sheet_name='Impairment at Baseline')
  df_sleep_normal_pct.to_excel(writer, sheet_name='No Impairment at Baseline')

In [13]:
chisqt = pd.crosstab(df_sleep['3m_ODI_sleeping'] == 0, df_sleep['3m_ODI_sleeping'])
value = np.array([chisqt.iloc[0].values, chisqt.iloc[1].values])
chi2, p, dof, _ = chi2_contingency(value)
if p < 0.001:
  p = "<0.001"
else:
  p = '{:.3f}'.format(p)
print('p =', p)
print('chi2 =', chi2)
print('DOF =', dof)

p = <0.001
chi2 = 476.99999999999994
DOF = 5


In [14]:
chisqt = pd.crosstab(df_sleep['>24m_ODI_sleeping'] == 0, df_sleep['>24m_ODI_sleeping'])
value = np.array([chisqt.iloc[0].values, chisqt.iloc[1].values])
chi2, p, dof, _ = chi2_contingency(value)
if p < 0.001:
  p = "<0.001"
else:
  p = '{:.3f}'.format(p)
print('p =', p)
print('chi2 =', chi2)
print('DOF =', dof)

p = <0.001
chi2 = 529.0
DOF = 5


In [15]:
df_sleep.columns.to_list()

['id',
 'site',
 'site_id',
 'date_sx',
 'date_sx_A',
 'age',
 'sex',
 'principal_spondy_dx',
 'grade_listhesis',
 'height',
 'weight',
 'insurance',
 'prev_sx',
 'smoker',
 'diabetes',
 'cad',
 'anxiety',
 'depression',
 'osteoporosis',
 'main_symptom',
 'motor_deficit',
 'ambulation',
 'symptom_duration',
 'bmi',
 'ethniticity',
 'education',
 'workers_comp',
 'employment',
 'unemployed_status',
 'asa_grade',
 'surg_approach',
 'laminectomy_performed',
 'laminectomy_levels',
 'arthrodesis_performed',
 'arthrodesis_levels',
 'interbody_graft',
 'MIS_decompression',
 'MIS_percutaneous_pedicle_screws',
 'MIS_pedicle_screws',
 'cortical_screws',
 'MIS_interbody',
 'blood_loss',
 'length_of_surgery',
 'los',
 'place_discharged_to',
 'place_discharged_to_facility',
 '3m_pt_satisfaction',
 '12m_pt_satisfaction',
 '24m_pt_satisfaction',
 '36m_pt_satisfaction',
 'latest_pt_satisfaction',
 '3m_return_to_work',
 '12m_return_to_work',
 '24m_return_to_work',
 '36m_return_to_work',
 'latest_return

In [16]:
df_bl_impaired = df_sleep.loc[df_sleep['BL_ODI_sleeping'] > 0]
df_bl_impaired.shape

(455, 256)

In [17]:
np.array(np.unique(np.where(df_bl_impaired['BL_ODI_sleeping'] > df_bl_impaired['>24m_ODI_sleeping'], 1, 0), return_counts=True)).T

array([[  0, 136],
       [  1, 319]])

# Multivariable analysis

In [159]:
multivar_cols = [
  'age',
  'bmi',
  'main_symptom',
  'motor_deficit',
  'asa_grade',
  'BL_ODI',
  'MIS_decompression',
  'MIS_percutaneous_pedicle_screws',
  'MIS_pedicle_screws',
  'MIS_interbody',
  'BL_ODI_sleeping',
  '>24m_ODI_sleeping'
]

df_multivar = df_sleep.loc[df_sleep['BL_ODI_sleeping'] > 0, multivar_cols].copy().dropna()

main_symptom_map = {
  1: 'back dominant',
  2: 'leg dominant',
  3: 'back-leg'
}
df_multivar['leg_dominant'] = (df_multivar['main_symptom'] == 2).astype(int)
df_multivar['motor_deficit'] = df_multivar['motor_deficit'].astype(int)
df_multivar['asa_grade_12_vs_34'] = df_multivar['asa_grade'].apply(lambda x: 0 if x < 3 else 1)

df_multivar_mis_bool = df_multivar.loc[:, [
  'MIS_decompression',
  'MIS_percutaneous_pedicle_screws',
  'MIS_pedicle_screws',
  'MIS_interbody'
]].any(axis=1)
df_multivar['MIS'] = df_multivar_mis_bool.astype(int)
df_multivar['improved_sleep'] = (df_multivar['>24m_ODI_sleeping'] < df_multivar['BL_ODI_sleeping']).astype(int)

multivar_cols2 = [
  'age',
  'bmi',
  'leg_dominant',
  'motor_deficit',
  'asa_grade_12_vs_34',
  'BL_ODI',
  'MIS',
  'improved_sleep'
]
df_multivar.loc[:, multivar_cols2].head(3)

Unnamed: 0,age,bmi,leg_dominant,motor_deficit,asa_grade_12_vs_34,BL_ODI,MIS,improved_sleep
0,68.35,30.52,0,1,0,20.0,1,1
2,71.28,28.99,1,0,0,44.444444,0,1
9,84.34,25.2,0,0,1,37.5,1,0


In [160]:
import statsmodels.formula.api as smf

res = smf.logit("""improved_sleep ~ 
                  bmi +
                  leg_dominant +
                  motor_deficit +
                  asa_grade_12_vs_34 +
                  BL_ODI +
                  MIS
                  """, data=df_multivar).fit()
res.summary()

Optimization terminated successfully.
         Current function value: 0.586288
         Iterations 5


0,1,2,3
Dep. Variable:,improved_sleep,No. Observations:,438.0
Model:,Logit,Df Residuals:,431.0
Method:,MLE,Df Model:,6.0
Date:,"Tue, 19 Dec 2023",Pseudo R-squ.:,0.03592
Time:,16:38:50,Log-Likelihood:,-256.79
converged:,True,LL-Null:,-266.36
Covariance Type:,nonrobust,LLR p-value:,0.003942

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.3213,0.592,0.543,0.587,-0.839,1.481
bmi,-0.0157,0.018,-0.867,0.386,-0.051,0.020
leg_dominant,0.5047,0.290,1.739,0.082,-0.064,1.074
motor_deficit,0.3214,0.273,1.179,0.238,-0.213,0.856
asa_grade_12_vs_34,-0.3814,0.232,-1.645,0.100,-0.836,0.073
BL_ODI,0.0179,0.007,2.514,0.012,0.004,0.032
MIS,0.4242,0.224,1.898,0.058,-0.014,0.862


In [161]:
def print_OR(results):
  conf = results.conf_int()
  conf['OR'] = results.params
  conf.columns = ['5%', '95%', 'OR']
  df_pp = np.exp(conf[['OR', '5%', '95%']])
  df_pp['p'] = results.pvalues
  df_pp = df_pp.round(2)
  df_pp['p'] = df_pp['p'].apply(lambda x: f'{x}**' if x < 0.05 else x)
  return df_pp.iloc[1:]

In [162]:
print_OR(res)

Unnamed: 0,OR,5%,95%,p
bmi,0.98,0.95,1.02,0.39
leg_dominant,1.66,0.94,2.93,0.08
motor_deficit,1.38,0.81,2.35,0.24
asa_grade_12_vs_34,0.68,0.43,1.08,0.1
BL_ODI,1.02,1.0,1.03,0.01**
MIS,1.53,0.99,2.37,0.06


# Subgroup Multivariable Analysis

In [163]:
import statsmodels.formula.api as smf

res_lt_65 = smf.logit("""improved_sleep ~ 
                  bmi +
                  leg_dominant +
                  motor_deficit +
                  asa_grade_12_vs_34 +
                  BL_ODI +
                  MIS
                  """, data=df_multivar, subset=(df_multivar['age'] < 65)).fit()
res_lt_65.summary()

Optimization terminated successfully.
         Current function value: 0.598122
         Iterations 5


0,1,2,3
Dep. Variable:,improved_sleep,No. Observations:,254.0
Model:,Logit,Df Residuals:,247.0
Method:,MLE,Df Model:,6.0
Date:,"Tue, 19 Dec 2023",Pseudo R-squ.:,0.05341
Time:,16:40:14,Log-Likelihood:,-151.92
converged:,True,LL-Null:,-160.49
Covariance Type:,nonrobust,LLR p-value:,0.008771

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0552,0.747,0.074,0.941,-1.409,1.519
bmi,-0.0095,0.021,-0.448,0.654,-0.051,0.032
leg_dominant,0.6379,0.450,1.419,0.156,-0.243,1.519
motor_deficit,0.6329,0.359,1.761,0.078,-0.072,1.337
asa_grade_12_vs_34,-0.7372,0.308,-2.392,0.017,-1.341,-0.133
BL_ODI,0.0196,0.009,2.250,0.024,0.003,0.037
MIS,0.2300,0.291,0.789,0.430,-0.341,0.801


In [164]:
print_OR(res_lt_65)

Unnamed: 0,OR,5%,95%,p
bmi,0.99,0.95,1.03,0.65
leg_dominant,1.89,0.78,4.57,0.16
motor_deficit,1.88,0.93,3.81,0.08
asa_grade_12_vs_34,0.48,0.26,0.88,0.02**
BL_ODI,1.02,1.0,1.04,0.02**
MIS,1.26,0.71,2.23,0.43


In [165]:
res_gte_65 = smf.logit("""improved_sleep ~ 
                  bmi +
                  leg_dominant +
                  motor_deficit +
                  asa_grade_12_vs_34 +
                  BL_ODI +
                  MIS
                  """, data=df_multivar, subset=(df_multivar['age'] >= 65)).fit()
res_gte_65.summary()

Optimization terminated successfully.
         Current function value: 0.544404
         Iterations 6


0,1,2,3
Dep. Variable:,improved_sleep,No. Observations:,184.0
Model:,Logit,Df Residuals:,177.0
Method:,MLE,Df Model:,6.0
Date:,"Tue, 19 Dec 2023",Pseudo R-squ.:,0.04193
Time:,16:40:15,Log-Likelihood:,-100.17
converged:,True,LL-Null:,-104.55
Covariance Type:,nonrobust,LLR p-value:,0.1871

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.3696,1.152,-0.321,0.748,-2.627,1.888
bmi,0.0018,0.039,0.047,0.963,-0.075,0.079
leg_dominant,0.2664,0.400,0.666,0.506,-0.518,1.051
motor_deficit,-0.1427,0.430,-0.332,0.740,-0.985,0.700
asa_grade_12_vs_34,-0.0159,0.374,-0.043,0.966,-0.748,0.716
BL_ODI,0.0246,0.014,1.820,0.069,-0.002,0.051
MIS,0.7134,0.366,1.948,0.051,-0.004,1.431


In [166]:
print_OR(res_gte_65)

Unnamed: 0,OR,5%,95%,p
bmi,1.0,0.93,1.08,0.96
leg_dominant,1.31,0.6,2.86,0.51
motor_deficit,0.87,0.37,2.01,0.74
asa_grade_12_vs_34,0.98,0.47,2.05,0.97
BL_ODI,1.02,1.0,1.05,0.07
MIS,2.04,1.0,4.18,0.05
