In [243]:
%matplotlib inline
import pandas as pd
import numpy as np
from IPython.display import display
from scipy.stats import chi2_contingency
from os.path import exists
from missforest.miss_forest import MissForest
from pandas.api.types import CategoricalDtype

# Seed NumPy's global random number generator so that anything drawing from it
# gives the same result on every run.
# NOTE(review): whether MissForest (used later in this notebook) takes its
# randomness from this global seed is not visible here — confirm against the
# missforest documentation.
np.random.seed(123)


In [244]:
# Load the source DataFrame from a local pickle and show its (rows, columns) shape.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling —
# only load files that come from a trusted source.
pkl_file = './df.pkl'
df = pd.read_pickle(pkl_file)
df.shape

(608, 255)

In [245]:
# The four ODI sleeping-item score columns: baseline, 3, 12 and 24 months.
col_sleep = [
  'BL_ODI_sleeping',
  '3m_ODI_sleeping',
  '12m_ODI_sleeping',
  '24m_ODI_sleeping'
]

# Keep only the rows whose 24-month score is present and non-negative.
# A boolean row mask is used instead of drop(<index labels>): dropping by label
# would also remove any other row that happens to share an index label with a
# negative-score row, whereas the mask judges every row on its own value.
# .copy() makes df_sleep an independent frame so later assignments to it do not
# trigger SettingWithCopyWarning.
score_24m = df['24m_ODI_sleeping']
df_sleep = df.loc[score_24m.notna() & (score_24m >= 0)].copy()
df_sleep.shape

(458, 255)

In [246]:
# Cache file for the imputed frame.
# NOTE(review): an existing cache is always reused, even if df_sleep has changed
# since it was written — delete the file to force a fresh imputation.
pkl_file = './df_sleep.pkl'

# Follow-up score columns whose missing values are imputed.
na_cols = ['3m_ODI_sleeping','12m_ODI_sleeping']

if not exists(pkl_file):
  # No cache yet: impute the two follow-up columns with MissForest, write them
  # back into a copy of df_sleep, and persist the result.
  mf = MissForest()
  df_na_scores_filled = mf.fit_transform(df_sleep[na_cols].copy())
  df_mf = df_sleep.copy()
  df_mf[na_cols] = df_na_scores_filled
  # Round every score column to whole numbers (nullable Int64 keeps missing
  # entries), then store the columns as ordered categoricals.
  scores_int = df_mf[col_sleep].round().astype('Int64')
  df_mf[col_sleep] = scores_int.astype(CategoricalDtype(ordered=True))
  df_mf.to_pickle(pkl_file)
else:
  # NOTE(review): pd.read_pickle can execute arbitrary code — trusted files only.
  df_mf = pd.read_pickle(pkl_file)

# Sanity check across all four score columns.
# NOTE(review): only the columns in na_cols are imputed, so missing values in
# the baseline column are left as-is — presumably why this still reports True.
has_nulls = df_mf[col_sleep].isnull().to_numpy().any()
print('Any null values:', has_nulls)
display(df_mf[col_sleep].head())

Any null values: True


Unnamed: 0,BL_ODI_sleeping,3m_ODI_sleeping,12m_ODI_sleeping,24m_ODI_sleeping
0,1,0,0,0
1,0,0,0,0
2,2,0,1,0
9,1,1,1,1
10,1,1,0,0


In [247]:
# Row masks over df_sleep that split patients by baseline sleeping score:
# "impaired" is any score above 0, "normal" is a score of exactly 0.
# A missing baseline score compares False both times, so such a row belongs to
# neither subgroup.
bool_impaired_BL = df_sleep['BL_ODI_sleeping'].gt(0)
bool_normal_BL = df_sleep['BL_ODI_sleeping'].eq(0)

In [248]:
# Frequency of each 12-month score in the imputed frame (df_mf), most common first.
df_mf['12m_ODI_sleeping'].value_counts()

0    244
1    116
2     53
3     31
4     11
5      3
Name: 12m_ODI_sleeping, dtype: int64

In [249]:
def value_to_pct(col):
  """Format the value frequencies of a Series as 'count (percent%)' strings.

  Args:
    col: pandas Series whose distinct values are tallied; missing values are
      left out of both the counts and the percentages by value_counts().

  Returns:
    A Series of strings indexed by the distinct values of `col`, for example
    '3 (50.0%)', with the percentage rounded to one decimal place.
  """
  counts_txt = col.value_counts().astype(str)
  share = col.value_counts(normalize=True) * 100
  share_txt = share.round(1).astype(str) + '%'
  return counts_txt + ' (' + share_txt + ')'

def df_odi_setup(df):
  """Tabulate ODI sleeping-item scores per time point, overall and by baseline status.

  The baseline subgroups are derived from `df` itself (baseline score > 0 is
  "impaired", == 0 is "normal") rather than from notebook-level masks built on
  another frame, so the function gives consistent results for any frame that
  has the four score columns. Rows whose baseline score is missing satisfy
  neither comparison and therefore only appear in the all-patients tables.

  Args:
    df: DataFrame containing 'BL_ODI_sleeping', '3m_ODI_sleeping',
      '12m_ODI_sleeping' and '24m_ODI_sleeping'.

  Returns:
    A pair of 3-tuples, each ordered (all patients, impaired at baseline,
    normal at baseline):
      * raw value-count tables, one column per time point;
      * 'count (percent%)' string tables, with scores that do not occur at a
        time point filled with '0 (0%)' and the index named 'ODI Item 7 Score'.
  """
  score_cols = ['BL_ODI_sleeping', '3m_ODI_sleeping', '12m_ODI_sleeping', '24m_ODI_sleeping']
  long_labels = ['Baseline ODI Item 7', '3 mo ODI Item 7', '12 mo ODI Item 7', '24 mo ODI Item 7']
  short_labels = ['Baseline', '3 mo', '12 mo', '24 mo']

  # Split on the baseline score of the frame that was actually passed in.
  baseline = df['BL_ODI_sleeping']
  df_impaired = df[baseline > 0]
  df_normal = df[baseline == 0]

  def count_table(frame, labels):
    # One column of raw value counts per time point, named by `labels`.
    return pd.DataFrame({
      label: frame[col].value_counts() for label, col in zip(labels, score_cols)
    })

  def pct_table(frame):
    # Same layout as count_table, but with formatted 'count (percent%)' cells.
    table = pd.DataFrame({
      label: value_to_pct(frame[col]) for label, col in zip(short_labels, score_cols)
    }).fillna('0 (0%)')
    table.index.name = 'ODI Item 7 Score'
    return table

  # The all-patients count table uses the descriptive labels; the subgroup
  # count tables keep the raw column names.
  counts = (
    count_table(df, long_labels),
    count_table(df_impaired, score_cols),
    count_table(df_normal, score_cols),
  )
  formatted = (pct_table(df), pct_table(df_impaired), pct_table(df_normal))
  return (counts, formatted)

In [250]:
# Build the raw count tables and the formatted "count (percent)" tables from
# df_sleep, i.e. the frame before imputation, and unpack them into one
# variable per (table kind, patient group).
(
    (df_sleep_all, df_sleep_impaired, df_sleep_normal), 
    (df_sleep_all_pct, df_sleep_impaired_pct, df_sleep_normal_pct)
) = df_odi_setup(df_sleep)

In [251]:
# Show each formatted table under its heading, in a fixed order.
for title, table in [
  ('All Patients', df_sleep_all_pct),
  ('Patients with Impairment at Baseline', df_sleep_impaired_pct),
  ('Patients with No Impairment at Baseline', df_sleep_normal_pct),
]:
  print(title)
  display(table)

All Patients


Unnamed: 0_level_0,Baseline,3 mo,12 mo,24 mo
ODI Item 7 Score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,66 (14.4%),202 (48.8%),204 (53.7%),217 (47.4%)
1.0,143 (31.3%),124 (30.0%),99 (26.1%),142 (31.0%)
2.0,109 (23.9%),46 (11.1%),36 (9.5%),54 (11.8%)
3.0,88 (19.3%),30 (7.2%),27 (7.1%),28 (6.1%)
4.0,40 (8.8%),12 (2.9%),11 (2.9%),14 (3.1%)
5.0,11 (2.4%),0 (0%),3 (0.8%),3 (0.7%)


Patients with Impairment at Baseline


Unnamed: 0_level_0,Baseline,3 mo,12 mo,24 mo
ODI Item 7 Score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,0 (0%),159 (44.8%),159 (49.2%),167 (42.7%)
1.0,143 (36.6%),113 (31.8%),92 (28.5%),131 (33.5%)
2.0,109 (27.9%),45 (12.7%),33 (10.2%),50 (12.8%)
3.0,88 (22.5%),27 (7.6%),26 (8.0%),27 (6.9%)
4.0,40 (10.2%),11 (3.1%),10 (3.1%),13 (3.3%)
5.0,11 (2.8%),0 (0%),3 (0.9%),3 (0.8%)


Patients with No Impairment at Baseline


Unnamed: 0_level_0,Baseline,3 mo,12 mo,24 mo
ODI Item 7 Score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,66 (100.0%),43 (72.9%),45 (78.9%),49 (74.2%)
1.0,0 (0%),11 (18.6%),7 (12.3%),11 (16.7%)
2.0,0 (0%),1 (1.7%),3 (5.3%),4 (6.1%)
3.0,0 (0%),3 (5.1%),1 (1.8%),1 (1.5%)
4.0,0 (0%),1 (1.7%),1 (1.8%),1 (1.5%)


In [252]:
# Chi-square test of independence on a 2 x k contingency table:
# rows = "3-month score equals 0" (False/True), columns = the 3-month score.
# NOTE(review): the row variable is computed from the column variable, so the
# table is perfectly associated by construction and the statistic only reflects
# the sample size (the recorded chi2 of ~414 equals the number of non-missing
# 3-month scores). Confirm which grouping variable was intended, e.g. the
# baseline impairment status.
chisqt = pd.crosstab(df_sleep['3m_ODI_sleeping'] == 0, df_sleep['3m_ODI_sleeping'])
value = np.vstack([chisqt.iloc[0].to_numpy(), chisqt.iloc[1].to_numpy()])
chi2, p, dof, _ = chi2_contingency(value)
# Report very small p-values as a bound, otherwise with three decimals.
p = "<0.001" if p < 0.001 else f'{p:.3f}'
print('p =', p)
print('chi2 =', chi2)
print('DOF =', dof)

p = <0.001
chi2 = 414.00000000000006
DOF = 4


In [253]:
# Chi-square test of independence on a 2 x k contingency table:
# rows = "24-month score equals 0" (False/True), columns = the 24-month score.
# NOTE(review): the row variable is computed from the column variable, so the
# table is perfectly associated by construction and the statistic only reflects
# the sample size (the recorded chi2 of ~458 equals the number of rows in
# df_sleep). Confirm which grouping variable was intended, e.g. the baseline
# impairment status.
chisqt = pd.crosstab(df_sleep['24m_ODI_sleeping'] == 0, df_sleep['24m_ODI_sleeping'])
value = np.vstack([chisqt.iloc[0].to_numpy(), chisqt.iloc[1].to_numpy()])
chi2, p, dof, _ = chi2_contingency(value)
# Report very small p-values as a bound, otherwise with three decimals.
p = "<0.001" if p < 0.001 else f'{p:.3f}'
print('p =', p)
print('chi2 =', chi2)
print('DOF =', dof)

p = <0.001
chi2 = 457.99999999999994
DOF = 5


In [254]:
# List every column name in df_sleep, to look up the variables available for
# further analysis.
df_sleep.columns.to_list()

['id',
 'site',
 'site_id',
 'date_sx',
 'date_sx_A',
 'age',
 'sex',
 'principal_spondy_dx',
 'grade_listhesis',
 'height',
 'weight',
 'insurance',
 'prev_sx',
 'smoker',
 'diabetes',
 'cad',
 'anxiety',
 'depression',
 'osteoporosis',
 'main_symptom',
 'motor_deficit',
 'ambulation',
 'symptom_duration',
 'bmi',
 'ethniticity',
 'education',
 'workers_comp',
 'employment',
 'unemployed_status',
 'asa_grade',
 'surg_approach',
 'laminectomy_performed',
 'laminectomy_levels',
 'arthrodesis_performed',
 'arthrodesis_levels',
 'interbody_graft',
 'MIS_decompression',
 'MIS_percutaneous_pedicle_screws',
 'MIS_pedicle_screws',
 'cortical_screws',
 'MIS_interbody',
 'blood_loss',
 'length_of_surgery',
 'los',
 'place_discharged_to',
 'place_discharged_to_facility',
 '3m_pt_satisfaction',
 '12m_pt_satisfaction',
 '24m_pt_satisfaction',
 '36m_pt_satisfaction',
 'latest_pt_satisfaction',
 '3m_return_to_work',
 '12m_return_to_work',
 '24m_return_to_work',
 '36m_return_to_work',
 'latest_return

In [255]:
# 24-month score frequencies among patients impaired at baseline, selecting the
# rows and the column in a single .loc lookup.
df_sleep.loc[bool_impaired_BL, '24m_ODI_sleeping'].value_counts()

0.0    167
1.0    131
2.0     50
3.0     27
4.0     13
5.0      3
Name: 24m_ODI_sleeping, dtype: int64