# This notebook is created in association with the paper entitled:

## "Adolescent substance use disorder correlates with psychiatric hospitalization following an emergency department visit"

## Import Necessary Libraries

In [30]:
# standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Statistics libraries/functions
from scipy.stats import chi2_contingency
from scipy.stats import fisher_exact
from scipy.stats import ttest_ind as tt
from scipy.stats import normaltest as normal

# Custom libraries/functions
from grouped_normal_counts import gnc
from chi2_table import chi2_table

# Pandas display settings: show up to 100 rows, never truncate columns,
# and use a very wide line width for printed frames.
for option_name, option_value in (
    ('display.max_rows', 100),
    ('display.max_columns', None),
    ('display.width', 10000),
):
    pd.set_option(option_name, option_value)

## Read in data file 
- Note: Data file has been previously cleaned and pruned to final form prior to this notebook version. It is not possible to share all raw data without sacrificing patient privacy. 

In [29]:
# Load the pre-cleaned analysis dataset; the first CSV column becomes the index.
df = pd.read_csv('../paper_data.csv', index_col=[0])

# Sanity check that the expected file was loaded: print the frame's shape,
# then the largest and smallest age, one value per line.
ages = df['age']
print(df.shape, ages.max(), ages.min(), sep='\n')

(14852, 26)
21.0
15.0


## Descriptive statistics and demographic comparisons

In [32]:
# Sex breakdown by SUD diagnosis: the displayed table has one row per Sex value
# and a count plus a within-group proportion column for each SUD_dx value.
gnc(df, 'Sex', 'SUD_dx')

Unnamed: 0_level_0,count_SUD_dx_False,percent_SUD_dx_False,count_SUD_dx_True,percent_SUD_dx_True
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,7069,0.546248,710,0.371533
Male,5872,0.453752,1201,0.628467


In [35]:
# Chi-square test on the Sex x SUD_dx counts; the result string is shown in a
# 'chi2' column and repeated on every row of the table.
# NOTE(review): the displayed "p=0.0" is presumably a very small p-value rounded
# inside chi2_table -- confirm, and report it as p < 0.001 rather than 0.
chi2_table(gnc(df,'Sex','SUD_dx'), ['count_SUD_dx_True','count_SUD_dx_False'])

Unnamed: 0_level_0,count_SUD_dx_True,count_SUD_dx_False,chi2
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,710,7069,"chi^2(1) = 203.073, p=0.0"
Male,1201,5872,"chi^2(1) = 203.073, p=0.0"


In [47]:
# Age quartiles (75th percentile, median, 25th percentile) within each SUD_dx group.
df.groupby('SUD_dx')['age'].quantile([0.75, 0.50, 0.25])

SUD_dx      
False   0.75    20.0
        0.50    19.0
        0.25    17.0
True    0.75    20.0
        0.50    19.0
        0.25    17.0
Name: age, dtype: float64

In [50]:
# Age distribution by SUD diagnosis: count and within-group proportion for each
# age value, per SUD_dx group.
gnc(df,'age','SUD_dx')

Unnamed: 0_level_0,count_SUD_dx_False,percent_SUD_dx_False,count_SUD_dx_True,percent_SUD_dx_True
age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
15.0,1381,0.106715,141,0.073783
16.0,1570,0.12132,221,0.115646
17.0,1582,0.122247,258,0.135008
18.0,1760,0.136002,247,0.129252
19.0,2247,0.173634,312,0.163265
20.0,2126,0.164284,290,0.151753
21.0,2275,0.175798,442,0.231293


In [51]:
# Chi-square test of independence between age and SUD diagnosis.
# The 2 x 7 contingency table (rows: SUD_dx False/True, columns: age 15-21) is
# built directly from df instead of hand-copying counts from the previous cell,
# so the test cannot drift out of sync with the data.
chi2_contingency(pd.crosstab(df['SUD_dx'], df['age']))

(51.49353197154289,
 2.3574409113401693e-09,
 6,
 array([[1326.16496095, 1560.55285483, 1603.2480474 , 1748.76023431,
         2229.73464853, 2105.13439267, 2367.4048613 ],
        [ 195.83503905,  230.44714517,  236.7519526 ,  258.23976569,
          329.26535147,  310.86560733,  349.5951387 ]]))

In [52]:
# Race/ethnicity breakdown by SUD diagnosis: count and within-group proportion
# for each race_eth category, per SUD_dx group.
gnc(df, 'race_eth', 'SUD_dx')

Unnamed: 0_level_0,count_SUD_dx_False,percent_SUD_dx_False,count_SUD_dx_True,percent_SUD_dx_True
race_eth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
asian,272,0.021018,34,0.017792
black,1464,0.113129,208,0.108844
hispanic,6880,0.531644,796,0.416536
native_am,49,0.003786,11,0.005756
other,744,0.057492,89,0.046572
white,3532,0.272931,773,0.4045


In [53]:
# Chi-square test of independence between race/ethnicity and SUD diagnosis.
# The contingency table (rows: SUD_dx False/True, columns: race_eth categories
# in sorted order) is computed from df rather than typed in by hand, so the
# counts always reflect the loaded data.
chi2_contingency(pd.crosstab(df['SUD_dx'], df['race_eth']))

(148.38036719097374,
 2.9528871645280945e-30,
 5,
 array([[ 266.62712093, 1456.86453003, 6688.33261514,   52.27982763,
          725.81827363, 3751.07763264],
        [  39.37287907,  215.13546997,  987.66738486,    7.72017237,
          107.18172637,  553.92236736]]))

In [54]:
# Insurance breakdown by SUD diagnosis: count and within-group proportion for
# each insurance category, per SUD_dx group.
gnc(df, 'insurance', 'SUD_dx')

Unnamed: 0_level_0,count_SUD_dx_False,percent_SUD_dx_False,count_SUD_dx_True,percent_SUD_dx_True
insurance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
medicaid,7566,0.584653,961,0.502878
medicare,8,0.000618,2,0.001047
other,331,0.025578,102,0.053375
private,3061,0.236535,551,0.288331
uninsured,1975,0.152616,295,0.154369


In [55]:
# Chi-square test of independence between insurance type and SUD diagnosis.
# The contingency table is computed from df instead of hand-copied counts.
# reindex() (label-based) keeps the original layout -- rows SUD_dx True then
# False, columns in the order below -- so the expected-frequency array is
# oriented as before; the statistic, p-value and dof do not depend on ordering.
# NOTE(review): the medicare column is very sparse (expected count of about 1.3
# in the SUD group), which strains the chi-square approximation -- consider
# merging it into 'other' or using an exact test.
insurance_order = ['medicaid', 'private', 'uninsured', 'other', 'medicare']
chi2_contingency(
    pd.crosstab(df['SUD_dx'], df['insurance'])
    .reindex(index=[True, False], columns=insurance_order)
)

(82.38210145664907,
 5.447212353668768e-17,
 4,
 array([[1.09716516e+03, 4.64754377e+02, 2.92079855e+02, 5.57139106e+01,
         1.28669539e+00],
        [7.42983484e+03, 3.14724562e+03, 1.97792015e+03, 3.77286089e+02,
         8.71330461e+00]]))

In [59]:
# Chi-square test on the Homeless x SUD_dx counts; the result string is shown in
# a 'chi2' column and repeated on every row of the table.
# NOTE(review): the displayed "p=0.0" is presumably a very small p-value rounded
# inside chi2_table -- confirm, and report it as p < 0.001 rather than 0.
chi2_table(gnc(df,'Homeless','SUD_dx'), ['count_SUD_dx_True','count_SUD_dx_False'])

Unnamed: 0_level_0,count_SUD_dx_True,count_SUD_dx_False,chi2
Homeless,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,1791,12761,"chi^2(1) = 198.596, p=0.0"
1,120,180,"chi^2(1) = 198.596, p=0.0"


In [67]:
# Sum of the Alcohol column over all rows (the column holds 0.0/1.0 values, so
# this is the number of rows flagged for alcohol).
df.Alcohol.sum()

909.0

In [68]:
# Counts of each Alcohol value within each SUD_dx group.
df.groupby('SUD_dx')['Alcohol'].value_counts()

SUD_dx  Alcohol
False   0.0        12941
True    0.0         1002
        1.0          909
Name: Alcohol, dtype: int64

In [69]:
# For every substance flag, print its value counts and then the corresponding
# within-group proportions, split by SUD_dx.
substance_flags = [
    'Alcohol', 'Cannabis', 'Opioid', 'Cocaine', 'Hallucinogen', 'Inhalant',
    'Sedative_hypnotic', 'Other_Stimulants', 'Other_drugs',
]
sud_groups = df.groupby('SUD_dx')  # grouping is the same for every flag
for col in substance_flags:
    flag_by_group = sud_groups[col]
    print(f'-------{col}----------')
    print(flag_by_group.value_counts())
    print(flag_by_group.value_counts(normalize=True))

-------Alcohol----------
SUD_dx  Alcohol
False   0.0        12941
True    0.0         1002
        1.0          909
Name: Alcohol, dtype: int64
SUD_dx  Alcohol
False   0.0        1.000000
True    0.0        0.524333
        1.0        0.475667
Name: Alcohol, dtype: float64
-------Cannabis----------
SUD_dx  Cannabis
False   0.0         12941
True    0.0          1224
        1.0           687
Name: Cannabis, dtype: int64
SUD_dx  Cannabis
False   0.0         1.000000
True    0.0         0.640502
        1.0         0.359498
Name: Cannabis, dtype: float64
-------Opioid----------
SUD_dx  Opioid
False   0.0       12941
True    0.0        1798
        1.0         113
Name: Opioid, dtype: int64
SUD_dx  Opioid
False   0.0       1.000000
True    0.0       0.940869
        1.0       0.059131
Name: Opioid, dtype: float64
-------Cocaine----------
SUD_dx  Cocaine
False   0.0        12941
True    0.0         1773
        1.0          138
Name: Cocaine, dtype: int64
SUD_dx  Cocaine
False   0.0       

In [74]:
# Print the count / proportion breakdown by SUD_dx for each mental-health
# diagnosis flag, plus the overall MH_dx indicator.
mh_breakdown_columns = (
    'Anxiety_disorder', 'ADHD', 'Schizophrenia', 'PTSD', 'Psychotic_disorder',
    'Personality_disorder', 'Depression', 'Bipolar_disorder', 'MH_dx',
)
for col in mh_breakdown_columns:
    print(f'-------{col}----------')
    breakdown = gnc(df, col, 'SUD_dx')
    print(breakdown)

-------Anxiety_disorder----------
                  count_SUD_dx_False  percent_SUD_dx_False  count_SUD_dx_True  percent_SUD_dx_True
Anxiety_disorder                                                                                  
0.0                            12345              0.953945               1702             0.890633
1.0                              596              0.046055                209             0.109367
-------ADHD----------
      count_SUD_dx_False  percent_SUD_dx_False  count_SUD_dx_True  percent_SUD_dx_True
ADHD                                                                                  
0.0                12814              0.990186               1823             0.953951
1.0                  127              0.009814                 88             0.046049
-------Schizophrenia----------
               count_SUD_dx_False  percent_SUD_dx_False  count_SUD_dx_True  percent_SUD_dx_True
Schizophrenia                                                            

In [75]:
# Print the chi-square table (SUD_dx True vs False counts) for each individual
# mental-health diagnosis flag.
mh_chi2_columns = (
    'Anxiety_disorder', 'ADHD', 'Schizophrenia', 'PTSD', 'Psychotic_disorder',
    'Personality_disorder', 'Depression', 'Bipolar_disorder',
)
sud_count_columns = ['count_SUD_dx_True', 'count_SUD_dx_False']
for col in mh_chi2_columns:
    print(f'-------{col}----------')
    breakdown = gnc(df, col, 'SUD_dx')
    print(chi2_table(breakdown, sud_count_columns))

-------Anxiety_disorder----------
                  count_SUD_dx_True  count_SUD_dx_False                       chi2
Anxiety_disorder                                                                  
0.0                            1702               12345  chi^2(1) = 128.965, p=0.0
1.0                             209                 596  chi^2(1) = 128.965, p=0.0
-------ADHD----------
      count_SUD_dx_True  count_SUD_dx_False                       chi2
ADHD                                                                  
0.0                1823               12814  chi^2(1) = 150.717, p=0.0
1.0                  88                 127  chi^2(1) = 150.717, p=0.0
-------Schizophrenia----------
               count_SUD_dx_True  count_SUD_dx_False                      chi2
Schizophrenia                                                                 
0.0                         1878               12917  chi^2(1) = 99.485, p=0.0
1.0                           33                  24  chi^2(

In [78]:
# Overall mental-health diagnosis indicator by SUD diagnosis: count and
# within-group proportion of MH_dx False/True, per SUD_dx group.
gnc(df,'MH_dx','SUD_dx')

Unnamed: 0_level_0,count_SUD_dx_False,percent_SUD_dx_False,count_SUD_dx_True,percent_SUD_dx_True
MH_dx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
False,11908,0.920176,1438,0.752486
True,1033,0.079824,473,0.247514


In [79]:
# Chi-square test of independence between any mental-health diagnosis (MH_dx)
# and SUD diagnosis, with the 2 x 2 table computed from df instead of hand-copied
# counts. reindex() (label-based) puts the MH_dx=True column first so the layout
# matches the original table: rows SUD_dx False/True, columns MH_dx True/False.
# scipy applies Yates' continuity correction by default for 2 x 2 tables.
chi2_contingency(
    pd.crosstab(df['SUD_dx'], df['MH_dx']).reindex(columns=[True, False])
)

(512.0329623294207,
 2.2903506665724396e-113,
 1,
 array([[ 1312.22367358, 11628.77632642],
        [  193.77632642,  1717.22367358]]))

In [81]:
# psych_admits_90d (0/1) by SUD diagnosis: count and within-group proportion of
# each value, per SUD_dx group.
gnc(df, 'psych_admits_90d', 'SUD_dx')

Unnamed: 0_level_0,count_SUD_dx_False,percent_SUD_dx_False,count_SUD_dx_True,percent_SUD_dx_True
psych_admits_90d,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,12897,0.9966,1891,0.989534
1,44,0.0034,20,0.010466


In [82]:
# Chi-square test of independence between psych_admits_90d and SUD diagnosis.
# The 2 x 2 table (rows: SUD_dx False/True, columns: psych_admits_90d 0/1) is
# computed from df rather than typed in by hand, so the counts always reflect
# the loaded data. scipy applies Yates' continuity correction by default for
# 2 x 2 tables.
chi2_contingency(pd.crosstab(df['SUD_dx'], df['psych_admits_90d']))

(17.76277435279532,
 2.502316315718194e-05,
 1,
 array([[1.28852349e+04, 5.57651495e+01],
        [1.90276515e+03, 8.23485053e+00]]))