# Statistical Hypothesis Testing

This notebook attempts to explore mainly on the Univariate analysis of each factor w.r.t to the 3 outcomes of interest ['ICUAdmgt24h', 'thirtydaymortality', 'Mortality']
Will employ Anova/Chi-square accordingly based on the nature of data

Side note: I have only done for all categorical responses using chi-square. CAA 07/Jul/2024... Will do the same for continuous response in abit

In [1]:
# Statistical Hypothesis Testing

In [17]:
import pandas as pd
import scipy

In [5]:
# import dataset clean and imputed previously
df = pd.read_csv('../data/CARES_data_imputedv1.csv', dtype={'RCRI_score':str})

# show first 5 rows
df.head()

Unnamed: 0,GENDER,RCRI_score,Anemia category,Preoptransfusionwithin30days,Intraop,Postopwithin30days,Transfusionintraandpostop,AnaestypeCategory,PriorityCategory,TransfusionIntraandpostopCategory,...,RaceCategory,CVARCRICategory,IHDRCRICategory,CHFRCRICategory,DMinsulinRCRICategory,CreatinineRCRICategory,GradeofKidneyCategory,RDW15.7,ASAcategorybinned,ICUAdmgt24h
0,FEMALE,0,mild,0.0,0.0,0.0,0.0,GA,Elective,0 units,...,Chinese,no,no,no,no,no,G1,<= 15.7,I,no
1,MALE,0,moderate/severe,0.0,1.0,0.0,1.0,GA,Elective,1 unit,...,Chinese,no,no,no,no,no,G1,<= 15.7,I,no
2,MALE,0,mild,0.0,0.0,0.0,0.0,GA,Elective,0 units,...,Chinese,no,no,no,no,no,G1,>15.7,II,no
3,MALE,0,none,0.0,0.0,0.0,0.0,GA,Emergency,0 units,...,Indian,no,no,no,no,no,G1,<= 15.7,I,no
4,FEMALE,0,none,0.0,0.0,0.0,0.0,GA,Elective,0 units,...,Chinese,no,no,no,no,no,G2,<= 15.7,II,no


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 69667 entries, 0 to 69666
Data columns (total 24 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   GENDER                             69667 non-null  object 
 1   RCRI_score                         69667 non-null  object 
 2   Anemia category                    69667 non-null  object 
 3   Preoptransfusionwithin30days       69667 non-null  float64
 4   Intraop                            69667 non-null  float64
 5   Postopwithin30days                 69667 non-null  float64
 6   Transfusionintraandpostop          69667 non-null  float64
 7   AnaestypeCategory                  69667 non-null  object 
 8   PriorityCategory                   69667 non-null  object 
 9   TransfusionIntraandpostopCategory  69667 non-null  object 
 10  AGEcategory                        69667 non-null  object 
 11  Mortality                          69667 non-null  obj

In [8]:
# target columns y_i
Y =  ['ICUAdmgt24h', 'thirtydaymortality', 'Mortality']

In [11]:
# Select categorical columns and exclude target columns
cat_cols = df.select_dtypes(include=['object', 'category']).columns.to_list()
cat_cols = [col for col in cat_cols if col not in Y]

# Select numerical columns and exclude target columns
num_cols = df.select_dtypes(include=['number']).columns.tolist()
num_cols = [col for col in num_cols if col not in Y]

In [13]:
# See what are the unique categories present in the categorical columns
summary_cat_data = {
    'Column': [],
    'Number of Unique Values': [],
    'Unique Values': []
}

for column in cat_cols:
    summary_cat_data['Column'].append(column)
    summary_cat_data['Number of Unique Values'].append(df[column].nunique())
    summary_cat_data['Unique Values'].append(df[column].unique())

summary_df = pd.DataFrame(summary_cat_data)
summary_df
# There are no NaN values in the columns

Unnamed: 0,Column,Number of Unique Values,Unique Values
0,GENDER,2,"[FEMALE, MALE]"
1,RCRI_score,7,"[0, 1, 2, 4, 3, 6, 5]"
2,Anemia category,3,"[mild, moderate/severe, none]"
3,AnaestypeCategory,2,"[GA, RA]"
4,PriorityCategory,2,"[Elective, Emergency]"
5,TransfusionIntraandpostopCategory,3,"[0 units, 1 unit, 2 or more units]"
6,AGEcategory,6,"[50-64, 65-74, 30-49, 75-84, 18-29, >=85]"
7,SurgRiskCategory,3,"[Low, Moderate, High]"
8,RaceCategory,4,"[Chinese, Indian, Others, Malay]"
9,CVARCRICategory,2,"[no, yes]"


In [14]:
num_cols

['Preoptransfusionwithin30days',
 'Intraop',
 'Postopwithin30days',
 'Transfusionintraandpostop']

Perform univariate analysis for AGEcategory to ICUAdmgt24h

In [32]:
# Build contingency table 01
conti_table01 = pd.crosstab(df['AGEcategory'], df['ICUAdmgt24h'])
#conti_table01
# Perform chisquare
chi2, p, dof, ex = scipy.stats.chi2_contingency(conti_table01)

print(f'Chi2: {chi2}')
print(f'p-value: {p}')
print(f'Degree of Freedom: {dof}')
print(f'Expected Frequencies: {ex}')

Chi2: 355.7421570463822
p-value: 1.0156666132806244e-74
Degree of Freedom: 5
Expected Frequencies: [[6.86582649e+03 1.17173511e+02]
 [1.90931526e+04 3.25847403e+02]
 [2.17389981e+04 3.71001909e+02]
 [1.29185299e+04 2.20470108e+02]
 [6.61117246e+03 1.12827537e+02]
 [1.27032047e+03 2.16795326e+01]]


Perform univariate analysis for AGEcategory to Mortality

In [35]:
# sanity check
df['Mortality'].value_counts()

Mortality
No death    64327
Yes          5340
Name: count, dtype: int64

In [42]:
# Build Contingency table 02
conti_table02 = pd.crosstab(df['AGEcategory'], df['Mortality'])
print(conti_table02,'\n')
# Perform chisquare
chi2, p, dof, ex = scipy.stats.chi2_contingency(conti_table02)

print(f'Chi2: {chi2}')
print(f'p-value: {p}')
print(f'Degree of Freedom: {dof}')
print(f'Expected Frequencies: {ex}')

Mortality    No death   Yes
AGEcategory                
18-29            6941    42
30-49           18957   462
50-64           20385  1725
65-74           11601  1538
75-84            5514  1210
>=85              929   363 

Chi2: 3338.422281464878
p-value: 0.0
Degree of Freedom: 5
Expected Frequencies: [[ 6447.75059928   535.24940072]
 [17930.5268348   1488.4731652 ]
 [20415.26074038  1694.73925962]
 [12131.8910388   1007.1089612 ]
 [ 6208.60304018   515.39695982]
 [ 1192.96774657    99.03225343]]


Perform Univariate Analysis for AGECategory to thirtydaymortality

In [43]:
df['thirtydaymortality'].value_counts()

thirtydaymortality
False    69143
True       524
Name: count, dtype: int64

In [44]:
# Build Contingency table 02
conti_table02 = pd.crosstab(df['AGEcategory'], df['thirtydaymortality'])
print(conti_table02,'\n')
# Perform chisquare
chi2, p, dof, ex = scipy.stats.chi2_contingency(conti_table02)

print(f'Chi2: {chi2}')
print(f'p-value: {p}')
print(f'Degree of Freedom: {dof}')
print(f'Expected Frequencies: {ex}')

thirtydaymortality  False  True 
AGEcategory                     
18-29                6977      6
30-49               19376     43
50-64               21971    139
65-74               13013    126
75-84                6579    145
>=85                 1227     65 

Chi2: 621.3406910405658
p-value: 4.94870936061591e-132
Degree of Freedom: 5
Expected Frequencies: [[6.93047740e+03 5.25226004e+01]
 [1.92729401e+04 1.46059914e+02]
 [2.19436997e+04 1.66300257e+02]
 [1.30401751e+04 9.88249243e+01]
 [6.67342547e+03 5.05745331e+01]
 [1.28228223e+03 9.71777169e+00]]


# Am i going to one-by-one do for all? Can we do this better in a operationalize manner...

In [67]:
outcome_lists = ['ICUAdmgt24h', 'thirtydaymortality', 'Mortality']

def chi_square_test_for_all_outcomes(df, risk_factor_identified, outcome_lists):
    for outcome in outcome_lists:
        conti_table = pd.crosstab(df[risk_factor_identified], df[outcome])
        chi2, p, dof, ex = scipy.stats.chi2_contingency(conti_table)
        print(f'======={risk_factor_identified} for {outcome}=======')
        print(f'Chi2: {chi2}')
        print(f'p-value: {p}', 'Statisitcal Significant' if p < 0.05 else 'Not Significant')
        print(f'Degree of Freedom: {dof}')
        print(f'Expected Frequencies: {ex}')
        print('\n')

In [68]:
chi_square_test_for_all_outcomes(df=df, risk_factor_identified= 'AGEcategory', outcome_lists=outcome_lists) 

Chi2: 355.7421570463822
p-value: 1.0156666132806244e-74 Statisitcal Significant
Degree of Freedom: 5
Expected Frequencies: [[6.86582649e+03 1.17173511e+02]
 [1.90931526e+04 3.25847403e+02]
 [2.17389981e+04 3.71001909e+02]
 [1.29185299e+04 2.20470108e+02]
 [6.61117246e+03 1.12827537e+02]
 [1.27032047e+03 2.16795326e+01]]


Chi2: 621.3406910405658
p-value: 4.94870936061591e-132 Statisitcal Significant
Degree of Freedom: 5
Expected Frequencies: [[6.93047740e+03 5.25226004e+01]
 [1.92729401e+04 1.46059914e+02]
 [2.19436997e+04 1.66300257e+02]
 [1.30401751e+04 9.88249243e+01]
 [6.67342547e+03 5.05745331e+01]
 [1.28228223e+03 9.71777169e+00]]


Chi2: 3338.422281464878
p-value: 0.0 Statisitcal Significant
Degree of Freedom: 5
Expected Frequencies: [[ 6447.75059928   535.24940072]
 [17930.5268348   1488.4731652 ]
 [20415.26074038  1694.73925962]
 [12131.8910388   1007.1089612 ]
 [ 6208.60304018   515.39695982]
 [ 1192.96774657    99.03225343]]




In [69]:
chi_square_test_for_all_outcomes(df=df, risk_factor_identified= 'RCRI_score', outcome_lists=outcome_lists) 

Chi2: 972.2768229378177
p-value: 8.852114921263676e-207 Statisitcal Significant
Degree of Freedom: 6
Expected Frequencies: [[5.40977573e+04 9.23242697e+02]
 [1.11762637e+04 1.90736260e+02]
 [2.32728216e+03 3.97178435e+01]
 [6.58757518e+02 1.12424821e+01]
 [2.02543356e+02 3.45664375e+00]
 [3.24462658e+01 5.53734193e-01]
 [2.94966053e+00 5.03394721e-02]]


Chi2: 735.8423312630983
p-value: 1.1135464311263952e-155 Statisitcal Significant
Degree of Freedom: 6
Expected Frequencies: [[5.46071598e+04 4.13840183e+02]
 [1.12815032e+04 8.54968349e+01]
 [2.34919662e+03 1.78033789e+01]
 [6.64960598e+02 5.03940173e+00]
 [2.04450572e+02 1.54942799e+00]
 [3.27517907e+01 2.48209339e-01]
 [2.97743551e+00 2.25644853e-02]]


Chi2: 2301.3644056451362
p-value: 0.0 Statisitcal Significant
Degree of Freedom: 6
Expected Frequencies: [[5.08036210e+04 4.21737896e+03]
 [1.04957155e+04 8.71284539e+02]
 [2.18556862e+03 1.81431381e+02]
 [6.18644265e+02 5.13557351e+01]
 [1.90210028e+02 1.57899723e+01]
 [3.04705384e+0

In [71]:
chi_square_test_for_all_outcomes(df=df, risk_factor_identified= 'GENDER', outcome_lists=outcome_lists) 

Chi2: 75.67269323993298
p-value: 3.348058845829266e-18 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[34510.04495672   588.95504328]
 [33987.95504328   580.04495672]]


Chi2: 12.614719744340402
p-value: 0.00038272091539977786 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[34835.00304305   263.99695695]
 [34307.99695695   260.00304305]]


Chi2: 87.20269819554039
p-value: 9.7950080692153e-21 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[32408.64933182  2690.35066818]
 [31918.35066818  2649.64933182]]




In [72]:
chi_square_test_for_all_outcomes(df=df, risk_factor_identified='SurgRiskCategory', outcome_lists=outcome_lists)

Chi2: 2126.8362096827323
p-value: 0.0 Statisitcal Significant
Degree of Freedom: 2
Expected Frequencies: [[ 3164.98574648    54.01425352]
 [34112.82400563   582.17599437]
 [31220.19024789   532.80975211]]


Chi2: 215.84639028675448
p-value: 1.34757169367986e-47 Statisitcal Significant
Degree of Freedom: 2
Expected Frequencies: [[3.19478831e+03 2.42116928e+01]
 [3.44340417e+04 2.60958273e+02]
 [3.15141700e+04 2.38830034e+02]]


Chi2: 1354.0828546959629
p-value: 9.218156624034719e-295 Statisitcal Significant
Degree of Freedom: 2
Expected Frequencies: [[ 2972.26252028   246.73747972]
 [32035.61607361  2659.38392639]
 [29319.12140612  2433.87859388]]




In [74]:
chi_square_test_for_all_outcomes(df=df, risk_factor_identified='Anemia category', outcome_lists=outcome_lists)

Chi2: 588.677248368308
p-value: 1.4803339720113876e-128 Statisitcal Significant
Degree of Freedom: 2
Expected Frequencies: [[11395.52183961   194.47816039]
 [ 9793.85617294   167.14382706]
 [47308.62198745   807.37801255]]


Chi2: 983.4355532618051
p-value: 2.816326200473028e-214 Statisitcal Significant
Degree of Freedom: 2
Expected Frequencies: [[11502.82587165    87.17412835]
 [ 9886.07838718    74.92161282]
 [47754.09574117   361.90425883]]


Chi2: 4752.48001382974
p-value: 0.0 Statisitcal Significant
Degree of Freedom: 2
Expected Frequencies: [[10701.62243243   888.37756757]
 [ 9197.48585413   763.51414587]
 [44427.89171344  3688.10828656]]




In [73]:
# lets do for race in which i do not expect to be statistical significant
chi_square_test_for_all_outcomes(df=df, risk_factor_identified='RaceCategory', outcome_lists=outcome_lists)

Chi2: 4.0100259487504575
p-value: 0.2603835418212841 Not Significant
Degree of Freedom: 3
Expected Frequencies: [[47789.41665351   815.58334649]
 [ 6501.05180358   110.94819642]
 [ 7306.30912771   124.69087229]
 [ 6901.2224152    117.7775848 ]]


Chi2: 12.389430789207571
p-value: 0.006161512310634597 Statisitcal Significant
Degree of Freedom: 3
Expected Frequencies: [[48239.41773006   365.58226994]
 [ 6562.26787432    49.73212568]
 [ 7375.10776982    55.89223018]
 [ 6966.20662581    52.79337419]]


Chi2: 396.19707389151836
p-value: 1.475320109998201e-85 Statisitcal Significant
Degree of Freedom: 3
Expected Frequencies: [[44879.40969182  3725.59030818]
 [ 6105.18787948   506.81212052]
 [ 6861.41124205   569.58875795]
 [ 6480.99118665   538.00881335]]




In [75]:
chi_square_test_for_all_outcomes(df=df, risk_factor_identified='PriorityCategory', outcome_lists=outcome_lists)

Chi2: 464.6711722323638
p-value: 4.627799605502532e-103 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[52907.07766948   902.92233052]
 [15590.92233052   266.07766948]]


Chi2: 569.7306074134207
p-value: 6.425646595827109e-126 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[53405.268348   404.731652]
 [15737.731652   119.268348]]


Chi2: 709.1796072414033
p-value: 3.016870333647481e-156 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[49685.4446151  4124.5553849]
 [14641.5553849  1215.4446151]]




In [76]:
chi_square_test_for_all_outcomes(df=df, risk_factor_identified='GradeofKidneyCategory', outcome_lists=outcome_lists)

Chi2: 764.4693698476417
p-value: 2.1967095614770173e-165 Statisitcal Significant
Degree of Freedom: 3
Expected Frequencies: [[39884.32643863   680.67356137]
 [20453.92932091   349.07067909]
 [ 4963.29544835    84.70455165]
 [ 3196.44879211    54.55120789]]


Chi2: 1151.3307273837886
p-value: 2.658434446762911e-249 Statisitcal Significant
Degree of Freedom: 3
Expected Frequencies: [[4.02598906e+04 3.05109449e+02]
 [2.06465303e+04 1.56469663e+02]
 [5.01003149e+03 3.79685073e+01]
 [3.22654762e+03 2.44523806e+01]]


Chi2: 4272.872819032678
p-value: 0.0 Statisitcal Significant
Degree of Freedom: 3
Expected Frequencies: [[37455.6785135   3109.3214865 ]
 [19208.44274908  1594.55725092]
 [ 4661.06902838   386.93097162]
 [ 3001.80970904   249.19029096]]




In [77]:
chi_square_test_for_all_outcomes(df=df, risk_factor_identified='ASAcategorybinned', outcome_lists=outcome_lists)

Chi2: 3881.644377419353
p-value: 0.0 Statisitcal Significant
Degree of Freedom: 3
Expected Frequencies: [[1.56037042e+04 2.66295807e+02]
 [3.86267878e+04 6.59212166e+02]
 [1.31948148e+04 2.25185238e+02]
 [1.07269321e+03 1.83067880e+01]]


Chi2: 3043.9193051984507
p-value: 0.0 Statisitcal Significant
Degree of Freedom: 3
Expected Frequencies: [[1.57506339e+04 1.19366127e+02]
 [3.89905105e+04 2.95489457e+02]
 [1.33190615e+04 1.00938464e+02]
 [1.08279405e+03 8.20595117e+00]]


Chi2: 6220.945926622753
p-value: 0.0 Statisitcal Significant
Degree of Freedom: 3
Expected Frequencies: [[14653.55893034  1216.44106966]
 [36274.71431237  3011.28568763]
 [12391.35229018  1028.64770982]
 [ 1007.37446711    83.62553289]]




In [78]:
chi_square_test_for_all_outcomes(df=df, risk_factor_identified='RDW15.7', outcome_lists=outcome_lists)

Chi2: 188.81417999216563
p-value: 5.76997713575394e-43 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[60839.69804929  1038.30195071]
 [ 7658.30195071   130.69804929]]


Chi2: 526.6287457173818
p-value: 1.5289308794616506e-116 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[6.14125849e+04 4.65415075e+02]
 [7.73041507e+03 5.85849254e+01]]


Chi2: 1670.7733299187246
p-value: 0.0 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[57135.02958359  4742.97041641]
 [ 7191.97041641   597.02958359]]




In [79]:
chi_square_test_for_all_outcomes(df=df, risk_factor_identified='IHDRCRICategory', outcome_lists=outcome_lists)

Chi2: 207.52880085624955
p-value: 4.75363995132204e-47 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[64474.66303989  1100.33696011]
 [ 4023.33696011    68.66303989]]


Chi2: 305.5001181626003
p-value: 2.0869712445835437e-68 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[6.50817780e+04 4.93222042e+02]
 [4.06122204e+03 3.07779580e+01]]


Chi2: 779.0869562119626
p-value: 1.901115178376054e-171 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[60548.65323611  5026.34676389]
 [ 3778.34676389   313.65323611]]




In [80]:
chi_square_test_for_all_outcomes(df=df, risk_factor_identified='CVARCRICategory', outcome_lists=outcome_lists)

Chi2: 35.654586370018805
p-value: 2.355925700016726e-09 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[6.70251362e+04 1.14386382e+03]
 [1.47286382e+03 2.51361764e+01]]


Chi2: 83.53333814473179
p-value: 6.265046528343105e-20 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[6.76562672e+04 5.12732800e+02]
 [1.48673280e+03 1.12671997e+01]]


Chi2: 321.6765925835274
p-value: 6.247496362741055e-72 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[62943.82222573  5225.17777427]
 [ 1383.17777427   114.82222573]]




In [81]:
chi_square_test_for_all_outcomes(df=df, risk_factor_identified='AnaestypeCategory', outcome_lists=outcome_lists)

Chi2: 115.21564853140003
p-value: 7.058838055594819e-27 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[57384.6623509   979.3376491]
 [11113.3376491   189.6623509]]


Chi2: 0.7924892131058686
p-value: 0.3733485018891709 Not Significant
Degree of Freedom: 1
Expected Frequencies: [[57925.01545926   438.98454074]
 [11217.98454074    85.01545926]]


Chi2: 170.5895799900371
p-value: 5.5004422705253526e-39 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[53890.37891685  4473.62108315]
 [10436.62108315   866.37891685]]




In [82]:
chi_square_test_for_all_outcomes(df=df, risk_factor_identified='TransfusionIntraandpostopCategory', outcome_lists=outcome_lists)

Chi2: 2317.174173226892
p-value: 0.0 Statisitcal Significant
Degree of Freedom: 2
Expected Frequencies: [[6.38631001e+04 1.08989991e+03]
 [3.73033735e+03 6.36626523e+01]
 [9.04562562e+02 1.54374381e+01]]


Chi2: 1182.1257744712493
p-value: 2.0167386235657975e-257 Statisitcal Significant
Degree of Freedom: 2
Expected Frequencies: [[6.44644563e+04 4.88543672e+02]
 [3.76546345e+03 2.85365525e+01]
 [9.13080224e+02 6.91977550e+00]]


Chi2: 2455.578452091934
p-value: 0.0 Statisitcal Significant
Degree of Freedom: 2
Expected Frequencies: [[59974.3297544   4978.6702456 ]
 [ 3503.18856848   290.81143152]
 [  849.48167712    70.51832288]]




In [83]:
chi_square_test_for_all_outcomes(df=df, risk_factor_identified='CHFRCRICategory', outcome_lists=outcome_lists)

Chi2: 164.03353827768277
p-value: 1.4874249664414105e-37 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[6.77369876e+04 1.15601242e+03]
 [7.61012416e+02 1.29875838e+01]]


Chi2: 143.94430036669388
p-value: 3.6540016335549025e-33 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[6.83748216e+04 5.18178363e+02]
 [7.68178363e+02 5.82163722e+00]]


Chi2: 592.6169170675167
p-value: 6.756079958909795e-131 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[6.36123274e+04 5.28067263e+03]
 [7.14672628e+02 5.93273716e+01]]




In [84]:
chi_square_test_for_all_outcomes(df=df, risk_factor_identified='DMinsulinRCRICategory', outcome_lists=outcome_lists)

Chi2: 17.563852640359364
p-value: 2.778194498974527e-05 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[6.66092340e+04 1.13676596e+03]
 [1.88876596e+03 3.22340419e+01]]


Chi2: 41.48246771776999
p-value: 1.1893278186161898e-10 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[6.72364488e+04 5.09551208e+02]
 [1.90655121e+03 1.44487921e+01]]


Chi2: 370.27182406534206
p-value: 1.633088194464354e-82 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[62553.24532419  5192.75467581]
 [ 1773.75467581   147.24532419]]




In [86]:
chi_square_test_for_all_outcomes(df=df, risk_factor_identified='CreatinineRCRICategory', outcome_lists=outcome_lists)

Chi2: 224.88972688766435
p-value: 7.759994401604294e-51 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[6.62621573e+04 1.13084268e+03]
 [2.23584268e+03 3.81573198e+01]]


Chi2: 542.6313246379763
p-value: 5.046565765166787e-120 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[6.68861039e+04 5.06896120e+02]
 [2.25689612e+03 1.71038799e+01]]


Chi2: 1652.3233879996642
p-value: 0.0 Statisitcal Significant
Degree of Freedom: 1
Expected Frequencies: [[62227.30289807  5165.69710193]
 [ 2099.69710193   174.30289807]]


