https://onlinelibrary.wiley.com/doi/book/10.1002/0471249688

In [1]:
import pandas as pd
import numpy as np

# import plotting libraries
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
plt.style.use('seaborn-white')

# Statistical Packages
import scipy.stats as stats
import statsmodels.api as sm
import statsmodels.formula.api as smf

____
# Asprin v Placemo


In [2]:
result = [189, 10845, 104, 10933]
d = {
    'group': np.repeat(('placebo','placebo','aspirn','aspirn'), (result)),
    'heart_attack' : np.repeat(('yes','no', 'yes','no'), (result)),
    
}

drug = pd.DataFrame(d, columns=['group', 'heart_attack'])
dummy_ = pd.get_dummies(drug[['group']])
drug = pd.concat([drug, dummy_], axis=1)

# one hot encoding
heart_attack = {"no":0, "yes":1}
drug['attack_'] = drug.heart_attack.map(heart_attack)

drug.head()

Unnamed: 0,group,heart_attack,group_aspirn,group_placebo,attack_
0,placebo,yes,0,1,1
1,placebo,yes,0,1,1
2,placebo,yes,0,1,1
3,placebo,yes,0,1,1
4,placebo,yes,0,1,1


In [3]:
tb = pd.crosstab([drug.group], drug.heart_attack).T.sort_values('heart_attack', ascending=False).T.sort_values('group', ascending=False)

print(tb)

tab = sm.stats.Table2x2(tb.values)
chi2, p, dof, ex = stats.chi2_contingency(tb, correction=False)

print("Pearson's Chi-squared test with Yates' continuity correction")
print("X-squared: ", round(chi2, 3))
print("p-value  : ", round(p, 3))
print("degree of f: ", dof)

hi, low = tab.oddsratio_confint()
print("Odds Ratio CI: ", (round(hi, 3), round(low, 3)))
print("\n2x2 Summary:")
print(tab.summary())

heart_attack  yes     no
group                   
placebo       189  10845
aspirn        104  10933
Pearson's Chi-squared test with Yates' continuity correction
X-squared:  25.014
p-value  :  0.0
degree of f:  1
Odds Ratio CI:  (1.44, 2.331)

2x2 Summary:
               Estimate   SE   LCB   UCB  p-value
-------------------------------------------------
Odds ratio        1.832       1.440 2.331   0.000
Log odds ratio    0.605 0.123 0.365 0.846   0.000
Risk ratio        1.818       1.433 2.306   0.000
Log risk ratio    0.598 0.121 0.360 0.835   0.000
-------------------------------------------------


__the sample relative risk is p1/p2 =0.0171/0.0094=1.82. The sample proportionof MI cases was 82% higher for the group taking placebo.The sample difference of proportions of 0.008 makes it seem as if the two groups differ by a trivial amount__

__We can be 95% conﬁdent that, after 5 years, the proportion of MI cases for male physicians taking placebo is between 1.43 and 2.30 times the proportion of MI cases for male physicians taking aspirin. This indicates that the risk of MI is at least 43% higher for the placebo group.__