In [1]:
import numpy as np
import pandas as pd

import scipy
from statsmodels.stats.weightstats import *
from statsmodels.stats.proportion import proportion_confint

In [9]:
# 1000 draws from a normal distribution with mean 0 and standard deviation 1.
# NOTE(review): this draw happens before any np.random.seed call in the visible
# cells, and `data` is not referenced by the later visible cells.
data = np.random.normal(loc = 0, scale = 1, size=1000)

In [10]:
from statsmodels.stats.weightstats import _zconfint_generic, _tconfint_generic

In [15]:
# Two-sided t-based interval; the positional arguments are presumably
# (mean=0, std_mean=1, dof=99, alpha=0.003) -- this is a private statsmodels
# helper, so confirm the signature against the installed version.
_tconfint_generic(0, 1, 99, 0.003, 'two-sided')

(-3.0429447329911525, 3.0429447329911525)

In [16]:
def proportions_confint_diff_ind(sample1, sample2, alpha = 0.05):
    """Normal-approximation confidence interval for the difference p1 - p2.

    Each proportion is estimated as ``sum(sample) / len(sample)``, so the
    function is intended for samples of 0/1 indicator values.

    Parameters
    ----------
    sample1, sample2 : array-like
        The two independent samples; each must support ``len()``.
    alpha : float, optional
        Significance level; the normal quantile used is ``1 - alpha / 2``.

    Returns
    -------
    tuple
        ``(left_boundary, right_boundary)`` of the interval for ``p1 - p2``.

    Raises
    ------
    ZeroDivisionError
        If either sample is empty.
    """
    n1, n2 = len(sample1), len(sample2)
    z = scipy.stats.norm.ppf(1 - alpha / 2.)

    # np.sum is vectorized; the builtin sum would walk a NumPy array element
    # by element in Python.
    p1 = float(np.sum(sample1)) / n1
    p2 = float(np.sum(sample2)) / n2

    diff = p1 - p2
    # Half-width of the interval, computed once and shared by both boundaries.
    margin = z * np.sqrt(p1 * (1 - p1) / n1 + p2 * (1 - p2) / n2)

    return (diff - margin, diff + margin)

In [18]:
# Indicator vectors: the leading entries are set to 1 and the rest stay 0,
# giving 104 ones out of 11037 in sample1 and 189 ones out of 11034 in sample2.
sample1 = np.zeros(11037)
sample2 = np.zeros(11034)
# Slice assignment fills the leading entries in one vectorized step instead of
# an element-by-element Python loop.
sample1[:104] = 1
sample2[:189] = 1

In [19]:
# Interval for p1 - p2 (sample1 minus sample2) at the default alpha = 0.05.
proportions_confint_diff_ind(sample1, sample2)

(-0.010724297276960124, -0.004687750675049439)

In [20]:
# Arguments swapped: interval for p2 - p1, i.e. the bounds obtained for
# (sample1, sample2) negated and reversed.
proportions_confint_diff_ind(sample2, sample1)

(0.004687750675049439, 0.010724297276960124)

In [21]:
# Midpoint of the interval for p2 - p1. The interval is symmetric around the
# observed difference, so this recovers the point estimate. The interval is
# evaluated once here instead of twice.
sum(proportions_confint_diff_ind(sample2, sample1)) / 2

0.0077060239760047815

In [22]:
# Odds in each group: number of ones divided by number of zeros, using the
# same counts the samples were built from (104 of 11037 and 189 of 11034).
odds1, odds2 = 104 / (11037 - 104), 189 / (11034 - 189)

print(odds1, odds2)

0.009512485136741973 0.017427385892116183


In [23]:
# Odds ratio: odds in group 2 relative to odds in group 1.
odds2 / odds1

1.8320539419087138

In [24]:
# Seed NumPy's global random generator for reproducibility.
# NOTE(review): the visible cells reseed with the same value before the first
# random draw, so this call looks redundant -- confirm before removing.
np.random.seed(0)

In [25]:
def get_bootstrap_samples(data, n_samples):
    """Draw bootstrap resamples of `data` (sampling with replacement).

    Index positions are drawn from NumPy's global random generator, so the
    result depends on the current `np.random` state.

    Parameters
    ----------
    data : array supporting integer-array indexing (e.g. a NumPy array)
        The observations to resample.
    n_samples : int
        Number of resamples to draw.

    Returns
    -------
    The result of indexing `data` with an ``(n_samples, len(data))`` array of
    random positions; for a 1-D NumPy array this is one resample per row.
    """
    n_obs = len(data)
    draw_shape = (n_samples, n_obs)
    return data[np.random.randint(0, n_obs, draw_shape)]

In [26]:
def stat_intervals(stat, alpha):
    """Percentile interval of `stat` at significance level `alpha`.

    Returns the ``100 * alpha / 2`` and ``100 * (1 - alpha / 2)`` percentiles
    of `stat` as a two-element array ``[lower, upper]``.
    """
    lower_pct = 100 * alpha / 2.
    upper_pct = 100 * (1 - alpha / 2.)
    return np.percentile(stat, [lower_pct, upper_pct])

In [31]:
# Fix the global NumPy random state so the bootstrap resamples drawn below are
# reproducible.
np.random.seed(0)

In [57]:
# 1000 bootstrap resamples per group. sample1 is resampled before sample2, so
# the global random stream is consumed in the same order on every run.
samples1, samples2 = [
    get_bootstrap_samples(group, 1000) for group in (sample1, sample2)
]

In [58]:
def odds(data):
    """Return the odds p / (1 - p), where p is ``data.sum() / data.shape[0]``.

    `data` must provide ``.sum()`` and ``.shape`` (e.g. a NumPy array).
    """
    n_total = data.shape[0]
    share = data.sum() / n_total
    complement = 1 - share
    return share / complement

In [60]:
# Reseeding does not influence the values computed in this cell (nothing below
# draws random numbers); the call is kept so the generator state after the
# cell is the same as before this edit.
np.random.seed(0)
# One odds estimate per bootstrap resample. Iterating over the rows actually
# present avoids hard-coding the number of resamples a second time, which
# would silently truncate or raise IndexError if that number changed.
data1 = np.array([odds(resample) for resample in samples1])
data2 = np.array([odds(resample) for resample in samples2])

In [61]:
# NOTE(review): this echoes the entire array of bootstrap odds; a slice such
# as data1[:10] would keep the recorded output short.
data1

array([0.00905101, 0.00942016, 0.00932785, 0.00748517, 0.00895877,
       0.01043669, 0.00895877, 0.01062174, 0.00997438, 0.00960483,
       0.00923555, 0.01043669, 0.00840566, 0.00978957, 0.00978957,
       0.00858997, 0.00923555, 0.00923555, 0.00886654, 0.00868214,
       0.0103442 , 0.00905101, 0.00877434, 0.00969719, 0.01015925,
       0.00951249, 0.00849781, 0.00960483, 0.0103442 , 0.01089943,
       0.01071429, 0.01145528, 0.01173343, 0.00831354, 0.00969719,
       0.00905101, 0.01015925, 0.01154798, 0.00951249, 0.01015925,
       0.00923555, 0.00914328, 0.00914328, 0.00923555, 0.01145528,
       0.01108465, 0.01108465, 0.00942016, 0.00868214, 0.00951249,
       0.00951249, 0.01015925, 0.00776114, 0.00978957, 0.00988197,
       0.00923555, 0.01025172, 0.00868214, 0.00868214, 0.00886654,
       0.00868214, 0.00988197, 0.00932785, 0.00905101, 0.01025172,
       0.00923555, 0.00951249, 0.00978957, 0.00932785, 0.00895877,
       0.00914328, 0.00923555, 0.0103442 , 0.00895877, 0.01052

In [62]:
# Element-wise ratio of the bootstrap odds: group 2 over group 1, i.e. one
# odds-ratio value per pair of resamples.
data33 = data2 / data1

In [63]:
# Percentile interval (2.5th and 97.5th percentiles) of the bootstrap odds ratios.
stat_intervals(data33, 0.05)

array([1.44045188, 2.37473964])