In [1]:
import numpy as np
import pandas as pd

import iqplot

import bokeh.io
import bokeh.plotting

# Configure Bokeh so that figures passed to bokeh.io.show() display in the notebook.
bokeh.io.output_notebook()

In [2]:
# Read the data set; lines in the CSV that start with '#' are treated as comments.
df = pd.read_csv('data/grant_complete.csv', comment='#')

# Keep only the beak-depth column for rows whose year is 1975, as an array.
bd_1975 = df.loc[df['year'].eq(1975), 'beak depth (mm)'].values

In [43]:
def draw_bs_reps(data, func, rg, size=1, args=()):
    """Draw bootstrap replicates of a statistic.

    Parameters
    ----------
    data : array_like
        Data to resample. Every replicate is computed from ``len(data)``
        entries drawn from ``data`` with replacement.
    func : callable
        Statistic to evaluate on each resampled data set, e.g. ``np.mean``.
        Called as ``func(sample, *args)``.
    rg : numpy.random.Generator
        Random generator whose ``choice`` method performs the resampling.
    size : int, optional
        Number of bootstrap replicates to generate (default 1).
    args : tuple, optional
        Extra positional arguments forwarded to ``func``.

    Returns
    -------
    numpy.ndarray
        Array holding the ``size`` replicate values, in the order drawn.
    """
    n_points = len(data)

    replicates = []
    for _ in range(size):
        # Resample with replacement, keeping the original sample size.
        resampled = rg.choice(data, size=n_points, replace=True)
        replicates.append(func(resampled, *args))

    return np.array(replicates)

In [46]:
# One million bootstrap replicates of the mean of the 1975 data,
# drawn with a freshly created (unseeded) generator.
bs_reps_1975 = draw_bs_reps(
    data=bd_1975,
    func=np.mean,
    rg=np.random.default_rng(),
    size=1_000_000,
)

In [47]:
# Display the replicate array as the cell output.
bs_reps_1975

array([9.06588972, 9.16007519, 9.07195489, ..., 9.159599  , 9.16957393,
       9.14984962])

In [50]:
def bs_sample(data, rg=None):
    """Draw one bootstrap sample from ``data``.

    Parameters
    ----------
    data : array_like
        Data to resample.
    rg : numpy.random.Generator, optional
        Generator used for the draw. If omitted, a new
        ``np.random.default_rng()`` is created for this call. Passing one
        shared generator avoids rebuilding it for every sample and lets the
        caller seed the draws for reproducibility.

    Returns
    -------
    numpy.ndarray
        ``len(data)`` entries of ``data`` drawn with replacement.
    """
    if rg is None:
        rg = np.random.default_rng()
    return rg.choice(data, replace=True, size=len(data))


def bs_replicate(data, func, args=(), rg=None):
    """Compute one bootstrap replicate of a statistic.

    Parameters
    ----------
    data : array_like
        Data to resample.
    func : callable
        Statistic evaluated on the bootstrap sample, called as
        ``func(sample, *args)``.
    args : tuple, optional
        Extra positional arguments forwarded to ``func``.
    rg : numpy.random.Generator, optional
        Generator handed to ``bs_sample``; see that function for the
        behavior when it is omitted.

    Returns
    -------
    Whatever ``func`` returns for the resampled data.
    """
    return func(bs_sample(data, rg), *args)

# Number of bootstrap replicates to compute.
n_reps = 1_000_000

# Evaluate the mean on a new bootstrap sample n_reps times and collect the values.
bs_reps_1975 = np.array(
    [
        bs_replicate(bd_1975, np.mean)
        for _ in range(n_reps)
    ]
)

In [51]:
# Display the replicate array produced by the bs_replicate loop.
bs_reps_1975

array([9.17050125, 9.11528822, 9.14122807, ..., 9.12275689, 9.08987469,
       9.13546366])

In [52]:
def backtrack_steps(rg=None):
    """Count the steps a one-dimensional random walk takes to first reach x = 1.

    The walker starts at x = 0. For every step one number is drawn with
    ``rg.uniform(low=0, high=1)``: a draw strictly greater than 0.5 moves the
    walker one unit backward (x -= 1), any other draw moves it one unit
    forward (x += 1). The walk ends as soon as x becomes positive. There is
    no cap on the number of steps.

    Parameters
    ----------
    rg : numpy.random.Generator, optional
        Generator supplying the draws. If omitted, a new
        ``np.random.default_rng()`` is created for this call. Pass a seeded
        generator to make the walk reproducible.

    Returns
    -------
    int
        Number of steps taken, counting the final forward step.
    """
    if rg is None:
        rg = np.random.default_rng()

    x = 0
    steps = 0
    while x <= 0:
        draw = rg.uniform(low=0, high=1)
        if draw > 0.5:
            x -= 1
        else:
            x += 1
        steps += 1
    return steps

In [57]:
# Run a single walk and show its step count as the cell output.
backtrack_steps()

3

In [58]:
# Step counts from 10,000 independent calls to backtrack_steps().
num_steps = np.array(
    [backtrack_steps() for _ in range(10_000)]
)

In [61]:
# ECDF of the simulated step counts: staircase style, log-scaled x axis.
p = iqplot.ecdf(
    data=num_steps,
    q='num of steps',
    x_axis_type='log',
    style='staircase',
)

# Display the figure.
bokeh.io.show(p)