In [1]:
import numpy as np
import pandas as pd

import iqplot

import bokeh.io
import bokeh.plotting

bokeh.io.output_notebook()

In [2]:
# Load the beak measurement data; lines starting with '#' in the CSV are
# treated as comments and skipped by the parser.
df = pd.read_csv('data/grant_complete.csv', comment='#')

# Show the first few rows as a quick check of the columns
df.head()

Unnamed: 0,band,beak depth (mm),beak length (mm),species,year
0,20123,8.05,9.25,fortis,1973
1,20126,10.45,11.35,fortis,1973
2,20128,9.55,10.15,fortis,1973
3,20129,8.75,9.95,fortis,1973
4,20133,10.15,11.55,fortis,1973


In [33]:
# Restrict the plot to the two samples compared in this analysis (scandens in
# 1975 and 2012). The subset is selected here explicitly instead of relying on
# `df` having been filtered earlier in the session, so the cell gives the same
# plot after Restart & Run All.
in_comparison = (df['species'] == 'scandens') & df['year'].isin([1975, 2012])

# ECDFs of beak depth, one per year
p = iqplot.ecdf(
    data=df.loc[in_comparison, :],
    q='beak depth (mm)',
    cats='year',
)

bokeh.io.show(p)

In [34]:
# Pull our data sets as Numpy arrays
# Pull our data sets as Numpy arrays. The species is selected explicitly so
# the arrays hold the scandens measurements even when `df` still contains
# every species (as it does right after loading in a fresh kernel); if `df`
# was already filtered, the extra condition changes nothing.
is_scandens = df['species'] == 'scandens'
bd_1975 = df.loc[is_scandens & (df['year'] == 1975), 'beak depth (mm)'].values
bd_2012 = df.loc[is_scandens & (df['year'] == 2012), 'beak depth (mm)'].values

# Compute the means
np.mean(bd_1975), np.mean(bd_2012)

(8.959999999999999, 9.188492063492063)

In [35]:
# Seeded generator so the resampling below is repeatable
rng = np.random.default_rng(3252)

# One bootstrap sample: len(bd_1975) values drawn from bd_1975 with replacement.
# NOTE(review): a later cell rebinds the name `bs_sample` to a function, so
# this array is only reachable under this name until that cell has run.
bs_sample = rng.choice(bd_1975, replace=True, size=len(bd_1975))

# Mean of that one bootstrap sample
np.mean(bs_sample)

8.849770114942531

In [36]:
# ECDF of the measured 1975 scandens beak depths. The species is selected
# explicitly so that the reference curve matches the sample the bootstrap
# draw was taken from even when `df` still holds every species.
p = iqplot.ecdf(
    data=df.loc[(df['species'] == 'scandens') & (df['year'] == 1975), :],
    q='beak depth (mm)',
)

# Bootstrap data set, overlaid on the same figure as hollow gray markers
p = iqplot.ecdf(
    data=bs_sample,
    q='bootstrap',
    cats=None,
    p=p,
    marker_kwargs=dict(
        fill_color=None,
        line_color='gray'
    ),
)

bokeh.io.show(p)

In [37]:
def bs_sample(data):
    """Return a bootstrap sample of `data`.

    Draws ``len(data)`` entries from `data` with replacement using the
    module-level generator ``rng``.
    """
    n_draws = len(data)
    return rng.choice(data, size=n_draws, replace=True)

def bs_replicate(data, func, args=()):
    """Return one bootstrap replicate of a statistic.

    A bootstrap sample of `data` is drawn with ``bs_sample`` and `func` is
    evaluated on it; any entries of `args` are passed to `func` as extra
    positional arguments after the sample.
    """
    resampled = bs_sample(data)
    return func(resampled, *args)

In [38]:
# One bootstrap replicate of the mean of bd_1975
bs_replicate(bd_1975, np.mean)

8.970344827586208

In [39]:
# Number of bootstrap replicates to generate
n_reps=1_000_000
# Each replicate is the mean of a fresh bootstrap sample of bd_1975. This is a
# Python-level loop over n_reps draws, so the cell may take a while to run.
bs_reps_1975 = np.array([bs_replicate(bd_1975,np.mean) for _ in range(n_reps)])

In [40]:
# Inspect the array of replicates
bs_reps_1975

array([8.95344828, 8.97758621, 8.91896552, ..., 8.94218391, 8.91206897,
       8.88425287])

In [41]:
# 2.5th and 97.5th percentiles of the replicates, i.e. the bounds of a 95%
# percentile bootstrap interval for the mean
np.percentile(bs_reps_1975, [2.5, 97.5])

array([8.84264368, 9.07965517])

In [42]:
# Standard error of the mean from the plug-in standard deviation
# (np.std is called with its default ddof, so it divides by n)
sem = np.std(bd_1975) / np.sqrt(len(bd_1975))
mu_plugin = np.mean(bd_1975)
# Mean +/- 1.96 standard errors, for comparison with the percentile interval
# obtained from the bootstrap replicates
(mu_plugin-1.96*sem, mu_plugin+1.96*sem)

(8.841625267378157, 9.07837473262184)

In [43]:
# Staircase ECDFs with bootstrap confidence intervals for the two samples
# being compared (scandens in 1975 and 2012). The subset is selected here
# explicitly so the plot does not depend on `df` having been filtered earlier
# in the session.
p = iqplot.ecdf(
    data=df.loc[
        (df['species'] == 'scandens') & df['year'].isin([1975, 2012]), :
    ],
    q='beak depth (mm)',
    cats='year',
    style='staircase',
    conf_int=True,
    n_bs_reps=100_000,
)

bokeh.io.show(p)

In [44]:
# Observed difference of means (2012 minus 1975)
np.mean(bd_2012) - np.mean(bd_1975)

0.2284920634920642

In [45]:
# Bootstrap replicates of the mean for each year, drawn separately
# (this overwrites the bs_reps_1975 array computed in an earlier cell)
bs_reps_1975 = np.array([bs_replicate(bd_1975,np.mean) for _ in range(n_reps)])
bs_reps_2012 = np.array([bs_replicate(bd_2012,np.mean) for _ in range(n_reps)])

# Replicates of the difference of means (2012 minus 1975)
bs_reps_diff = bs_reps_2012 - bs_reps_1975

# 2.5th and 97.5th percentiles of the difference replicates
np.percentile(bs_reps_diff, [2.5, 97.5])

array([0.06152155, 0.39432957])

In [48]:
# Reload the measurements and keep only the scandens rows from 1975 and 2012
df = pd.read_csv("data/grant_complete.csv", comment="#")
df = df[df["species"].eq("scandens") & df["year"].isin([1975, 2012])]

# Beak depths for each year, as NumPy arrays
bd_1975 = df.loc[df["year"].eq(1975), "beak depth (mm)"].to_numpy()
bd_2012 = df.loc[df["year"].eq(2012), "beak depth (mm)"].to_numpy()

# Beak lengths for 1975, row-aligned with bd_1975
bl_1975 = df.loc[df["year"].eq(1975), "beak length (mm)"].to_numpy()

In [49]:
def draw_bs_pairs(x, y, rng):
    """Draw a pairs bootstrap sample from two equal-length data sets.

    The same randomly chosen positions are used to index both inputs, so the
    i-th entry of each returned array comes from the same original position.

    Parameters
    ----------
    x, y : array_like
        Paired measurements; they are converted with ``np.asarray`` so lists
        are accepted as well as arrays.
    rng : numpy.random.Generator
        Generator used to draw the positions (its ``choice`` method is called).

    Returns
    -------
    x_bs, y_bs : ndarray
        Resampled `x` and `y`, each with ``len(x)`` entries.

    Raises
    ------
    ValueError
        If `x` and `y` do not have the same length; pairing would otherwise
        either fail with an IndexError or silently ignore the tail of `y`.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    n = len(x)
    if len(y) != n:
        raise ValueError(
            f"x and y must have the same length; got {n} and {len(y)}."
        )

    # Positions are drawn from np.arange(n), exactly as before, so results for
    # a given generator state are unchanged.
    inds = rng.choice(np.arange(n), size=n, replace=True)

    return x[inds], y[inds]

In [50]:
# Pre-allocate storage for the replicates of the correlation coefficient
correlation_bs_reps = np.empty(n_reps)

# Each replicate: resample the (depth, length) pairs together, then take the
# off-diagonal entry of the 2x2 correlation matrix
for i in range(n_reps):
    bd, bl = draw_bs_pairs(bd_1975, bl_1975, rng)
    correlation_bs_reps[i] = np.corrcoef(bd,bl)[0,1]

# 2.5th and 97.5th percentiles of the correlation replicates
np.percentile(correlation_bs_reps, [2.5, 97.5])

array([0.45499678, 0.75119062])

**DASHBOARDS**

In [51]:
import pandas as pd
import numpy as np
import scipy.stats

import bokeh.io
import bokeh.layouts
import bokeh.models
import bokeh.plotting

# Address passed as `notebook_url` to bokeh.io.show when the dashboard apps are
# displayed below; adjust it if the notebook is served from another host/port.
notebook_url = 'localhost:8888'
# Direct Bokeh output to the notebook
bokeh.io.output_notebook()

In [55]:
# Initial parameters: the standard Normal distribution
mu, sigma = 0.0, 1.0

# Evaluate the PDF on a fixed grid of 200 points spanning [-10, 10]
x = np.linspace(-10, 10, num=200)
pdf = scipy.stats.norm(loc=mu, scale=sigma).pdf(x)

# The plot reads from this data source, so changing its 'pdf' column later
# redraws the curve
source = bokeh.models.ColumnDataSource(data={"x": x, "pdf": pdf})

# Figure with a fixed x-range matching the grid
p = bokeh.plotting.figure(
    x_range=[-10, 10],
    x_axis_label='x',
    y_axis_label='f(x)',
    frame_height=200,
    frame_width=350,
)

# Draw the PDF as a line backed by the data source
p.line('x', 'pdf', source=source, line_width=2);

# The figure is deliberately not shown here: it becomes part of a dashboard,
# and within a notebook a plot that belongs to a dashboard can only be shown
# there. An image of it is displayed below instead.

In [56]:
# Sliders for the two parameters of the plotted Normal PDF; the starting
# values are 0.0 for µ and 1.0 for σ
mu_slider = bokeh.models.Slider(
    title="µ",
    value=0.0,
    start=-5.0,
    end=5.0,
    step=0.1,
    width=100,
)
sigma_slider = bokeh.models.Slider(
    title="σ",
    value=1.0,
    start=0.1,
    end=5.0,
    step=0.1,
    width=100,
)

In [57]:
def norm_callback(attr, old, new):
    """Recompute the Normal PDF curve from the current slider settings.

    The `attr`, `old` and `new` arguments are not used; the parameter values
    are read directly from ``mu_slider`` and ``sigma_slider``.
    """
    # Evaluate the PDF at the x-values already held by the data source and
    # write the result back, replacing the 'pdf' column
    source.data["pdf"] = scipy.stats.norm.pdf(
        source.data['x'],
        loc=mu_slider.value,
        scale=sigma_slider.value,
    )

In [58]:
# Run norm_callback whenever either slider's value changes
mu_slider.on_change('value', norm_callback)
sigma_slider.on_change('value', norm_callback)

In [59]:
# Stack the sliders vertically, with spacers above and between them
slider_layout = bokeh.layouts.column(
    bokeh.layouts.Spacer(height=30),
    mu_slider,
    bokeh.layouts.Spacer(height=15),
    sigma_slider,
)

# Full dashboard: the plot on the left, a small gap, then the slider column
norm_layout = bokeh.layouts.row(
    p,
    bokeh.layouts.Spacer(width=15),
    slider_layout
)

In [60]:
def norm_app(doc):
    """App function for the Normal PDF dashboard: add its layout to `doc`.

    NOTE(review): `norm_layout` is built once at module level and reused on
    every call. Bokeh models can normally belong to only one document, so
    displaying this app more than once may fail -- TODO confirm.
    """
    doc.add_root(norm_layout)

In [61]:
# Display the dashboard app in the notebook served at `notebook_url`
bokeh.io.show(norm_app, notebook_url=notebook_url)

In [62]:
# Load the sleep data set ('*' entries are read as missing values) and append
# a boolean 'insomnia' column that is True where the 'sci' value is 16 or less
df = pd.read_csv('data/gfmt_sleep.csv', na_values='*').assign(
    insomnia=lambda frame: frame['sci'] <= 16
)

df.head()

Unnamed: 0,participant number,gender,age,correct hit percentage,correct reject percentage,percent correct,confidence when correct hit,confidence when incorrect hit,confidence when correct reject,confidence when incorrect reject,confidence when correct,confidence when incorrect,sci,psqi,ess,insomnia
0,8,f,39,65,80,72.5,91.0,90.0,93.0,83.5,93.0,90.0,9,13,2,True
1,16,m,42,90,90,90.0,75.5,55.5,70.5,50.0,75.0,50.0,4,11,7,True
2,18,f,31,90,95,92.5,89.5,90.0,86.0,81.0,89.0,88.0,10,9,3,True
3,22,f,35,100,75,87.5,89.5,,71.0,80.0,88.0,80.0,13,8,20,True
4,27,f,74,60,65,62.5,68.5,49.0,61.0,49.0,65.0,49.0,13,9,12,True


In [63]:
# Options for x- and y- selector; omit part. num., gender, and insomnia
xy_options = list(
    df.columns[~df.columns.isin(["participant number", "gender", "insomnia"])]
)


In [64]:
# Drop-down for the column plotted on the x-axis
x_selector = bokeh.models.Select(
    title="x",
    value="percent correct",
    options=xy_options,
    width=200,
)

# Drop-down for the column plotted on the y-axis
y_selector = bokeh.models.Select(
    title="y",
    value="confidence when correct",
    options=xy_options,
    width=200,
)

# Drop-down choosing which column, if any, determines the point colors
colorby_selector = bokeh.models.Select(
    title="color by",
    value="none",
    options=["none", "gender", "insomnia"],
    width=200,
)

In [65]:
# Data source for the scatter plot: the currently selected x and y columns,
# plus a per-point color column (for now, all Bokeh's default blue)
source = bokeh.models.ColumnDataSource(
    data={
        "x": df[x_selector.value],
        "y": df[y_selector.value],
        "color": ['#1f77b4'] * len(df),
    }
)

In [66]:
# Square figure whose axis labels start out as the selected column names
p = bokeh.plotting.figure(
    x_axis_label=x_selector.value,
    y_axis_label=y_selector.value,
    frame_width=250,
    frame_height=250,
)

# Populate glyphs; positions and colors are all read from the data source
circle = p.circle("x", "y", source=source, color="color")

In [67]:
def gfmt_callback(attr, old, new):
    """Callback for updating plot of GFMT results.

    The `attr`, `old` and `new` arguments are not used; the current state is
    read from the three selectors. The color column is rebuilt first, then
    the x and y columns and their axis labels are updated.
    """
    blue, orange = "#1f77b4", "#ff7e0e"

    # One color-list builder per "color by" choice. Gender: blue for "f",
    # orange otherwise. Insomnia: blue where truthy, orange otherwise.
    color_builders = {
        "none": lambda: [blue] * len(df),
        "gender": lambda: [
            blue if gender == "f" else orange for gender in df["gender"]
        ],
        "insomnia": lambda: [
            blue if insomnia else orange for insomnia in df["insomnia"]
        ],
    }

    # Update color column (left untouched for an unrecognized choice)
    build_colors = color_builders.get(colorby_selector.value)
    if build_colors is not None:
        source.data["color"] = build_colors()

    # Update x-data and axis label
    x_column = x_selector.value
    source.data["x"] = df[x_column]
    p.xaxis.axis_label = x_column

    # Update y-data and axis label
    y_column = y_selector.value
    source.data["y"] = df[y_column]
    p.yaxis.axis_label = y_column

In [68]:
# Run gfmt_callback whenever any of the three selectors changes value
colorby_selector.on_change("value", gfmt_callback)
x_selector.on_change("value", gfmt_callback)
y_selector.on_change("value", gfmt_callback)

In [69]:
# Dashboard layout: the scatter plot on the left, a small gap, then the three
# selectors stacked vertically with spacers between them
gfmt_layout = bokeh.layouts.row(
    p,
    bokeh.layouts.Spacer(width=15),
    bokeh.layouts.column(
        x_selector,
        bokeh.layouts.Spacer(height=15),
        y_selector,
        bokeh.layouts.Spacer(height=15),
        colorby_selector,
    ),
)

def gfmt_app(doc):
    """App function for the scatter-plot dashboard: add its layout to `doc`.

    NOTE(review): `gfmt_layout` is built once at module level and reused on
    every call. Bokeh models can normally belong to only one document, so
    displaying this app more than once may fail -- TODO confirm.
    """
    doc.add_root(gfmt_layout)

# Display the dashboard app in the notebook served at `notebook_url`
bokeh.io.show(gfmt_app, notebook_url=notebook_url)