In [None]:
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pygraphviz as pgv
import scipy.stats as stats
# import pymc3 as pm

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Ask the inline backend for high-resolution ("retina") figures.
%config InlineBackend.figure_format = 'retina'
# Stack two seaborn-derived matplotlib style sheets.
# NOTE(review): these style names may have been renamed ('seaborn-v0_8-*') or
# removed in newer matplotlib releases — confirm against the installed version.
plt.style.use(['seaborn-colorblind', 'seaborn-darkgrid'])

## Helper Functions

In [None]:
def compute_grid_approximation(prior, success=6, tosses=9):
    """
        This function helps calculate a grid approximation of the posterior distribution
        of a binomial success probability.

        Parameters:
            prior: np.array (any 1-D array-like is accepted)
                A distribution representing our state of knowledge before seeing the data.
                Its length determines the number of grid points.

            success: integer
                Number of successes.

            tosses: integer
                Number of tosses (i.e. successes + failures).

        Returns:
            p_grid: np.array
                Evenly-spaced out grid between 0 and 1 (inclusive), one point per prior entry.

            posterior: np.array
                The posterior distribution, normalised so that it sums to 1.
                If prior * likelihood is zero at every grid point the normalisation
                divides by zero and the result contains NaN.

            success: integer
                The `success` argument, passed through unchanged for labelling plots.

            tosses: integer
                The `tosses` argument, passed through unchanged for labelling plots.
    """
    # accept plain lists/tuples as well as arrays
    prior = np.asarray(prior)

    # define grid
    p_grid = np.linspace(0, 1, prior.shape[0])

    # compute likelihood at each point in the grid
    likelihood = stats.binom.pmf(success, tosses, p_grid)

    # compute product of likelihood and prior
    unstd_posterior = likelihood * prior

    # standardize the posterior, so it sums to 1
    posterior = unstd_posterior / unstd_posterior.sum()

    return p_grid, posterior, success, tosses

In [None]:
def plot_grid_approximation(p_grid, posterior, success, tosses, x_label):
    """
        Draw a grid-approximated posterior on the current matplotlib axes.

        The curve is plotted as connected markers and labelled in the legend
        with the success and toss counts; `x_label` is used as the x-axis label.
    """
    legend_entry = f'Success = {success}\nTosses = {tosses}'
    axes = plt.gca()
    axes.plot(p_grid, posterior, 'o-', label=legend_entry)
    axes.set_xlabel(x_label)
    axes.set_ylabel('Posterior Probability')
    axes.legend(loc=0)

## 2M1

In [None]:
# Flat prior over a 20-point grid
prior = np.ones(20)
print(prior)
x_label = 'Probability of Water'

# Observation sets as (successes, tosses): 1) 3 of 3, 2) 3 of 4, 3) 5 of 7.
# Every posterior is drawn onto the same axes.
for s, t in ((3, 3), (3, 4), (5, 7)):
    pg, po, s, t = compute_grid_approximation(prior, success=s, tosses=t)
    plot_grid_approximation(pg, po, s, t, x_label)

## 2M2

In [None]:
# Create distributions with 20 points
p_grid = np.linspace(start=0, stop=1, num=20)
# Step prior: zero below p = 0.5, constant from 0.5 upwards
prior = np.where(p_grid < 0.5, 0, 1)
x_label = 'Probability of Water'
# 1) W, W, W
# (both counts passed by keyword: a positional argument may not follow a keyword one)
pg, po, s, t = compute_grid_approximation(prior, success=3, tosses=3)
plot_grid_approximation(pg, po, s, t, x_label)
# 2) W, W, W, L
pg, po, s, t = compute_grid_approximation(prior, success=3, tosses=4)
plot_grid_approximation(pg, po, s, t, x_label)
# 3) L, W, W, L, W, W, W
pg, po, s, t = compute_grid_approximation(prior, success=5, tosses=7)
plot_grid_approximation(pg, po, s, t, x_label)

## 2M3

$$P(S=a \mid C_1=2) = \frac{P(C_1=2 \mid S=a)P(S=a)}{P(C_1=2)} = \frac{0.05}{0.05+0.1} = \frac{1}{3}$$
$$P(S=b \mid C_1=2) = 1 - P(S=a \mid C_1=2)=\frac{2}{3}$$
$$\begin{aligned}
P(C_2=2 \mid C_1=2) &= \sum_S P(C_2=2, S \mid C_1=2)\\
&= P(C_2=2, S=a \mid C_1=2) + P(C_2=2, S=b \mid C_1=2)\\
&= P(C_2=2 \mid S=a, C_1=2) \cdot P(S=a \mid C_1=2) + P(C_2=2 \mid S=b, C_1=2) \cdot P(S=b \mid C_1=2)\\
&= P(C_2=2 \mid S=a) \cdot P(S=a \mid C_1=2) + P(C_2=2 \mid S=b) \cdot P(S=b \mid C_1=2) \quad \text{(conditional independence; see Note below)}\\
&= \frac{1}{10} \times \frac{1}{3} + \frac{2}{10} \times \frac{2}{3}\\
&= \frac{1}{30} + \frac{4}{30}\\
&= \frac{1}{6}
\end{aligned}$$

## 2M4

In [None]:
__author__ = """Aric Hagberg (hagberg@lanl.gov)"""

# Build a 16-edge "star" (node 0 connected to nodes 0..15) and render it.
# pygraphviz is referenced through the `pgv` alias instead of a star import.
A = pgv.AGraph()

# set some default node attributes
A.node_attr['style'] = 'filled'
A.node_attr['shape'] = 'circle'
A.node_attr['fixedsize'] = 'true'
A.node_attr['fontcolor'] = '#FFFFFF'

# make a star in shades of red
for i in range(16):
    A.add_edge(0, i)
    n = A.get_node(i)
    # zero-pad the red channel: "%2x" pads with a space, which produced the
    # malformed colour "# 00000" for i == 0
    n.attr['fillcolor'] = "#%02x0000" % (i * 16)
    # node size grows linearly with the node index
    n.attr['height'] = "%s" % (i / 16.0 + 0.5)
    n.attr['width'] = "%s" % (i / 16.0 + 0.5)

print(A.string())  # print to screen
A.write("star.dot")  # write to star.dot
print("Wrote star.dot")
A.draw('star.png', prog="circo")  # draw to png using circo
print("Wrote star.png")

In [None]:
# Build a three-node triangle graph
A = pgv.AGraph()

A.add_edge(1, 2)
A.add_edge(2, 3)
A.add_edge(1, 3)

print(A.string())  # print to screen
A.write('simple.dot')  # write to simple.dot
# report success only once the file has actually been written
print("Wrote simple.dot")

B = pgv.AGraph('simple.dot')  # create a new graph from file
B.layout()  # layout with default (neato)
B.draw('simple.png')  # draw png
print("Wrote simple.png")

## 2M5

In [None]:
# Probability of 2 successes in 2 trials under p = 0.1 (a) and p = 0.2 (b)
a, b = stats.binom.pmf(2, n=2, p=[0.1, 0.2])
# Equal-weight average of the two probabilities
0.5 * a + 0.5 * b

## 2M6

In [None]:
# Ten random draws from Binomial(n=2, p=0.7)
stats.binom.rvs(n=2, p=0.7, size=10)

## 2M7

In [None]:
# Simulate 100,000 counts from Binomial(n=9, p=0.6) and histogram them
dummy_w = stats.binom.rvs(n=9, p=0.6, size=100_000)
# [(dummy_w == i).mean() for i in range(3)]
plt.hist(x=dummy_w, bins=50)

## 2H1

In [None]:
# Posterior for 6 successes in 9 tosses under a flat prior on a 20-point grid.
# The helper defined above is named compute_grid_approximation.
pg, po, w, n = compute_grid_approximation(np.ones(20), 6, 9)
# Draw 10,000 grid values, each with its posterior probability
samples = np.random.choice(pg, p=po, size=10000, replace=True)

## 2H2

In [None]:
# One binomial draw of n tosses per sampled probability, then a histogram
dummy_w = stats.binom.rvs(n=n, p=samples, size=10000)
# [(dummy_w == i).mean() for i in range(3)]
plt.hist(x=dummy_w, bins=50)

## 2H3

In [None]:
# `size` is used both as the number of grid points and as the number of draws
size = 1000
# The helper defined above is named compute_grid_approximation.
pg, po, w, n = compute_grid_approximation(np.ones(size), 6, 9)
samples = np.random.choice(pg, p=po, size=size, replace=True)
plt.hist(samples, bins=50)

## 2H4

In [None]:
np.mean(samples < 0.2)

In [None]:
np.mean(samples > 0.8)

In [None]:
np.mean((samples < 0.8) & (samples > 0.2))

In [None]:
np.percentile(samples, 20)

In [None]:
perc_range = np.percentile(samples, [80, 100])[0]
perc_range

In [None]:
# Posterior curve, with the region where the grid value exceeds perc_range shaded
plt.plot(pg, po)
plt.xticks([0.0, 0.25, 0.5, 0.75, 1.0])
plt.ylabel('Density', fontsize=14)
plt.xlabel('proportion of water (p)', fontsize=14)
plt.fill_between(pg, po, where=(pg > perc_range))

In [None]:
az.hpd(samples, credible_interval=0.66)

In [None]:
np.percentile(samples, [17, 83])

In [None]:
# Posterior for 8 successes in 15 tosses under a flat prior on a 100-point grid.
# The helper defined above is named compute_grid_approximation.
pg, po, w, n = compute_grid_approximation(np.ones(100), success=8, tosses=15)
plt.plot(pg, po)

In [None]:
np.random.seed(1492)
samples2 = np.random.choice(pg, p=po, size=10000, replace=True)
az.hpd(samples2, credible_interval=.9)

In [None]:
ppc = stats.binom.rvs(n=15, size=10000, p=samples2)
plt.hist(ppc, bins=50)
np.mean(ppc == 8)

In [None]:
ppc2 = stats.binom.rvs(n=9, size=10000, p=samples2)
plt.hist(ppc2, bins=50)
np.mean(ppc2 == 6)

In [None]:
# Step prior on a 1000-point grid: zero below 0.5, constant from 0.5 upwards
prior3 = np.where(np.linspace(start=0, stop=1, num=1000) < 0.5, 0, 1)
# The helper defined above is named compute_grid_approximation.
pg3, po3, w, n = compute_grid_approximation(prior3, success=8, tosses=15)
plt.plot(pg3, po3)

In [None]:
samples3 = np.random.choice(pg3, p=po3, size=10000, replace=True)
az.hpd(samples3, credible_interval=.9)

In [None]:
ppc3 = stats.binom.rvs(n=15, size=10000, p=samples3)
plt.hist(ppc3, bins=50)
np.mean(ppc3 == 8)

In [None]:
ppc4 = stats.binom.rvs(n=9, size=10000, p=samples3)
plt.hist(ppc4, bins=50)
np.mean(ppc4 == 6)

In [None]:
# 0/1 indicator arrays. Later cells sum the 1s into a variable named `boys` and
# treat `birth1 == 0` as first-born girls, so 1 is used as "boy" and 0 as "girl".
# NOTE(review): presumably birth1[i] and birth2[i] are the first and second
# births of the same family — confirm against the data source.
birth1 = np.array([1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0, 0,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0, 1,1,0,1,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,1,1,1])
# Second array, indexed in parallel with birth1 by the cells that follow.
birth2 = np.array([0,1,0,1,0,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,
1,1,1,0,1,1,1,0,1,0,0,1,1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,
0,0,0,1,1,1,0,0,0,0])

In [None]:
# Total number of births and number of boys (entries equal to 1) across both arrays
total = len(birth1) + len(birth2)
boys = sum(birth1) + sum(birth2)
# Flat prior on a 1000-point grid.
# The helper defined above is named compute_grid_approximation.
pg, po, b, n = compute_grid_approximation(np.ones(1000), success=boys, tosses=total)
plt.plot(pg, po)

In [None]:
pg[po == max(po)]

In [None]:
samples = np.random.choice(pg, p=po, size=10000, replace=True)

In [None]:
for i in (0.5, 0.89, 0.97):
    print(f'{i}:', az.hpd(samples, credible_interval=i))

In [None]:
ppc = stats.binom.rvs(n=200, size=10000, p=samples)

In [None]:
np.mean(ppc == boys)

In [None]:
az.plot_kde(ppc)
plt.axvline(boys, color='red')

In [None]:
# Repeat the analysis using only the birth1 array
boys2 = sum(birth1)
# The helper defined above is named compute_grid_approximation.
pg2, po2, b2, n2 = compute_grid_approximation(np.ones(1000), success=boys2, tosses=len(birth1))
samples2 = np.random.choice(pg2, p=po2, size=10000, replace=True)
# number of trials taken from the data instead of a hard-coded 100
ppc2 = stats.binom.rvs(n=len(birth1), size=10000, p=samples2)
az.plot_kde(ppc2)
plt.axvline(boys2, color='red')

In [None]:
# Simulate len(birth1) births from the `samples` posterior and compare with the
# boys counted in birth1; the trial count comes from the data, not a literal 100.
ppc3 = stats.binom.rvs(n=len(birth1), size=10000, p=samples)
az.plot_kde(ppc3)
plt.axvline(boys2, color='red')

In [None]:
# Number of entries in birth1 that are not counted as boys
first_girls = birth1.size - birth1.sum()
# birth2 values at the positions where birth1 is 0
boys_after_girls = birth2[birth1 == 0]
# Simulated boy counts among `first_girls` births, one draw per posterior sample
samples4 = stats.binom.rvs(n=first_girls, p=samples, size=10000)
az.plot_kde(samples4)
# Observed count in red, interval bounds in black
plt.axvline(boys_after_girls.sum(), color='red')
lower, upper = az.hpd(samples4, 0.95)
plt.axvline(lower, color='black')
plt.axvline(upper, color='black')

In [None]:
pos = np.random.uniform(-1, 1, size=(16, 10000)).sum(0)
az.plot_kde(pos)

In [None]:
pos = np.random.uniform(1, 1.1, size=(12, 10000)).prod(0)
az.plot_kde(pos)

In [None]:
# Products of 12 uniform factors: a wide factor range (big) versus a narrow one (small)
big = np.random.uniform(1, 1.5, size=(12, 10000)).prod(0)
# (removed an unbalanced trailing ")" that made this cell a SyntaxError)
small = np.random.uniform(1, 1.01, size=(12, 10000)).prod(0)
# Side-by-side density plots
_, ax = plt.subplots(1, 2, figsize=(8, 4))
az.plot_kde(big, ax=ax[0])
az.plot_kde(small, ax=ax[1])

In [None]:
# Natural log of the column-wise product of 12 uniform(1, 1.5) draws
pos = np.log(np.random.uniform(low=1, high=1.5, size=(12, 10000)).prod(axis=0))
az.plot_kde(pos)

In [None]:
sample_size = 10000
m = stats.norm.rvs(178, 20, sample_size)
s = stats.uniform.rvs(0, 50, sample_size)
prior_h = stats.norm.rvs(m, s, sample_size)
az.plot_kde(prior_h)

In [None]:
m = stats.norm.rvs(178, 100, sample_size)
s = stats.uniform.rvs(0, 50, sample_size)
prior_h = stats.norm.rvs(m, s, sample_size)
az.plot_kde(prior_h)

In [None]:
# Fraction of simulated values below 0 (printed) and above 272 (cell output)
print((prior_h < 0).mean())
(prior_h > 272).mean()