In [None]:
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pygraphviz as pgv
import scipy.stats as stats
# import pymc3 as pm

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and
# later releases dropped the old names, so pick whichever spelling this
# installation actually provides.
plt.style.use([
    style if style in plt.style.available
    else style.replace('seaborn-', 'seaborn-v0_8-', 1)
    for style in ('seaborn-colorblind', 'seaborn-darkgrid')
])

## Helper Functions

In [None]:
def compute_grid_approximation(prior, success=6, tosses=9):
    """
        Compute a grid approximation of the posterior of a binomial proportion.

        Parameters:
            prior: np.array (or any 1-D array-like)
                A distribution representing our state of knowledge before seeing the data.
                Its length determines the number of grid points.

            success: integer
                Number of successes.

            tosses: integer
                Number of tosses (i.e. successes + failures).

        Returns:
            p_grid: np.array
                Evenly-spaced grid between 0 and 1 (inclusive), one point per prior entry.

            posterior: np.array
                The posterior distribution, normalised to sum to 1.

            success: integer
                The `success` argument, passed through for plotting/labelling.

            tosses: integer
                The `tosses` argument, passed through for plotting/labelling.

        Raises:
            ValueError: if likelihood * prior has no positive mass, so the
                posterior cannot be normalised.
    """
    # accept plain lists/tuples as well as arrays
    prior = np.asarray(prior)

    # define grid
    p_grid = np.linspace(0, 1, prior.shape[0])

    # compute likelihood at each point in the grid
    likelihood = stats.binom.pmf(success, tosses, p_grid)

    # compute product of likelihood and prior
    unstd_posterior = likelihood * prior

    # standardize the posterior, so it sums to 1
    total_mass = unstd_posterior.sum()
    # `not (x > 0)` also catches NaN, which would otherwise propagate silently
    if not total_mass > 0:
        raise ValueError(
            'Posterior cannot be normalised: likelihood * prior has no positive mass.'
        )
    posterior = unstd_posterior / total_mass

    return p_grid, posterior, success, tosses

In [None]:
def plot_grid_approximation(p_grid, posterior, success, tosses, x_label):
    """
        Draw a grid-approximated posterior on the current matplotlib axes.

        Parameters:
            p_grid: grid points used for the x-axis.
            posterior: posterior probability at each grid point.
            success, tosses: counts shown in the legend entry.
            x_label: text for the x-axis label.
    """
    legend_entry = f'Success = {success}\nTosses = {tosses}'
    plt.plot(p_grid, posterior, 'o-', label=legend_entry)
    plt.ylabel('Posterior Probability')
    plt.xlabel(x_label)
    # loc=0 asks matplotlib to choose the legend position itself
    plt.legend(loc=0)

## 2M1

In [None]:
# Flat prior over a 20-point grid
prior = np.ones(20)
print(prior)
x_label = 'Probability of Water'
# (successes, tosses) for observation sets 1, 2 and 3; all curves share one figure
for n_success, n_tosses in ((3, 3), (3, 4), (5, 7)):
    pg, po, s, t = compute_grid_approximation(prior, success=n_success, tosses=n_tosses)
    plot_grid_approximation(pg, po, s, t, x_label)

## 2M2

In [None]:
# Step prior on a 20-point grid: zero below p = 0.5, constant at or above it
p_grid = np.linspace(start=0, stop=1, num=20)
prior = np.where(p_grid < 0.5, 0, 1)
x_label = 'Probability of Water'
# 1) W, W, W
# (both counts passed by keyword: a positional argument after `success=3`
#  is a SyntaxError)
pg, po, s, t = compute_grid_approximation(prior, success=3, tosses=3)
plot_grid_approximation(pg, po, s, t, x_label)
# 2) W, W, W, L
pg, po, s, t = compute_grid_approximation(prior, success=3, tosses=4)
plot_grid_approximation(pg, po, s, t, x_label)
# 3) L, W, W, L, W, W, W
pg, po, s, t = compute_grid_approximation(prior, success=5, tosses=7)
plot_grid_approximation(pg, po, s, t, x_label)

## 2M3

$$P(S=a \mid C_1=2) = \frac{P(C_1=2 \mid S=a)P(S=a)}{P(C_1=2)} = \frac{0.05}{0.05+0.1} = \frac{1}{3}$$
$$P(S=b \mid C_1=2) = 1 - P(S=a \mid C_1=2)=\frac{2}{3}$$
$$P(C_2=2 \mid C_1=2) = \sum_S P(C_2=2, S \mid C_1=2)\\
= P(C_2=2, S=a \mid C_1=2) + P(C_2=2, S=b \mid C_1=2)\\
= P(C_2=2 \mid S=a, C_1=2) \cdot P(S=a \mid C_1=2) \quad+ P(C_2=2 \mid S=b, C_1=2) \cdot P(S=b \mid C_1=2)\\
= P(C_2=2 \mid S=a) \cdot P(S=a \mid C_1=2) \quad+ P(C_2=2 \mid S=b) \cdot P(S=b \mid C_1=2) \quad \text{(by conditional independence; see note below)}\\
= \frac{1}{10} \times \frac{1}{3} + \frac{2}{10} \times \frac{2}{3}\\
= \frac{1}{30} + \frac{4}{30}\\
= \frac{1}{6}$$

## 2M4

In [None]:
# The Python-2 `from __future__` imports were no-ops here (the notebook uses
# f-strings, i.e. Python 3.6+), and the star import is replaced by the
# explicit `pgv` alias from the notebook's import cell.
__author__ = """Aric Hagberg (hagberg@lanl.gov)"""

A = pgv.AGraph()

# set some default node attributes
A.node_attr['style'] = 'filled'
A.node_attr['shape'] = 'circle'
A.node_attr['fixedsize'] = 'true'
A.node_attr['fontcolor'] = '#FFFFFF'

# make a star in shades of red
for i in range(16):
    A.add_edge(0, i)
    node = A.get_node(i)
    # %02x zero-pads the red channel; %2x space-pads, so i == 0 produced
    # the invalid colour string "# 00000"
    node.attr['fillcolor'] = "#%02x0000" % (i * 16)
    # height and width grow from 0.5 by 1/16 per node index
    node.attr['height'] = "%s" % (i / 16.0 + 0.5)
    node.attr['width'] = "%s" % (i / 16.0 + 0.5)

print(A.string())  # print to screen
A.write("star.dot")  # write to star.dot
print("Wrote star.dot")
A.draw('star.png', prog="circo")  # draw to png using circo
print("Wrote star.png")

In [None]:
# `pgv` is the pygraphviz alias from the notebook's import cell.
A = pgv.AGraph()

# three nodes joined in a triangle
A.add_edge(1, 2)
A.add_edge(2, 3)
A.add_edge(1, 3)

print(A.string())  # print to screen
A.write('simple.dot')  # write to simple.dot
# report success only after the write has actually happened
print("Wrote simple.dot")

B = pgv.AGraph('simple.dot')  # create a new graph from file
B.layout()  # layout with default (neato)
B.draw('simple.png')  # draw png
print("Wrote simple.png")

## 2M5

In [None]:
# Probability of 2 successes in 2 trials at p = 0.1 (a) and p = 0.2 (b),
# then mixed with equal weights of 0.5
a, b = (stats.binom.pmf(2, n=2, p=p_success) for p_success in (0.1, 0.2))
0.5 * a + 0.5 * b

## 2M6

In [None]:
# Ten random draws from a binomial with 2 trials and success probability 0.7
stats.binom.rvs(n=2, p=0.7, size=10)

## 2M7

In [None]:
# 100,000 simulated counts out of 9 trials at p = 0.6, shown as a histogram
dummy_w = stats.binom.rvs(n=9, p=0.6, size=100000)
plt.hist(dummy_w, bins=50)

## 2H1

In [None]:
# The helper defined in this notebook is `compute_grid_approximation`;
# `grid_approximation` does not exist and raised NameError.
pg, po, w, n = compute_grid_approximation(np.ones(20), 6, 9)
# draw 10,000 grid values with probability proportional to the posterior
samples = np.random.choice(pg, p=po, size=10000, replace=True)

## 2H2

In [None]:
# One simulated count of n trials per sampled probability, shown as a histogram
dummy_w = stats.binom.rvs(n=n, p=samples, size=10000)
plt.hist(dummy_w, bins=50)

## 2H3

In [None]:
np.random.seed(3)
size = 1000
prior = np.ones(size)
pg, po, s, t = compute_grid_approximation(prior, success=6, tosses=9)
samples = np.random.choice(pg, p=po, size=size, replace=True)
plot_interval(samples, right=0)

## 2H4

In [None]:
np.mean(samples < 0.2)
plot_interval(samples, right=0.2)

In [None]:
def plot_interval(samples, left=0, right=1, xvar=None):
    fig, ax = plt.subplots()
    ax.axvspan(left, right, facecolor='grey', alpha=0.35)
    ax.hist(samples, bins=100)
    ax.set_xlim(0, 1)
    yt = list((str(int(i)) for i in ax.get_yticks()))
    yt[0] = None
    ax.set_yticklabels(yt)
    plt.xlabel(f'Probability of {xvar}')
    plt.ylabel('Frequency')

In [None]:
# Share of samples above 0.8, with that region shaded on the histogram
share_above = (samples > 0.8).mean()
print(share_above)

plot_interval(samples, left=0.8)

In [None]:
# Share of samples strictly between 0.2 and 0.8. Printed explicitly: a bare
# expression that is not the last line of a cell is silently discarded.
print(np.mean((samples > 0.2) & (samples < 0.8)))
plot_interval(samples, left=0.2, right=0.8)

In [None]:
# 20th percentile of the samples; shade everything from 0 up to it
value = np.percentile(samples, q=20)
plot_interval(samples, left=0, right=value)
print(value)

In [None]:
# 80th percentile of the samples; shade everything from it up to 1
value = np.percentile(samples, q=80)
plot_interval(samples, left=value, right=1)
value

In [None]:
# NOTE(review): `perc_range` was never defined anywhere in the notebook, so
# this cell raised NameError. The 80th percentile used by the preceding cell
# is assumed to be the intended cut-off; confirm.
perc_range = np.percentile(samples, 80)

plt.plot(pg, po)
plt.xlabel('proportion of water (p)', fontsize=14)
plt.ylabel('Density', fontsize=14)
plt.xticks([0, 0.25, 0.50, 0.75, 1.00])
# shade the part of the curve lying above the cut-off
plt.fill_between(pg, po, where=(pg > perc_range))

In [None]:
# `az.hpd(..., credible_interval=)` is ArviZ's legacy spelling; use
# `az.hdi(..., hdi_prob=)` like every other cell in this notebook.
az.hdi(samples, hdi_prob=0.66)

In [None]:
# 66% highest-density interval of the samples, shaded on the histogram
values = az.hdi(samples, hdi_prob=0.66)
hdi_low, hdi_high = values
plot_interval(samples, left=hdi_low, right=hdi_high)
values

In [None]:
# Central 66% percentile interval: 17th to 83rd percentile
values = np.percentile(samples, [17, 83])
pct_low, pct_high = values
plot_interval(samples, left=pct_low, right=pct_high)
values

In [None]:
# Flat prior on a 1000-point grid, posterior for 8 successes in 15 tosses,
# then 1000 posterior samples shown as a histogram
np.random.seed(3)
size = 1000
prior = np.ones(size)
pg, po, s, t = compute_grid_approximation(prior, 8, 15)
samples = np.random.choice(pg, size=size, replace=True, p=po)
plt.hist(samples, bins=50)

In [None]:
# Posterior curve for 8 successes in 15 tosses under a flat 1000-point prior
np.random.seed(3)
size = 1000
prior = np.ones(size)
pg, po, s, t = compute_grid_approximation(prior, 8, 15)
plt.plot(pg, po)
plt.xlabel('Probability of Water')
plt.ylabel('Density')

In [None]:
# 10,000 posterior samples and their 90% highest-density interval
np.random.seed(3)
samples = np.random.choice(pg, size=10000, replace=True, p=po)
values = az.hdi(samples, hdi_prob=0.9)
print(values)
hdi_low, hdi_high = values
plot_interval(samples, left=hdi_low, right=hdi_high)

In [None]:
def plot_ppc(ppc, value=None, xvar=None, line=None):
    """
        Plot a 100-bin histogram of simulated counts.

        Parameters:
            ppc: array-like
                Simulated counts to histogram.

            value: number, optional
                When given, the band [value - 0.5, value + 0.5] is shaded grey.

            xvar: str, optional
                Name used in the x-axis label ("# of <xvar> Observations").
                When omitted the label is "# of Observations".

            line: number, optional
                When given, a vertical line labelled 'Actual # of Boys' is
                drawn at this x position and a legend is added.

        Returns:
            None (the figure is rendered by the notebook backend).
    """
    fig, ax = plt.subplots()
    # compare against None so a legitimate count of 0 is still highlighted
    if value is not None:
        ax.axvspan(value - 0.5, value + 0.5, facecolor='grey', alpha=0.35)
    ax.hist(ppc, bins=100)
    if line is not None:
        ax.axvline(line, color='#d55e00', linewidth=3, label='Actual # of Boys')
        ax.legend()
    ax.set_xlabel('# of Observations' if xvar is None else f'# of {xvar} Observations')
    ax.set_ylabel('Frequency')

In [None]:
# One simulated count of 15 tosses per posterior sample
ppc = stats.binom.rvs(n=15, size=10000, p=samples)

# `var` is only assigned in a later cell, so it raised NameError on a fresh
# kernel; this section's plots are labelled with 'Water', so name it directly.
plot_ppc(ppc, 8, 'Water')
np.mean(ppc == 8)

In [None]:
# One simulated count of 9 tosses per posterior sample; report the share equal to 6
ppc = stats.binom.rvs(n=9, p=samples, size=10000)
share_six = (ppc == 6).mean()
print(share_six)
plot_ppc(ppc, value=6)

In [None]:
# Step prior (0 below 0.5, 1 at or above it) on a 1000-point grid, and the
# resulting posterior for 8 successes in 15 tosses
np.random.seed(3)
size = 1000
grid_points = np.linspace(start=0, stop=1, num=size)
prior = np.where(grid_points < 0.5, 0, 1)
pg, po, s, t = compute_grid_approximation(prior, 8, 15)
plt.plot(pg, po)
plt.xlabel('Probability of Water')
plt.ylabel('Density')

In [None]:
# 10,000 posterior samples and their 90% highest-density interval
samples = np.random.choice(pg, size=10000, replace=True, p=po)
values = az.hdi(samples, hdi_prob=0.9)
print(values)
hdi_low, hdi_high = values
plot_interval(samples, left=hdi_low, right=hdi_high)

In [None]:
# One simulated count of 15 tosses per posterior sample; share equal to 8
ppc = stats.binom.rvs(n=15, p=samples, size=10000)
plot_ppc(ppc, value=8)
(ppc == 8).mean()



In [None]:
# One simulated count of 9 tosses per posterior sample; share equal to 6
ppc = stats.binom.rvs(n=9, p=samples, size=10000)
plot_ppc(ppc, value=6)
(ppc == 6).mean()

In [None]:
# Binary birth records. Later cells sum these values to count boys and count
# (0, 1) pairs as "girl first, boy second", so 1 = boy and 0 = girl.
# birth1 and birth2 are paired by index (they are zipped together later) --
# presumably first and second births of the same families; confirm against
# the data source.
birth1 = (1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0, 0,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0, 1,1,0,1,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,1,1,1)
birth2 = (0,1,0,1,0,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,1,1,1,0,1,0,0,1,1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,0,0,0,1,1,1,0,0,0,0)

In [None]:
np.random.seed(3)
var = 'Boy'
orange = '#d55e00'
# pool both birth tuples: total number of births and number of boys (1s)
total = len(birth1 + birth2)
boys = sum(birth1 + birth2)
prior = np.ones(1000)
pg, po, b, n = compute_grid_approximation(prior, success=boys, tosses=total)
# np.argmax returns a single index, so `value` is one scalar grid point.
# The previous boolean mask (po == max(po)) produced an array, which could
# hold several values on ties and was then passed to axvline.
value = pg[np.argmax(po)]
print(value)

plt.plot(pg, po)
plt.axvline(value, color=orange, label='Maximum value')
plt.xlabel(f'Probability of {var}')
plt.ylabel('Density')
plt.legend()

In [None]:
# Grid point(s) at which the posterior reaches its largest value
posterior_peak = max(po)
pg[po == posterior_peak]

In [None]:
# 10,000 grid values drawn with probability given by the posterior
samples = np.random.choice(pg, size=10000, replace=True, p=po)

In [None]:
# Re-draw the posterior samples with a fixed seed, then print and plot the
# 50%, 89% and 97% highest-density intervals
np.random.seed(3)
samples = np.random.choice(pg, size=10000, replace=True, p=po)
for i in (0.5, 0.89, 0.97):
    values = az.hdi(samples, hdi_prob=i)
    print(values)
    hdi_low, hdi_high = values
    plot_interval(samples, left=hdi_low, right=hdi_high, xvar=var)

In [None]:
# One simulated count out of 200 births per posterior sample
ppc = stats.binom.rvs(n=200, p=samples, size=10000)

In [None]:
# Share of simulated counts equal to the observed number of boys
(ppc == boys).mean()

In [None]:
# Simulated counts out of 200 births, with the observed count marked
ppc = stats.binom.rvs(n=200, p=samples, size=10000)
plot_ppc(ppc, xvar=var)
plt.axvline(boys, label='Actual # of Boys', color=orange, linewidth=3)
plt.legend()

In [None]:
# Simulated counts out of 100 births, with the number of 1s in birth1 marked
boys_1 = sum(birth1)
ppc = stats.binom.rvs(n=100, size=10000, p=samples)
plot_ppc(ppc, xvar=var)
plt.axvline(boys_1, label='Actual # of Boys', color=orange, linewidth=3)
plt.legend()

In [None]:
# Number of 0s in birth1, and number of index-paired (0, 1) births
girls_1 = len(birth1) - sum(birth1)
girl1_boy2 = sum(1 for pair in zip(birth1, birth2) if pair == (0, 1))
# simulate counts with girls_1 trials per posterior sample and mark the observed count
ppc = stats.binom.rvs(n=girls_1, size=10000, p=samples)
plot_ppc(ppc, xvar=var)
plt.axvline(girl1_boy2, label='Actual # of Boys', color=orange, linewidth=3)
plt.legend()

In [None]:
# NOTE(review): `boys2` was never defined anywhere in the notebook, so this
# cell raised NameError. It is assumed to mean the number of boys in
# `birth2`; confirm.
boys2 = sum(birth2)

# simulated counts out of 100 births, one per posterior sample
ppc3 = stats.binom.rvs(n=100, size=10000, p=samples)
az.plot_kde(ppc3)
plt.axvline(boys2, color='red')

In [None]:
# Same check as the earlier cell: number of 0s in birth1 and number of
# index-paired (0, 1) births, compared against simulated counts
girls_1 = len(birth1) - sum(birth1)
paired_births = list(zip(birth1, birth2))
girl1_boy2 = paired_births.count((0, 1))
ppc = stats.binom.rvs(n=girls_1, size=10000, p=samples)
plot_ppc(ppc, xvar=var)
plt.axvline(girl1_boy2, label='Actual # of Boys', color=orange, linewidth=3)
plt.legend()

In [None]:
# 10,000 sums of 16 uniform(-1, 1) draws each
steps = np.random.uniform(-1, 1, size=(16, 10000))
pos = steps.sum(axis=0)
az.plot_kde(pos)

In [None]:
# 10,000 products of 12 uniform(1, 1.1) draws each
factors = np.random.uniform(1, 1.1, size=(12, 10000))
pos = factors.prod(axis=0)
az.plot_kde(pos)

In [None]:
# Products of 12 uniform draws each: factors up to 1.5 (big) vs up to 1.01 (small)
big = np.random.uniform(1, 1.5, size=(12, 10000)).prod(0)
# the stray closing parenthesis that made this line a SyntaxError is removed
small = np.random.uniform(1, 1.01, size=(12, 10000)).prod(0)
_, ax = plt.subplots(1, 2, figsize=(8, 4))
az.plot_kde(big, ax=ax[0])
az.plot_kde(small, ax=ax[1])

In [None]:
# Log of 10,000 products of 12 uniform(1, 1.5) draws each
products = np.random.uniform(1, 1.5, size=(12, 10000)).prod(axis=0)
pos = np.log(products)
az.plot_kde(pos)

In [None]:
# Draw a mean from Normal(178, 20) and a spread from Uniform(0, 50), then one
# normal draw per (mean, spread) pair
sample_size = 10000
m = stats.norm.rvs(loc=178, scale=20, size=sample_size)
s = stats.uniform.rvs(loc=0, scale=50, size=sample_size)
prior_h = stats.norm.rvs(loc=m, scale=s, size=sample_size)
az.plot_kde(prior_h)

In [None]:
# Same simulation with the mean drawn from the wider Normal(178, 100)
m = stats.norm.rvs(loc=178, scale=100, size=sample_size)
s = stats.uniform.rvs(loc=0, scale=50, size=sample_size)
prior_h = stats.norm.rvs(loc=m, scale=s, size=sample_size)
az.plot_kde(prior_h)

In [None]:
# Share of simulated values below 0 (printed) and above 272 (cell output)
share_negative = (prior_h < 0).mean()
print(share_negative)
(prior_h > 272).mean()