# Problem Solutions

In [None]:
import warnings
# Install a global 'ignore' filter: with no category or message given, this
# silences every warning raised for the rest of the notebook.
# NOTE(review): presumably done to keep cell output clean; it also hides
# deprecation warnings from the libraries used below — confirm that is intended.
warnings.filterwarnings('ignore')

In [None]:
import math
import pymc3 as pm
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
from scipy.stats import beta

### Question 2 - For the example of measurement noise in the notes, confirm that a Gamma distribution can be used as a prior for both the value of the physical property being measured and also the standard deviation of measurement noise. Propose suitable values for the parameters of these Gamma distributions.

In [None]:
# The five observed readings used as data for the measurement-noise model.
measurements = np.array([12.5, 13.2, 15.1, 14.2, 13.5])

In [None]:
# Measurement-noise model for Question 2.
#
# Both unknowns are strictly positive, so each is given a Gamma prior, as the
# question asks. Gamma(alpha=1, beta=lam) is exactly the Exponential(lam)
# distribution, so the priors below are Exponential(1/15) and Exponential(1)
# written with their Gamma parameters made explicit.
with pm.Model() as model:

    # Prior for the true value of the measured quantity: mean alpha / beta = 15.
    mu = pm.Gamma("mu", alpha=1.0, beta=1.0 / 15.0)

    # Prior for the standard deviation of the noise: mean alpha / beta = 1.
    sigma = pm.Gamma("sigma", alpha=1.0, beta=1.0 / 1.0)

    # Likelihood: each reading is Normal around mu with standard deviation sigma.
    observation = pm.Normal("obs", mu=mu, sigma=sigma, observed=measurements)

In [None]:
# Draw 100,000 samples from each prior (mu first, then sigma) for plotting.
prior_mu, prior_sigma = (rv.random(size=100000) for rv in (mu, sigma))

In [None]:
# Histograms of the prior samples, one panel per parameter.
fig, axes = plt.subplots(2, 1, figsize=(10, 5))

# (samples, colour, x-axis range, panel title) for each panel, top to bottom.
prior_panels = [
    (prior_mu, "#467821", (0, 30), 'Circumference'),
    (prior_sigma, "#A60628", (0, 10), 'Standard deviation of measurement error'),
]

for ax, (samples, colour, x_range, title) in zip(axes, prior_panels):
    ax.hist(samples, histtype='stepfilled', bins=100, alpha=0.85, color=colour, density=True)
    ax.set_xlim(*x_range)
    ax.set_title(title)
    ax.set_xlabel('Length (cm)')

plt.tight_layout()

In [None]:
# Sample the posterior of the measurement model (5000 draws).
with model:
    trace = pm.sample(draws=5000)

In [None]:
# Histograms of the posterior samples, one panel per parameter.
fig, axes = plt.subplots(2, 1, figsize=(10, 5))

# (trace variable, bin count, colour, x-axis range, panel title), top to bottom.
posterior_panels = [
    ('mu', 70, "#467821", (12, 16), 'Circumference'),
    ('sigma', 30, "#A60628", (0, 3), 'Standard deviation of measurement error'),
]

for ax, (var_name, n_bins, colour, x_range, title) in zip(axes, posterior_panels):
    ax.hist(trace[var_name], histtype='stepfilled', bins=n_bins, alpha=0.85, color=colour, density=True)
    ax.set_xlim(*x_range)
    ax.set_title(title)
    ax.set_xlabel('Length (cm)')

plt.tight_layout()

### Question 3 - A supermarket is trying to estimate the number of customers who will visit a store each day and uses a Poisson process to model the arrival of customers. If 40 people visit the store on the first day, how many are expected to visit the store on any day? What is the uncertainty in this estimate? If on subsequent days 30, 60, and 50 customers actually do visit the store, how does the estimate of the expected number of customers change?

In [None]:
# First day only: a single observed count of 40 customers.
number_of_customers = 40

with pm.Model() as model:

    # Prior on the Poisson rate: exponential with rate 1/40, i.e. mean 40.
    mu = pm.Exponential("mu", lam=1.0 / 40.0)

    # Poisson likelihood for the observed count.
    observation = pm.Poisson("obs", mu=mu, observed=number_of_customers)

    # Sample the posterior of the rate (5000 draws).
    trace = pm.sample(draws=5000)

In [None]:
# Posterior mean and standard deviation of the Poisson rate.
# The model and the question concern customers per day, so the label says
# "per day" (it previously said "per hour").
print("Estimate of visits per day: " + str(np.round(np.mean(trace['mu']),1)))
print("Uncertainty of error: +/- " + str(np.round(np.std(trace['mu']),1)))

In [None]:
# All four days: the first-day count plus the three subsequent counts.
number_of_customers = np.array([40, 30, 60, 50])

with pm.Model() as model:

    # Same prior as for the single-day model: exponential with mean 40.
    mu = pm.Exponential("mu", lam=1.0 / 40.0)

    # Poisson likelihood applied to every observed daily count.
    observation = pm.Poisson("obs", mu=mu, observed=number_of_customers)

    # Sample the posterior of the rate (5000 draws).
    trace = pm.sample(draws=5000)

In [None]:
# Posterior mean and standard deviation of the Poisson rate after four days.
# The model and the question concern customers per day, so the label says
# "per day" (it previously said "per hour").
print("Estimate of visits per day: " + str(np.round(np.mean(trace['mu']),1)))
print("Uncertainty of error: +/- " + str(np.round(np.std(trace['mu']),1)))

### Question 4 - Modify the example from Session 1 to be a true mixture of two independent Gaussians, each with their own mean and variance. Generate some suitable data and show that your model is able to infer the correct properties of the two Gaussian distributions.

In [None]:
# Synthetic data for the mixture model: N points in total, half drawn from
# each of two Gaussians with known parameters (mean 0 / sd 4 and mean 2 / sd 1).
N = 100

# Seed NumPy's global random state so the generated data set, and therefore the
# comparison of the inferred parameters with the true ones, is reproducible.
# NOTE(review): assumes PyMC3's `.random()` draws from NumPy's global RNG.
np.random.seed(42)

data1 = pm.Normal.dist(mu=0, sd=4).random(size=(N//2))
data2 = pm.Normal.dist(mu=2, sd=1).random(size=(N//2))

# Stack both halves into one array; the first N//2 entries come from data1.
data = np.concatenate([data1, data2])

In [None]:
# Normalised histogram of the combined synthetic data.
fig, ax = plt.subplots(figsize=(10, 5))

ax.hist(data, histtype='stepfilled', bins=100, alpha=0.85, color="#467821", density=True)
ax.set_xlim(-10, 10)

fig.tight_layout()

In [None]:
# Two-component Gaussian mixture: every data point has its own Bernoulli
# indicator that selects which component's mean and standard deviation apply.
with pm.Model() as model:

    # Component used where the indicator is 1.
    mu1 = pm.Normal("mu1", mu=0, sd=10)
    sd1 = pm.Exponential("sd1", lam=1.0)

    # Component used where the indicator is 0.
    mu2 = pm.Normal("mu2", mu=0, sd=10)
    sd2 = pm.Exponential("sd2", lam=1.0)

    # One indicator per data point, with prior probability 0.5 of being 1.
    one_or_two = pm.Bernoulli("one_or_two", p=0.5, shape=N)

    # Per-point mean and standard deviation chosen by the indicator.
    mu = pm.math.switch(one_or_two, mu1, mu2)
    sd = pm.math.switch(one_or_two, sd1, sd2)

    observation = pm.Normal("obs", mu=mu, sd=sd, observed=data)

    # Sample the posterior (5000 draws).
    trace = pm.sample(draws=5000)

In [None]:
# Reduce each parameter's posterior samples to their mean.
# Note: these assignments rebind mu1/sd1/mu2/sd2 from the model variables
# created in the model cell to plain numeric summaries.
mu1, sd1, mu2, sd2 = (
    np.mean(trace[var_name]) for var_name in ("mu1", "sd1", "mu2", "sd2")
)

In [None]:
# Report the posterior-mean parameters of each component, rounded to 1 decimal.
print("Mean of first Gaussian: " + str(np.round(mu1, 1)))
print("Standard deviation of first Gaussian: " + str(np.round(sd1, 1)))

# These two lines report mu2/sd2, so they are labelled "second"
# (they were previously mislabelled "first").
print("Mean of second Gaussian: " + str(np.round(mu2, 1)))
print("Standard deviation of second Gaussian: " + str(np.round(sd2, 1)))

In [None]:
# Number of grid points at which the fitted densities are evaluated.
STEPS = 1001

# Evenly spaced grid covering the range shown in the final plot.
x = np.linspace(-3, 6, num=STEPS)

# Density of each fitted Gaussian on the grid: exponentiate the log-density
# that PyMC3 evaluates (component 1 first, then component 2).
y1, y2 = (
    np.exp(pm.Normal.dist(mu=mean, sd=std).logp(x).eval())
    for mean, std in ((mu1, sd1), (mu2, sd2))
)

In [None]:
# Overlay the two fitted component densities on the data histogram.
fig, ax = plt.subplots(figsize=(10, 5))

ax.hist(data, histtype='stepfilled', bins=100, alpha=0.85, color="#467821", density=True)

# Each density is halved before plotting — presumably the equal (0.5) weight of
# each component in the mixture, so the curves are on the histogram's scale.
for component_density in (y1, y2):
    ax.plot(x, component_density / 2, 'r')

ax.set_xlim(-3, 6)

fig.tight_layout()