# Chapter 2: Probability Distributions

In [1]:
import sys
import numpy as np
import matplotlib.pyplot as plt
# Seed NumPy's legacy global random state so anything drawing from it is repeatable.
np.random.seed(1234)

# Make the local `prml` package importable by adding its parent folder to the
# module search path. The folder defaults to the original location but can be
# overridden with the PRML_PATH environment variable on other machines.
import os

PRML_PATH = os.environ.get(
    "PRML_PATH", 'F:\\fci\\Pattern Recognation\\Week 03 - Introduction'
)
if PRML_PATH not in sys.path:  # avoid duplicate entries when the cell is re-run
    sys.path.append(PRML_PATH)


## Bernoulli Distribution - Maximum Likelihood Estimation

In [2]:
from prml.rv import Bernoulli

# Ten binary observations: six ones followed by four zeros.
dataset = np.array([1.] * 6 + [0.] * 4)

# Fit a Bernoulli constructed without a prior on mu.
model = Bernoulli()
model.fit(dataset)

# Show the fitted model; mu is the estimated probability of observing x = 1.
print(model)

Bernoulli(
    mu=0.6
)


In [6]:
num_exper = 1000  # number of samples to draw from the fitted model

samples = model.draw(num_exper)

# Tally each outcome, then convert the tallies into empirical proportions.
count_0, count_1 = (np.count_nonzero(samples == outcome) for outcome in (0., 1.))
prop_0, prop_1 = count_0 / num_exper, count_1 / num_exper

print("The proportion of zeros: ", prop_0)
print("The proportion of ones: ", prop_1)


The proportion of zeros:  0.413
The proportion of ones:  0.587


#### What if the dataset contains only ones, e.g. `dataset = [1., 1., 1., 1.]`?

In [8]:
# Degenerate dataset: four observations, every one equal to 1.
dataset = np.ones(4)

# Refit a fresh Bernoulli (no prior) on the all-ones data.
model = Bernoulli()
model.fit(dataset)

# Sample from the fitted model and measure how often each outcome appears.
num_exper = 1000
samples = model.draw(num_exper)

count_0 = np.count_nonzero(samples == 0.)  # number of sampled zeros
count_1 = np.count_nonzero(samples == 1.)  # number of sampled ones
prop_0, prop_1 = count_0 / num_exper, count_1 / num_exper

print("The proportion of zeros: ", prop_0)
print("The proportion of ones: ", prop_1)


The proportion of zeros:  0.0
The proportion of ones:  1.0


## Bernoulli Distribution - Bayesian Estimation using Beta as a prior

In [13]:
from prml.rv import Bernoulli, Beta

np.random.seed(1234)

# Ten observations, all equal to 1.
dataset = np.ones(10)

# Evaluation grid on [0, 1] and the Beta(3, 2) prior placed on mu.
# The prior density is evaluated BEFORE fitting; keep this order
# (presumably `fit` updates the Beta object in place -- TODO confirm).
x = np.linspace(0., 1., num=100)
beta = Beta(3, 2)
pdf_beta = beta.pdf(x)

# Fit a Bernoulli whose mu is the Beta object, then evaluate the density of
# the model's mu on the same grid.
model = Bernoulli(mu=beta)
model.fit(dataset)
pdf_bern = model.mu.pdf(x)

print(model)

Bernoulli(
    mu=Beta(
    )
)


In [15]:
num_exper = 1000  # number of samples to draw from the fitted model

samples = model.draw(num_exper)

# Count the sampled zeros and ones.
count_0 = np.count_nonzero(samples == 0.)
count_1 = np.count_nonzero(samples == 1.)

# Empirical proportion of each outcome among the draws.
prop_0, prop_1 = (count / num_exper for count in (count_0, count_1))

print("The proportion of zeros: ", prop_0)
print("The proportion of ones: ", prop_1)


The proportion of zeros:  0.195
The proportion of ones:  0.805


## Multinomial Distribution - Maximum Likelihood Estimation

In [19]:
from prml.rv import Categorical

# Ten one-hot rows over three categories, built by selecting rows of the
# 3x3 identity matrix with each observation's category index
# (category 0 three times, category 1 three times, category 2 four times).
dataset = np.eye(3, dtype=int)[[0, 1, 0, 2, 1, 0, 2, 2, 1, 2]]

# Fit a Categorical constructed without a prior; fitting computes the mu values.
model = Categorical()
model.fit(dataset)

print(model)

Categorical(
    mu=[0.3 0.3 0.4]
)


In [22]:
num_exper = 1000  # number of samples to draw from the fitted model

samples = model.draw(num_exper)

# Sum each of the three columns; assuming one-hot rows, column k's sum is the
# number of draws that landed in category k.
x_1, x_2, x_3 = (samples[:, k].sum() for k in range(3))

# Print the empirical frequency of each category, one per line.
print(x_1/num_exper, x_2/num_exper, x_3/num_exper, sep="\n")


0.306
0.283
0.411


In [23]:
# Degenerate dataset: ten identical one-hot rows, all in the third category.
bad_dataset = np.tile([0, 0, 1], (10, 1))

# Fit a fresh Categorical (no prior) on the degenerate data.
model = Categorical()
model.fit(bad_dataset)

print(model)


Categorical(
    mu=[0. 0. 1.]
)


In [15]:
# Draw 1000 samples from the model fitted on the degenerate dataset.
num_exper = 1000

# Draw everything in one vectorized call instead of a per-sample Python loop,
# the same way the other sampling cells in this notebook do. The result is
# indexed below as (num_exper, 3), matching how `model.draw(num_exper)` is
# indexed for the Categorical model in the earlier sampling cell.
samples = model.draw(num_exper)

# Sum each column; assuming one-hot rows, this counts the draws per category.
x_1 = samples[:, 0].sum()
x_2 = samples[:, 1].sum()
x_3 = samples[:, 2].sum()

# Empirical frequency of each category.
print(x_1/num_exper)
print(x_2/num_exper)
print(x_3/num_exper)


0.0
0.0
1.0


## Multinomial Distribution - Bayesian Estimation using Dirichlet distribution as a prior

In [26]:
from prml.rv import Dirichlet

# Dirichlet prior over the three category probabilities.
alphas = np.array([2, 3, 5])
# Named `prior` rather than `dir` so the built-in dir() is not shadowed for
# the rest of the kernel session.
prior = Dirichlet(alphas)
model = Categorical(mu=prior)
print(model)  # model before seeing any data

# Fit on the same imbalanced dataset (every observation in the third category).
model.fit(bad_dataset)
print(model)  # model after fitting

Categorical(
    mu=Dirichlet(
        alpha=[2 3 5]
    )
)
Categorical(
    mu=Dirichlet(
        alpha=[ 2  3 15]
    )
)


In [27]:
# Dirichlet parameters [2, 3, 15] (entered by hand), rescaled so they sum to 1
# and can be used directly as category probabilities.
mu = np.array([2, 3, 15])
mu_norm = mu / mu.sum()
print(mu_norm)

# Categorical with the normalized vector as fixed probabilities.
model = Categorical(mu=mu_norm)
print(model)

num_exper = 1000  # number of samples to draw
preds = model.draw(num_exper)

# Sum each column; assuming one-hot rows, this counts the draws per category.
x_1, x_2, x_3 = (preds[:, k].sum() for k in range(3))

# Print the empirical frequency of each category, one per line.
print(x_1/num_exper, x_2/num_exper, x_3/num_exper, sep="\n")


[0.1  0.15 0.75]
Categorical(
    mu=[0.1  0.15 0.75]
)
0.098
0.144
0.758
