# Machine Learning - Exercise 2 
# Bayesian Learning

See the description of this example in Russell & Norvig, *Artificial Intelligence: A Modern Approach*, Chap. 20.

In [10]:
import numpy as np
import random
from random import randrange
from datetime import datetime

## Prior knowledge

In [2]:
# Prior over the five hypotheses h1..h5: P(H)
PH = np.array([0.1, 0.2, 0.4, 0.2, 0.1])

# Per-hypothesis likelihood of drawing one candy, keyed by flavour:
#   'l' -> P(lime | h_i)
#   'c' -> P(cherry | h_i), the complement of the lime likelihood
PdH = {'l': np.array([0.0, 0.25, 0.5, 0.75, 1.0])}
PdH['c'] = 1 - PdH['l']

print('P(H) = %s' % (PH,))
print('P(l|H) = %s' % (PdH['l'],))
print('P(c|H) = %s' % (PdH['c'],))

# Marginal probability of drawing a lime (Slide 19/54 - PDF 5):
#   P(l) = sum_i P(l | h_i) * P(h_i)
lime_terms = PdH['l'] * PH
cP = lime_terms.sum()

print('P(l) = sum %s = %.3f' % (lime_terms, cP))

P(H) = [0.1 0.2 0.4 0.2 0.1]
P(l|H) = [0.   0.25 0.5  0.75 1.  ]
P(c|H) = [1.   0.75 0.5  0.25 0.  ]
P(l) = sum [0.   0.05 0.2  0.15 0.1 ] = 0.500


## Dataset

In [3]:
# Observed data: five candies in a row, all lime ('l').
D = list('lllll')

## Bayesian Learning

In [4]:
# Print floats with three decimals so every posterior shows up as an aligned row.
np.set_printoptions(formatter={'float': '{: 0.3f}'.format})

# P holds the current belief over the hypotheses. It starts out bound to the
# prior array itself (no copy is made); each update below rebinds P to a new
# array, so PH is never modified.
P = PH
db = ''  # observations consumed so far, e.g. 'lll' after three limes

print('P(H)      \t= %s' % (PH,))

# Sequential Bayesian update, one observed candy per iteration:
#   P(H | d_1..d_k) = alpha * P(d_k | H) * P(H | d_1..d_{k-1})
# where alpha = 1 / (sum of the unnormalised posterior).
for d in D:
    db += d
    P = PdH[d] * P        # weight the current belief by the likelihood of d
    alpha = 1 / P.sum()   # normalisation constant
    P = alpha * P
    print('P(H|%s)  \t= %s' % (db, P))

P(H)      	= [ 0.100  0.200  0.400  0.200  0.100]
P(H|l)  	= [ 0.000  0.100  0.400  0.300  0.200]
P(H|ll)  	= [ 0.000  0.038  0.308  0.346  0.308]
P(H|lll)  	= [ 0.000  0.013  0.211  0.355  0.421]
P(H|llll)  	= [ 0.000  0.004  0.132  0.335  0.529]
P(H|lllll)  	= [ 0.000  0.001  0.078  0.296  0.624]


## MAP hypothesis

In [5]:
# We do the argmax over the array P, which returns us the index of the best hypothesis.
# The MAP hypothesis is the one with the highest posterior probability.
# argmax yields its 0-based position; hypotheses are labelled from 1 when printed.
i = P.argmax()
print('MAP hypothesis: h[%d]' % (i + 1,))

MAP hypothesis: h[5]


## Prediction

Probability that next candy is lime

Using MAP hypothesis

In [6]:
# Predict with the MAP hypothesis alone: look up its lime likelihood P(l | h_MAP).
lime_likelihood = PdH['l']
PlhMAP = lime_likelihood[i]
print('P(l|h_MAP) = %.3f' % (PlhMAP,))

P(l|h_MAP) = 1.000


Using all hypotheses

In [7]:
# Full Bayesian prediction: average the lime likelihood over all hypotheses,
# weighting each by its posterior:  P(l | D) = sum_i P(l | h_i) * P(h_i | D)
cP = P * PdH['l']
PlD = cP.sum()
print('P(l|D) = sum %s = %.3f' % (cP, PlD))

P(l|D) = sum [ 0.000  0.000  0.039  0.222  0.624] = 0.886


# Home Exercise

Di: outcome of rolling a six-sided die

Z = D1 + D2 = sum of the outcomes of rolling 2 dice

S = D1 + D2 + D3 = sum of the outcomes of rolling 3 dice

Z in [2,12], S in [3,18]

**Question 1** 

Compute

Prior: P(S)  -- 16 values summing to 1

Posterior: P( S | D1 ) -- 16 x 6 matrix (each column sums to 1)

Posterior: P( S | D1, D2 ) -- 16 x 6 x 6 array (for each fixed pair (D1, D2), the 16 values sum to 1)

Posterior: P( S | Z ) -- 16 x 11 matrix, one column per value of Z in [2,12] (each column sums to 1)

In [23]:
def rollDice(dice):
    """Roll a fair die once and return the face that came up.

    Parameters
    ----------
    dice : sequence
        The faces of the die, e.g. ``np.array([1, 2, 3, 4, 5, 6])``.
        Every face is equally likely.

    Returns
    -------
    One element of ``dice``, chosen uniformly at random.

    Raises
    ------
    ValueError
        If ``dice`` has no faces.
    """
    n_faces = len(dice)
    if n_faces == 0:
        raise ValueError('dice must have at least one face')
    # Positions are 0-based: valid indices are 0 .. n_faces - 1. Drawing the
    # index from 1 .. n_faces would skip the first face and overrun the array.
    index = np.random.randint(n_faces)
    return dice[index]

# A standard six-sided die: faces 1 through 6.
dice = np.arange(1, 7)

# One random sample of each sum: Z from two rolls, S from three rolls.
Z = sum(rollDice(dice) for _ in range(2))
S = sum(rollDice(dice) for _ in range(3))

# PRIOR
PS = list()

# POSTERIOR


**Question 2** 

Verify experimentally that

P( S | Z, D1 ) = P ( S | Z )