# Probability-based Learning (Chapter 3)

In [1]:
import numpy as np
from numpy import linalg as LA

## 3.1. Baysian Classification

Two classes are formed by the following samples

In [2]:
## Classes c1 and c2 represent the training data
c1 = np.array( [
                    [-1, 0],
                    [0,-1],
                    [-0.5, -0.5],
                    [-1.5,-1.5],
                    [-2,0],
                    [0, -2],
                    [-1, -1.3]]
             )
c2 = np.array( [
                    [1,1],
                    [1.3, 0.7],
                    [0.7, 1.3],
                    [2.5, 1],
                    [0,1]
               ]
             )

## Input vector X represent test data not seen by the model
X = np.array( [
                    [0, 0],
                    [1, 1],
                    [-1, 0],
                    [0.7, -0.2],
                    [-0.2, 1.5]
                ]
              )

In [3]:
# We determine the training sample size
sample_size = len(c1) + len(c2)

# We compute the priori probabilities P(c_k)
pc1 = len(c1) / sample_size
pc2 = len(c2) / sample_size
print('Priori Probabilities:\np(c_1) = {}\np(c_2) = {}'.format(pc1, pc2))

Priori Probabilities:
p(c_1) = 0.5833333333333334
p(c_2) = 0.4166666666666667


In [4]:
# We compute the mean vectors of the classes
m1 = c1.mean(0)
m2 = c2.mean(0)
print('Mean vectors:\nm_c1 = {}\nm_c2 = {}'.format(m1, m2))

Mean vectors:
m_c1 = [-0.85714286 -0.9       ]
m_c2 = [1.1 1. ]


In [5]:
# We compute the conditional propability p(c|c_k)
pxc1 = []
pxc2 = []
for i in X:
    pxc1 = np.append(pxc1, np.exp(-(LA.norm(i-m1)))/(np.sum(np.exp(-(LA.norm(i-m1))) + np.exp(-(LA.norm(i-m2))))))
    pxc2 = np.append(pxc2, np.exp(-(LA.norm(i-m2)))/(np.sum(np.exp(-(LA.norm(i-m1))) + np.exp(-(LA.norm(i-m2))))))
    
print('Conditional Probabilities:\np(x|c1) = {}\np(x|c2) = {}'.format(pxc1, pxc2))

Conditional Probabilities:
p(x|c1) = [0.5606375  0.07196604 0.804502   0.39118439 0.25058376]
p(x|c2) = [0.4393625  0.92803396 0.195498   0.60881561 0.74941624]


In [6]:
# We compute the posterior probability P(c_k,x) = p(c|c_k)*p(c_k)
pc1x = pxc1 * pc1
pc2x = pxc2 * pc2

print('Posterior Probabilities:\nP(c1|x) = {}\nP(c2|x) = {}'.format(pc1x, pc2x))

Posterior Probabilities:
P(c1|x) = [0.32703854 0.04198019 0.46929284 0.2281909  0.14617386]
P(c2|x) = [0.18306771 0.38668082 0.0814575  0.25367317 0.31225677]


In [7]:
# We classify two the highest probability P(c1|x) > P(c2|x), as the numerical diffenence P(c1|x) - P(c2|x)
classification = []
D = pc1x - pc2x
for d in D:
    if d > 0:
        classification.append('c1')
    else:
        classification.append('c2')
        
print('Classification of X: {}'.format(classification))

Classification of X: ['c1', 'c2', 'c1', 'c2', 'c2']


## 3.2-3. Risk-Based Classification

**NB.** Alexandros results are switched compared to ours. I suspect he did it the other way arround. I can't figure out wether we're right, but i guess we're.

In [69]:
loss_function = np.array( [ [0,1],
                            [1,0] ] )
R1 = np.array([])
R2 = np.array([])
for (i, j) in zip(pc1x, pc2x):
    R1 = np.append(R1, ((loss_function[0][0]*i) + (loss_function[0][1]*j)))
    R2 = np.append(R2, ((loss_function[1][1]*j) + (loss_function[1][0]*i)))
print('Risk function are:\nR(a1|x) = {}\nR(a2|x) = {}'.format(R1, R2))

Risk function are:
R(a1|x) = [0.18306771 0.38668082 0.0814575  0.25367317 0.31225677]
R(a2|x) = [0.32703854 0.04198019 0.46929284 0.2281909  0.14617386]


In [87]:
### I don't wether this is useful or not, however it is in ML notes but not used in exercises... yet
print('Likelihood Ratio: {}'.format((pc1x[0] / pc2x[0])
      > (((loss_function[0][1] - loss_function[1][1])*pc2x[0]) / 
         ((loss_function[1][0] - loss_function[0][0])*pc1x[0]))))

Likelihood Ratio: True


In [88]:
# We classify two the highest probability P(c1|x) > P(c2|x), as the numerical diffenence P(c1|x) - P(c2|x)
classification = []
D = R1 - R2
for d in D:
    if d > 0:
        classification.append('c1')
    else:
        classification.append('c2')
        
print('Classification of X: {}'.format(classification))

Classification of X: ['c1', 'c1', 'c2', 'c1', 'c1']


In [89]:
loss_function = np.array( [ [0.4,0.8],
                            [0.6,0.2] ] )
R1 = np.array([])
R2 = np.array([])
for (i, j) in zip(pc1x, pc2x):
    R1 = np.append(R1, ((loss_function[0][0]*i) + (loss_function[0][1]*j)))
    R2 = np.append(R2, ((loss_function[1][1]*j) + (loss_function[1][0]*i)))
print('Risk function are:\nR(a1|x) = {}\nR(a2|x) = {}'.format(R1, R2))

Risk function are:
R(a1|x) = [0.27726958 0.32613673 0.25288313 0.29421489 0.30827496]
R(a2|x) = [0.23283667 0.10252428 0.2978672  0.18764917 0.15015567]


In [90]:
# We classify two the highest probability P(c1|x) > P(c2|x), as the numerical diffenence P(c1|x) - P(c2|x)
classification = []
D = R1 - R2
for d in D:
    if d > 0:
        classification.append('c1')
    else:
        classification.append('c2')
        
print('Classification of X: {}'.format(classification))

Classification of X: ['c1', 'c1', 'c2', 'c1', 'c1']


## Gaussian Classification

In [92]:
# Our covariance matrix is Sigma_1 = Sigma_2 = Sigma = [1 0; 0 2]

covariance_matrix = np.array([ [1, 0], 
                               [0, 2]])

In [93]:
covariance_matrix

array([[1, 0],
       [0, 2]])

In [117]:
# I'm stuck... do not know how to get P(x|ck)... I get a matrix instead of an array...
pxc1 = []
pxc2 = []
for i in X:
    pxc1 = np.append(pxc1, (np.exp(-(np.transpose((i-m1))) * LA.inv(covariance_matrix)) * (i-m1)) /
                     np.sum((np.exp(-(np.transpose((i-m1))) * LA.inv(covariance_matrix)) * (i-m1)) + 
                            (np.exp(-(np.transpose((i-m2))) * LA.inv(covariance_matrix)) * (i-m2))))
    

In [118]:
pxc1

array([-0.08345628, -0.20649081, -0.19665791, -0.13166435,  0.06342381,
        0.41563058,  0.40625545,  0.1607414 ,  0.00794903, -0.04341227,
        0.00689084, -0.02768089, -0.25151235, -0.53652188, -1.19348746,
       -0.37808058, -0.32131877, -2.26402419, -0.61991139, -0.68191098])

In [131]:
v1 = c1.var(0)
v2 = c2.var(0)
0.5*(np.array( [ [(m1[0]-v1[0])*(m1[0]-v1[0])],
                 [(m1[0]-v1[0])*(m1[1]-v1[1])],
                 [(m1[1]-v1[1])*(m1[0]-v1[0])],
                 [(m1[1]-v1[1])*(m1[1]-v1[1])]])
     +
     np.array( [ [(m2[0]-v2[0])*(m2[0]-v2[0])],
                 [(m2[0]-v2[0])*(m2[1]-v2[1])],
                 [(m2[1]-v2[1])*(m2[0]-v2[0])],
                 [(m2[1]-v2[1])*(m2[1]-v2[1])]])
    )

array([[0.98331782],
       [1.14199191],
       [1.14199191],
       [1.44865208]])

In [124]:
for (i, j) in zip(m1, v1)
(m1-v1)*(m1-v1)

array([1.78685964, 1.96800816])