# boh roba

In [1]:
# inclusion
import sklearn.datasets
import numpy as np
import matplotlib.pyplot as plt
import scipy.linalg as linalg
import sklearn as sk

In [2]:
# utility functions

def load_iris():
    D,L = sklearn.datasets.load_iris()['data'].T, sklearn.datasets.load_iris()['target']
    return D, L

def vcol(v):
    return np.array(v).reshape(v.size, 1)

def vrow(v):
    return np.array(v).reshape(1, v.size)


# from lab5
def logpdf_GAU_ND(x, mu, C):
    M = C.shape[0] # == C.shape[1] == mu.shape[0] == x.shape[0]
    N = x.shape[1]
    Y = np.zeros(N)  
    for i in range(N):
        (sign, log_det_C) = np.linalg.slogdet(C)
        x_i = vcol(x[:,i])
        Y[i] = (-M/2*np.log(2*np.pi) - 0.5*sign*log_det_C - 0.5*(x_i-mu).T@linalg.inv(C)@(x_i-mu)).item()
    return vcol(Y)

def loglikelyhood(x, mu, C):
    Y = logpdf_GAU_ND(x, mu, C)
    return Y.sum()

# get score for gaussian distributed CV
def gauss_score_matrix(D, Nc, mu, C):
    # Score matrix
    S = np.zeros((Nc,  D.shape[1]))

    for c in range(Nc):
        S[c,:] = np.exp(logpdf_GAU_ND(D, vcol(mu[:,c]), C[:,:,c]).T)  
    return S

In [3]:
#split
def split(D, L, seed=0):
    nTrain = int(D.shape[1]*2.0/3.0)
    np.random.seed(seed)
    idx = np.random.permutation(D.shape[1])
    idxTrain = idx[0:nTrain]
    idxTest = idx[nTrain:]
    DTR = D[:, idxTrain]
    DTE = D[:, idxTest]
    LTR = L[idxTrain]
    LTE = L[idxTest]
    return (DTR, LTR), (DTE, LTE)

In [4]:
D, L = load_iris()
(DTR, LTR), (DTE, LTE) = split(D, L)

In [5]:
# Max Likelihood parameters:
Nc = np.unique(LTR).size
mu = np.zeros((4,Nc))
C = np.zeros((4, 4, Nc))
l = np.zeros(Nc) 
for c in range(Nc):
    DTR_c = DTR[:, LTR == c]
    mu[:,c] = DTR_c.mean(axis=1)
    C[:,:,c] = ((DTR_c - vcol(mu[:,c])) @ (DTR_c - vcol(mu[:,c])).T) / DTR_c.shape[1]
    l[c] = np.exp(loglikelyhood(DTR_c, vcol(mu[:,c]), C[:,:,c]))
    print(f"mu_{c}: {mu[:,c]}\nC_{c}:{C[:,:,c]}\n")

mu_0: [4.96129032 3.42903226 1.46451613 0.2483871 ]
C_0:[[0.13140479 0.11370447 0.02862643 0.01187305]
 [0.11370447 0.16270552 0.01844953 0.01117586]
 [0.02862643 0.01844953 0.03583767 0.00526535]
 [0.01187305 0.01117586 0.00526535 0.0108845 ]]

mu_1: [5.91212121 2.78484848 4.27272727 1.33939394]
C_1:[[0.26470156 0.09169881 0.18366391 0.05134068]
 [0.09169881 0.10613407 0.08898072 0.04211203]
 [0.18366391 0.08898072 0.21955923 0.06289256]
 [0.05134068 0.04211203 0.06289256 0.03208448]]

mu_2: [6.45555556 2.92777778 5.41944444 1.98888889]
C_2:[[0.30080247 0.08262346 0.18614198 0.04311728]
 [0.08262346 0.08533951 0.06279321 0.05114198]
 [0.18614198 0.06279321 0.18434414 0.04188272]
 [0.04311728 0.05114198 0.04188272 0.0804321 ]]



In [6]:
# likelihood:
print("likelihood: ",l)

# Score matrix
S = np.zeros((Nc,  DTE.shape[1])) # 3 classes, 50 values 

for c in range(Nc):
    S[c,:] = np.exp(logpdf_GAU_ND(DTE, vcol(mu[:,c]), C[:,:,c]).T)

# print(f"\n Score matrix:\n{S}")

likelihood:  [7.53645059e+10 5.02921185e-03 1.17632117e-13]


In [7]:
SJoint = 1/3*S
Sol_SJOINT = np.load("./SJoint_MVG.npy")

err = (np.absolute(SJoint-Sol_SJOINT)/Sol_SJOINT).sum(0).sum()
print("err: ", err, "\n")

err:  3.1551066431862095e-12 



In [8]:
SMarginal = vrow(SJoint.sum(0))

SPost = SJoint/SMarginal

Pred = np.argmax(SPost, axis=0)
print(Pred)

[0 0 1 2 2 0 0 0 1 1 0 0 1 0 2 1 2 1 0 2 0 2 0 0 2 0 2 1 1 1 2 2 2 1 0 1 2
 2 0 1 1 2 1 0 0 0 2 1 2 0]


In [9]:
# Method 1 (Creative method)
tmp = (Pred - LTE)
tmp = tmp[tmp != 0]
#print(tmp)
wrong = (tmp/tmp).sum()
err = wrong/LTE.size


# Method 2
correct = (Pred == LTE).sum()
#print(correct)
acc = correct/LTE.size

print(f"Error: {(1-acc)*100}%\nAccuracy: {(acc)*100}%")

Error: 4.0000000000000036%
Accuracy: 96.0%


In [10]:
# Re-do everithing using the log-values (I hate everithing)
# Score matrix
SLog = np.zeros((Nc,  DTE.shape[1])) # 3 classes, 50 values 

for c in range(Nc):
    SLog[c,:] = (logpdf_GAU_ND(DTE, vcol(mu[:,c]), C[:,:,c]).T)

In [11]:
LogSJoint = np.log(1/3)+SLog
Sol_LogSJOINT = np.load("./logSJoint_MVG.npy")

err = np.absolute((LogSJoint-Sol_LogSJOINT)/Sol_LogSJOINT).sum(0).sum()
print("err: ", err, )

err:  9.324424924327251e-14


In [12]:
i_l = vcol(np.argmax(LogSJoint.sum(1), axis=0)).item()
i_l_sus = (np.argmax(LogSJoint, axis=1))
l = vcol(LogSJoint[:,i_l])
LogMarginal = np.log(np.exp(np.delete(LogSJoint, i_l, axis=1)).sum(axis=0)) # doesn't work :(

In [13]:
LogPost = LogSJoint-np.log(np.exp(LogSJoint).sum(axis=0))

In [14]:
sol = np.load("./logPosterior_MVG.npy")

err =((np.exp(LogPost)-np.exp(sol))/np.exp(sol)).sum()

print(err)

-2.7792891585087076e-12


# Naive Bayes Gaussian Classifier

We now consider the Naive Bayes version of the classifier. As we have seen, the Naive Bayes version of the MVG is simply a Gaussian classifier where the covariance matrices are diagonal. The ML solution for the mean parameters is the same, whereas the ML solution for the covariance matrices is

$$
 \textnormal{diag}(\Sigma^*_c) = \text{diag} \left[ \frac{1}{N_c} \sum_i (x_{c,i} - \mu^*_c)(x_{c,i} - \mu^*_c)^T \right]
$$

i.e., the diagonal of the ML solution for the MVG model. Implement the Naive Bayes classifier.

**NOTE:** since the number of features is small, we can adapt the MVG code by simply zeroing the out-of-diagonal elements of the MVG ML solution. This can be done, for example, multiplying element-wise the MVG ML solution with the identity matrix. The rest of the code remains unchanged. If we have large dimensional data, it may be advisable to implement ad-hoc functions to work directly with just the diagonal of the covariance matrices (we won’t do this in this course).

The accuracy for the Naive Bayes classifier should be again 96.0% for this dataset. The **Solution** folder contains all the intermediate results, both in the likelihood and in the log-likelihood domain.


In [15]:
# Note NB => Naive Bayes
NBC = np.zeros((4,4,3))
for i in range(3):
    NBC[:,:,i]= C[:,:,i] * np.eye(4, 4)
    print(f"NBC_{i}:\n{NBC[:,:,i]}")


NBC_0:
[[0.13140479 0.         0.         0.        ]
 [0.         0.16270552 0.         0.        ]
 [0.         0.         0.03583767 0.        ]
 [0.         0.         0.         0.0108845 ]]
NBC_1:
[[0.26470156 0.         0.         0.        ]
 [0.         0.10613407 0.         0.        ]
 [0.         0.         0.21955923 0.        ]
 [0.         0.         0.         0.03208448]]
NBC_2:
[[0.30080247 0.         0.         0.        ]
 [0.         0.08533951 0.         0.        ]
 [0.         0.         0.18434414 0.        ]
 [0.         0.         0.         0.0804321 ]]


In [22]:
# calculate S and S marginal
NGS = gauss_score_matrix(DTE, 3, mu, NBC)
NGS_joint = 1/3*NGS

NGS_marginal = vrow(NGS_joint.sum(0))

NGS_Post = NGS_joint/NGS_marginal

NGS_Pred = np.argmax(NGS_Post, axis=0)


In [23]:
NGS_Pred

array([0, 0, 1, 2, 2, 0, 0, 0, 1, 1, 0, 0, 1, 0, 2, 1, 2, 1, 0, 2, 0, 2,
       0, 0, 2, 0, 2, 1, 1, 1, 2, 2, 1, 2, 0, 1, 2, 2, 0, 1, 1, 2, 1, 0,
       0, 0, 2, 1, 2, 0])

In [24]:
# Accuracy for Naive Bayes
NB_acc = (NGS_Pred[NGS_Pred == LTE].shape[0])/LTE.shape[0]
print(f"accuracy with Naive Bayes Gaussian classifier: {NB_acc*100}%")

accuracy with Naive Bayes Gaussian classifier: 96.0%


# Tied Covariance Gaussian Classifier

We now consider the Tied covariance version of the classifier. In this case, the class covariance matrices are tied, with $\Sigma_c = \Sigma$. We have seen that the ML solution for the class means is again the same. The ML solution for the covariance matrix is given by the empirical within-class covariance matrix

$$
\Sigma^* = \frac{1}{N} \sum_c \sum_i (x_{c,i} - \mu^*_c)(x_{c,i} - \mu^*_c)^T
$$

Compute the ML solution for the model. Remember that we have already computed within-class covariance matrices when we implemented LDA. Alternatively, we can observe that 

$$
\Sigma^* = \frac{1}{N} \sum_c N_c \Sigma^*_c
$$

where $\Sigma^*_c$ is the ML solution for class $c$ for the MVG classifier.

You should obtain

$$
\Sigma^* =
\begin{bmatrix}
0.23637589 & 0.09525344 & 0.1364944 & 0.03614529 \\
0.09525344 & 0.11618517 & 0.05768855 & 0.0357726 \\
0.1364944 & 0.05768855 & 0.14992811 & 0.03764588 \\
0.03614529 & 0.0357726 & 0.03764588 & 0.04291763 \\
\end{bmatrix}
$$

The accuracy for the tied covariance classifier should be 98.0% for this dataset. Again, the **Solution** folder contains all the intermediate results, both in the likelihood and in the log-likelihood domain.


In [21]:
CT = np.zeros((4,4))
for c in range(3):
    DTR_c = DTR[:, LTR == c]
    CT += C[:,:,c] * DTR_c.shape[1]

CT /= DTR.shape[1]

print(f"CT:\n{CT}")
CT_Tensor = np.zeros((4,4,3))
for i in range(3):
    CT_Tensor[:,:,i] = CT

TS = gauss_score_matrix(DTE, 3, mu, CT_Tensor)
TS_joint = 1/3*TS

TS_marginal = vrow(TS_joint.sum(0))

TS_Post = TS_joint/TS_marginal

TS_Pred = np.argmax(TS_Post, axis=0)

T_acc = (TS_Pred[TS_Pred == LTE].shape[0])/LTE.shape[0]
print(f"accuracy with tied Gaussian classifier: {T_acc*100}%")

CT:
[[0.23637589 0.09525344 0.1364944  0.03614529]
 [0.09525344 0.11618517 0.05768855 0.0357726 ]
 [0.1364944  0.05768855 0.14992811 0.03746458]
 [0.03614529 0.0357726  0.03746458 0.04291763]]
accuracy with tied Gaussian classifier: 98.0%


## Binary task: log-likelihood ratios and MVG
We now focus on the same binary task we employed for LDA (see Laboratory 3), which requries classi-
fying only two kinds of flowers, iris versicolor and iris virginica. You can refer to Laboratory 2 to build
the 2-class dataset.

Although we could proceed in the same way as for the multiclass iris problem, for binary tasks we have
seen that we can cast the classification as a comparison of a score, the log-likelihood ratio, with a thresh-
old t that depends on class priors

In [25]:
def load_iris_sklearn():
    return sklearn.datasets.load_iris()['data'].T, sklearn.datasets.load_iris()['target']
DIris, LIris = load_iris_sklearn()
D = DIris[:, LIris != 0]
L = LIris[LIris != 0]

In [26]:
def split_db_2to1(D, L, seed=0):
    nTrain = int(D.shape[1]*2.0/3.0)
    np.random.seed(seed)
    idx = np.random.permutation(D.shape[1])
    idxTrain = idx[0:nTrain]
    idxTest = idx[nTrain:]
    DTR = D[:, idxTrain]
    DVAL = D[:, idxTest]
    LTR = L[idxTrain]
    LVAL = L[idxTest]
    return (DTR, LTR), (DVAL, LVAL)

In [52]:
# DTR and LTR are model training data and labels
# DVAL and LVAL are validation data and labels
(DTR, LTR), (DVAL, LVAL) = split_db_2to1(D, L)

# we assume that classe 2 is true
true_class = 2
# and 1 is false
false_class = 1

# mu and C computed with max likelihood method
mu_1 = DTR[:, LTR == 1].mean(axis=1)
mu_2 = DTR[:, LTR == 2].mean(axis=1)

C_1 = ((DTR[:, LTR == 1] - vcol(mu_1)) @ (DTR[:, LTR == 1] - vcol(mu_1)).T)/DTR[:, LTR == 1].shape[1]
C_2 = ((DTR[:, LTR == 2] - vcol(mu_2)) @ (DTR[:, LTR == 2] - vcol(mu_2)).T)/DTR[:, LTR == 1].shape[1]

# score computations
Score = np.zeros((2, DVAL.shape[1]))

Score[0,:] = vrow(logpdf_GAU_ND(DVAL, vcol(mu_1), C_1))
Score[1,:] = vrow(logpdf_GAU_ND(DVAL, vcol(mu_2), C_2))

llr = Score[1,:] - Score[0,:]


In [53]:
llr

array([-12.17161321,   5.62521909, -12.01169368,   8.25893696,
       -11.59666694, -17.36267605,  -9.07501906,   6.98399727,
        15.17586183,   0.68668463,   1.10299881,  22.67572956,
        12.97330375,   2.10622477, -10.79680446,  -8.15595229,
        -1.0132232 ,  -8.83632766,  -7.76493548,  17.21681417,
         7.31573716, -12.8663077 ,   2.32407629,  14.26593151,
         2.39028403,  -8.15262229, -11.90205646,  -1.37772105,
        -5.20559802,   7.04936964,   3.31844988,  28.86231038,
       -10.43354846,  -7.41289129])

In [55]:
# A priori probability for class 1 and class 2 are the same => threshold = 0
L_pred = np.zeros(LVAL.shape[0])
i = 0
for p in llr:
    L_pred[i] = 1 if p < 0 else 2
    i += 1

rel_class_err = np.abs(L_pred-LVAL).sum()/LVAL.shape[0]
print(f"Relative % classification error = {rel_class_err*100}%\n")

Relative % classification error = 8.823529411764707%

