# boh roba

In [1]:
# inclusion
import sklearn.datasets
import numpy as np
import matplotlib.pyplot as plt
import scipy.linalg as linalg
import sklearn as sk

In [2]:
# utility functions

def load_iris():
    D,L = sklearn.datasets.load_iris()['data'].T, sklearn.datasets.load_iris()['target']
    return D, L

def vcol(v):
    return np.array(v).reshape(v.size, 1)

def vrow(v):
    return np.array(v).reshape(1, v.size)


# from lab5
def logpdf_GAU_ND(x, mu, C):
    M = C.shape[0] # == C.shape[1] == mu.shape[0] == x.shape[0]
    N = x.shape[1]
    Y = np.zeros(N)  
    for i in range(N):
        (sign, log_det_C) = np.linalg.slogdet(C)
        x_i = vcol(x[:,i])
        Y[i] = (-M/2*np.log(2*np.pi) - 0.5*sign*log_det_C - 0.5*(x_i-mu).T@linalg.inv(C)@(x_i-mu)).item()
    return vcol(Y)

def loglikelyhood(x, mu, C):
    Y = logpdf_GAU_ND(x, mu, C)
    return Y.sum()

In [3]:
#split
def split(D, L, seed=0):
    nTrain = int(D.shape[1]*2.0/3.0)
    np.random.seed(seed)
    idx = np.random.permutation(D.shape[1])
    idxTrain = idx[0:nTrain]
    idxTest = idx[nTrain:]
    DTR = D[:, idxTrain]
    DTE = D[:, idxTest]
    LTR = L[idxTrain]
    LTE = L[idxTest]
    return (DTR, LTR), (DTE, LTE)

In [4]:
D, L = load_iris()
(DTR, LTR), (DTE, LTE) = split(D, L)

In [5]:
# Max Likelihood parameters:
Nc = np.unique(LTR).size
mu = np.zeros((4,Nc))
C = np.zeros((4, 4, Nc))
l = np.zeros(Nc) 
for c in range(Nc):
    DTR_c = DTR[:, LTR == c]
    mu[:,c] = DTR_c.mean(axis=1)
    C[:,:,c] = ((DTR_c - vcol(mu[:,c])) @ (DTR_c - vcol(mu[:,c])).T) / DTR_c.shape[1]
    l[c] = np.exp(loglikelyhood(DTR_c, vcol(mu[:,c]), C[:,:,c]))
    print(f"mu_{c}: {mu[:,c]}\nC_{c}:{C[:,:,c]}\n")

mu_0: [4.96129032 3.42903226 1.46451613 0.2483871 ]
C_0:[[0.13140479 0.11370447 0.02862643 0.01187305]
 [0.11370447 0.16270552 0.01844953 0.01117586]
 [0.02862643 0.01844953 0.03583767 0.00526535]
 [0.01187305 0.01117586 0.00526535 0.0108845 ]]

mu_1: [5.91212121 2.78484848 4.27272727 1.33939394]
C_1:[[0.26470156 0.09169881 0.18366391 0.05134068]
 [0.09169881 0.10613407 0.08898072 0.04211203]
 [0.18366391 0.08898072 0.21955923 0.06289256]
 [0.05134068 0.04211203 0.06289256 0.03208448]]

mu_2: [6.45555556 2.92777778 5.41944444 1.98888889]
C_2:[[0.30080247 0.08262346 0.18614198 0.04311728]
 [0.08262346 0.08533951 0.06279321 0.05114198]
 [0.18614198 0.06279321 0.18434414 0.04188272]
 [0.04311728 0.05114198 0.04188272 0.0804321 ]]



In [6]:
# likelihood:
print("likelihood: ",l)

# Score matrix
S = np.zeros((Nc,  DTE.shape[1])) # 3 classes, 50 values 

for c in range(Nc):
    S[c,:] = np.exp(logpdf_GAU_ND(DTE, vcol(mu[:,c]), C[:,:,c]).T)

# print(f"\n Score matrix:\n{S}")

likelihood:  [7.53645059e+10 5.02921185e-03 1.17632117e-13]


In [7]:
SJoint = 1/3*S
Sol_SJOINT = np.load("./SJoint_MVG.npy")

err = (np.absolute(SJoint-Sol_SJOINT)/Sol_SJOINT).sum(0).sum()
print("err: ", err, "\n")

err:  3.1551066431862095e-12 



In [8]:
SMarginal = vrow(SJoint.sum(0))

SPost = SJoint/SMarginal

Pred = np.argmax(SPost, axis=0)
print(Pred)

[0 0 1 2 2 0 0 0 1 1 0 0 1 0 2 1 2 1 0 2 0 2 0 0 2 0 2 1 1 1 2 2 2 1 0 1 2
 2 0 1 1 2 1 0 0 0 2 1 2 0]


In [9]:
# Method 1 (Creative method)
tmp = (Pred - LTE)
tmp = tmp[tmp != 0]
#print(tmp)
wrong = (tmp/tmp).sum()
err = wrong/LTE.size


# Method 2
correct = (Pred == LTE).sum()
#print(correct)
acc = correct/LTE.size

print(f"Error: {(1-acc)*100}%\nAccuracy: {(acc)*100}%")

Error: 4.0000000000000036%
Accuracy: 96.0%


In [10]:
# Re-do everithing using the log-values (I hate everithing)
# Score matrix
SLog = np.zeros((Nc,  DTE.shape[1])) # 3 classes, 50 values 

for c in range(Nc):
    SLog[c,:] = (logpdf_GAU_ND(DTE, vcol(mu[:,c]), C[:,:,c]).T)

In [11]:
LogSJoint = np.log(1/3)+SLog
Sol_LogSJOINT = np.load("./logSJoint_MVG.npy")

err = np.absolute((LogSJoint-Sol_LogSJOINT)/Sol_LogSJOINT).sum(0).sum()
print("err: ", err, )

err:  9.324424924327251e-14


In [None]:
i_l = vcol(np.argmax(LogSJoint.sum(1), axis=0)).item()
i_l_sus = (np.argmax(LogSJoint, axis=1))
l = vcol(LogSJoint[:,i_l])
LogMarginal = np.log(np.exp(np.delete(LogSJoint, i_l, axis=1)).sum(axis=0)) # doesn't work :(

In [13]:
LogPost = LogSJoint-np.log(np.exp(LogSJoint).sum(axis=0))

In [17]:
sol = np.load("./logPosterior_MVG.npy")

err =((np.exp(LogPost)-np.exp(sol))/np.exp(sol)).sum()

print(err)

-2.7792891585087076e-12


(49,)