# 1. We should first experiment with artificially generated data. To that end, use the numpy routine numpy.random.multivariate_normal that generates points randomly by the multivariate Gauss distribution. In that case, the subroutine for task
1 has an interface as follows:
Parameters
mean center of the Gaussian
cov covariance matrix
nx number data points
nt number test points
Returns
x set of random vectors of length nx (data points)
t set of random vectors of length nt (test points)
mu mean based on x
Sgm covariance matrix based on x

def getArtificialData(mean, cov, nx, nt):
Note that this routine calls the random.multivariate_normal twice, i.e., for the
data and test set. The vector mean and the matrix cov must be in R
D and R
D×D.
The returned values mu and Sgm are computed by (9.19) and (9.20) in Murphy’s
book. They should be similar to mean and cov, but are different, in general. This
routine must be called for each class c. To get the test points of each class into one
long vector use the numpy.concatenate function.

In [1]:
import numpy as np

def getArtificialData(mean, cov, nx, nt):
    x = np.random.multivariate_normal(mean, cov, nx)
    t = np.random.multivariate_normal(mean, cov, nt)
    mu = np.mean(x, axis=0)
    Sgm = np.cov(x, rowvar=False)
    return x, t, mu, Sgm

# example parameters for three classes
mean_class1 = [0, 0]
cov_class1 = [[2, 1], [1, 50]]
nx1, nt1 = 40, 10

mean_class2 = [7, 5]
cov_class2 = [[3, 1], [1, 3]]
nx2, nt2 = 80, 20

mean_class3 = [-5, 5]
cov_class3 = [[5, 2], [2, 3]]
nx3, nt3 = 20, 5

# generate data for class 1
x1, t1, mu1, Sgm1 = getArtificialData(mean_class1, cov_class1, nx1, nt1)

# generate data for class 2
x2, t2, mu2, Sgm2 = getArtificialData(mean_class2, cov_class2, nx2, nt2)

# generate data for class 3
x3, t3, mu3, Sgm3 = getArtificialData(mean_class3, cov_class3, nx3, nt3)

# combine the test points from both classes
test_points_combined = np.concatenate([t1, t2, t3], axis=0)


# print(x1.shape)
# print(x1)
# print(t1.shape)
# print(t1)
# print(mu1.shape)
# print(mu1)
# print(Sgm1.shape)
# print(Sgm1)
# print()
# print(x2.shape)
# print(x2)
# print(t2.shape)
# print(t2)
# print(mu2.shape)
# print(mu2)
# print(Sgm2.shape)
# print(Sgm2)
# print()
# print(x3.shape)
# print(x3)
# print(t3.shape)
# print(t3)
# print(mu3.shape)
# print(mu3)
# print(Sgm3.shape)
# print(Sgm3)
# print()
# print(test_points_combined)
# print(test_points_combined.shape)

# 2. The interface for the second task should look like this

Parameters
t points: t[n,d] is the d-th component of the n-th point
mu mean
Sgm covariance matrix
Returns
p p[n] is the probability density function at the n-th point
def evaluateMultiVarGauss(t, mu, Sgm):
This routine must be called for each class c with the same merged test points t .

In [2]:
import numpy as np

def evaluateMultiVarGauss(t, mu, Sgm):
    # the dimensionality of the multivariate Gaussian
    d = mu.shape[0]
    
    # the determinant of the covariance matrix
    det_Sgm = np.linalg.det(Sgm)
    
    # the inverse of the covariance matrix
    inv_Sgm = np.linalg.inv(Sgm)
    
    # the difference between each test point and the mean
    diff = t - mu
    
    # the Gauss component for each test point
    gauss_component = np.sum(np.dot(diff, inv_Sgm) * diff, axis=1)
    
    # the normalization factor
    normalization_factor = 1 / ((2 * np.pi) ** (d / 2) * np.sqrt(det_Sgm))
    
    # probability density function for each test point
    p = normalization_factor * np.exp(-0.5 * gauss_component)
    
    return p


# now we have test_points_combined from the previous
p_class1 = evaluateMultiVarGauss(test_points_combined, mu1, Sgm1)
p_class2 = evaluateMultiVarGauss(test_points_combined, mu2, Sgm2)
p_class3 = evaluateMultiVarGauss(test_points_combined, mu3, Sgm3)

print('The probabilities p(X = t|Y = c) for each test point and each class:: \n')
print(p_class1,'\n')
print(p_class2,'\n')
print(p_class3,'\n')
# print('\n',p_class1.shape)
# print()
# print(sum(p_class1) + sum(p_class2) + sum(p_class3))

The probabilities p(X = t|Y = c) for each test point and each class:: 

[1.73628115e-02 7.86174736e-03 1.45942874e-02 3.18112700e-03
 6.22520751e-03 6.71641633e-05 1.54355129e-02 1.36945430e-02
 7.22526475e-04 6.80775450e-03 2.73435043e-14 1.75684228e-07
 1.82939338e-14 3.85497914e-18 1.64018761e-09 3.88900810e-11
 1.50234180e-08 2.00179500e-17 4.21537111e-08 1.54639439e-14
 2.28318066e-12 7.38354364e-15 9.42937584e-19 2.81521637e-09
 7.78607511e-11 2.85580913e-11 5.45692008e-26 5.14644350e-10
 8.21796915e-08 7.38024682e-07 1.70716806e-06 1.65384727e-12
 2.68167308e-04 2.99955734e-05 1.12973820e-12] 

[8.46817501e-06 1.98188301e-05 5.59406336e-05 4.72990145e-04
 5.95393718e-09 3.04274808e-33 1.90618495e-09 6.80127736e-05
 2.32937765e-06 5.87782844e-09 3.06975441e-02 3.65181430e-03
 4.01870444e-02 3.02083366e-02 3.66307472e-02 1.86383273e-02
 2.84036647e-02 2.33340790e-02 6.30004612e-03 2.16080709e-02
 4.86273779e-02 2.61027084e-02 2.87044385e-02 1.63513217e-02
 4.54320635e-02 4.6201764

# 3. The interface for the third task should look like this

Parameters
t coordinates of the test points: t[n,d]
pXY conditional probabilities: pXY[n,c]
yEx exact labels of the test points: y[n]
def calculateTestSet(t, pXY, yEx: int):
Here pXY is a matrix whose columns are the output of evaluateMultiVarGauss.
This routine should also print the probabilities and the actual and computed labels
for each test point. It should also count the number of mislabelled points.

In [3]:
import numpy as np

def calculateTestSet(t, pXY, yEx):
    # uniform priors
    n, c = pXY.shape
    prior = 1 / c
    
    pYX_unnormalized = pXY * prior
    
    # Normalize to get proper prob
    pYX = pYX_unnormalized / np.sum(pYX_unnormalized, axis=1, keepdims=True)
    
    # Predicted class
    yPred = np.argmax(pYX, axis=1)
    
    # number of mislabeled points
    num_mislabeled = np.sum(yEx != yPred)
    
    # the results for each test point
    for i in range(len(t)):
        print(f"Test point {i+1}: True label = {yEx[i]}, Predicted label = {yPred[i]}")
        print(f"Posterior probabilities: {pYX[i]}")
    
    print(f"Number of mislabeled points: {num_mislabeled} out of {len(t)}")

# usage:
pXY_combined = np.column_stack((p_class1, p_class2, p_class3)) 
yEx_combined = np.concatenate((np.zeros(nt1), np.ones(nt2), np.ones(nt3)*2)) 
calculateTestSet(test_points_combined, pXY_combined, yEx_combined)

Test point 1: True label = 0.0, Predicted label = 0
Posterior probabilities: [8.69502941e-01 4.24073201e-04 1.30072986e-01]
Test point 2: True label = 0.0, Predicted label = 0
Posterior probabilities: [0.73715405 0.00185831 0.26098764]
Test point 3: True label = 0.0, Predicted label = 0
Posterior probabilities: [0.9566024 0.0036667 0.0397309]
Test point 4: True label = 0.0, Predicted label = 0
Posterior probabilities: [0.77078886 0.11460578 0.11460537]
Test point 5: True label = 0.0, Predicted label = 0
Posterior probabilities: [9.60780648e-01 9.18913565e-07 3.92184334e-02]
Test point 6: True label = 0.0, Predicted label = 0
Posterior probabilities: [1.00000000e+00 4.53031488e-29 4.23736868e-41]
Test point 7: True label = 0.0, Predicted label = 0
Posterior probabilities: [9.99999876e-01 1.23493448e-07 1.86381661e-11]
Test point 8: True label = 0.0, Predicted label = 0
Posterior probabilities: [0.98300435 0.00488201 0.01211364]
Test point 9: True label = 0.0, Predicted label = 0
Posteri

# Modifying the code to test tied covariances as given in question

In [4]:
#given formula for tied cov
N = nx1 + nx2 + nx3
tied_cov = (nx1 * Sgm1 + nx2 * Sgm2 + nx3 * Sgm3) / N

p_class1_tied = evaluateMultiVarGauss(test_points_combined, mu1, tied_cov)
p_class2_tied = evaluateMultiVarGauss(test_points_combined, mu2, tied_cov)
p_class3_tied = evaluateMultiVarGauss(test_points_combined, mu3, tied_cov)

pXY_combined_tied = np.column_stack((p_class1_tied, p_class2_tied, p_class3_tied))
calculateTestSet(test_points_combined, pXY_combined_tied, yEx_combined)

Test point 1: True label = 0.0, Predicted label = 0
Posterior probabilities: [9.07983250e-01 7.56087612e-05 9.19411411e-02]
Test point 2: True label = 0.0, Predicted label = 0
Posterior probabilities: [0.93429425 0.00321667 0.06248908]
Test point 3: True label = 0.0, Predicted label = 0
Posterior probabilities: [9.77036950e-01 6.15320830e-04 2.23477294e-02]
Test point 4: True label = 0.0, Predicted label = 0
Posterior probabilities: [0.97384213 0.01955782 0.00660005]
Test point 5: True label = 0.0, Predicted label = 2
Posterior probabilities: [4.64894686e-01 5.34465233e-04 5.34570849e-01]
Test point 6: True label = 0.0, Predicted label = 0
Posterior probabilities: [9.99409966e-01 5.66993423e-10 5.90033771e-04]
Test point 7: True label = 0.0, Predicted label = 0
Posterior probabilities: [9.99100572e-01 5.71200632e-05 8.42307563e-04]
Test point 8: True label = 0.0, Predicted label = 0
Posterior probabilities: [9.86850423e-01 7.78647078e-04 1.23709298e-02]
Test point 9: True label = 0.0, 

# Testing the methodology on the iris data set

In [5]:
from sklearn.datasets import load_iris
import numpy as np

def getIrisData():
    iris = load_iris()
    data = iris.data
    target = iris.target
    
    # Splitting the data into training and test sets as per the instructions
    x = np.concatenate([data[:40, :], data[50:90, :], data[100:140, :]])  #training data
    t = np.concatenate([data[40:50, :], data[90:100, :], data[140:150, :]])  #testing data
    y_train = np.concatenate([target[:40], target[50:90], target[100:140]])   #labels corresponding to training data
    y_test = np.concatenate([target[40:50], target[90:100], target[140:150]])   #labels corresponding to testing data
    
    return x, t, y_train, y_test

# Loading the data
x, t, y_train, y_test = getIrisData()


In [6]:
def calculateMeanAndCov(x, y, class_label):
    class_data = x[y == class_label, :]
    mean = np.mean(class_data, axis=0)
    cov = np.cov(class_data, rowvar=False)
    return mean, cov

# Class means and covariances
means = []
covariances = []
for class_label in np.unique(y_train):
    mean, cov = calculateMeanAndCov(x, y_train, class_label)
    means.append(mean)
    covariances.append(cov)

In [7]:
p_classes = []
for i, (mean, cov) in enumerate(zip(means, covariances)):
    p = evaluateMultiVarGauss(t, mean, cov)
    p_classes.append(p)

pXY = np.column_stack(p_classes)
print('The conditional probabilities: pXY[n,c] \n\n',pXY)
print('\n',pXY.shape)

The conditional probabilities: pXY[n,c] 

 [[7.81435194e+000 5.88902311e-023 9.35547567e-036]
 [2.93126805e-003 1.73532119e-012 4.79357840e-026]
 [2.45619345e+000 1.70664995e-019 1.93466684e-029]
 [4.81174613e-003 3.58520997e-018 6.64854132e-029]
 [9.87470311e-002 6.31867093e-021 8.39767713e-030]
 [3.45674864e+000 2.01425700e-016 2.61432439e-029]
 [3.52940179e+000 4.57657405e-026 8.55244329e-036]
 [9.29964603e+000 5.79339655e-019 1.29804288e-029]
 [1.12524756e+001 1.12468803e-025 4.38094192e-038]
 [1.57261747e+001 9.24900533e-021 1.06527756e-033]
 [7.13615519e-074 2.40751868e-001 1.31807440e-002]
 [3.98144515e-086 2.44508165e+000 1.83458137e-002]
 [8.29902570e-060 3.92526536e+000 3.08060272e-004]
 [3.39890017e-036 3.67453462e-001 5.59130953e-006]
 [1.93728765e-069 3.33068754e+000 9.94926107e-003]
 [2.67930372e-064 4.88100376e-001 6.35497100e-004]
 [2.96056406e-068 2.71070381e+000 3.38968338e-003]
 [8.99168518e-072 3.77762450e+000 5.22602840e-004]
 [1.32502087e-032 9.19610454e-003 3.957

In [8]:
calculateTestSet(t, pXY, y_test)

Test point 1: True label = 0, Predicted label = 0
Posterior probabilities: [1.00000000e+00 7.53616315e-24 1.19721709e-36]
Test point 2: True label = 0, Predicted label = 0
Posterior probabilities: [9.99999999e-01 5.92003584e-10 1.63532584e-23]
Test point 3: True label = 0, Predicted label = 0
Posterior probabilities: [1.00000000e+00 6.94835316e-20 7.87668757e-30]
Test point 4: True label = 0, Predicted label = 0
Posterior probabilities: [1.00000000e+00 7.45095414e-16 1.38173153e-26]
Test point 5: True label = 0, Predicted label = 0
Posterior probabilities: [1.00000000e+00 6.39884649e-20 8.50423252e-29]
Test point 6: True label = 0, Predicted label = 0
Posterior probabilities: [1.00000000e+00 5.82702768e-17 7.56295775e-30]
Test point 7: True label = 0, Predicted label = 0
Posterior probabilities: [1.00000000e+00 1.29669965e-26 2.42319911e-36]
Test point 8: True label = 0, Predicted label = 0
Posterior probabilities: [1.00000000e+00 6.22969576e-20 1.39579816e-30]
Test point 9: True label