In [10]:
## create two 3x20 datasets
import numpy as np

## Fix the global NumPy seed so the sampled values (and every output printed
## further down) are the same on each run.
np.random.seed(234234)

## Class 1: 20 draws from a 3-D normal distribution with mean (0,0,0) and
## identity covariance. The result is transposed so each column is one sample.
mu_vec1 = np.array([0,0,0])
cov_mat1 = np.array([[1,0,0],[0,1,0],[0,0,1]])
class1_sample = np.random.multivariate_normal(mu_vec1, cov_mat1, 20).T
assert class1_sample.shape == (3,20), "The matrix has not the dimensions 3x20"

## Class 2: same covariance, mean shifted to (1,1,1).
mu_vec2 = np.array([1,1,1])
cov_mat2 = np.array([[1,0,0],[0,1,0],[0,0,1]])
class2_sample = np.random.multivariate_normal(mu_vec2, cov_mat2, 20).T
## Validate the second sample (the original re-checked class1_sample here,
## leaving class2_sample unverified).
assert class2_sample.shape == (3,20), "The matrix has not the dimensions 3x20"

In [12]:
## Pool both classes into one unlabeled data set by placing the two
## 3x20 blocks side by side (samples stay in columns).
all_samples = np.hstack((class1_sample, class2_sample))
assert all_samples.shape == (3,40), "The matrix has not the dimensions 3x40"

In [13]:
## Mean of every feature (row) over all samples, kept both as three scalars
## and stacked into a 3x1 column vector.
mean_x, mean_y, mean_z = (np.mean(feature_row) for feature_row in all_samples)

mean_vector = np.array([mean_x, mean_y, mean_z]).reshape(3, 1)

print('Mean Vector:\n', mean_vector)

Mean Vector:
 [[ 0.57786468]
 [ 0.34875596]
 [ 0.52071362]]


In [14]:
## Scatter matrix: sum over all samples of the outer product of the
## mean-centred sample with itself.
scatter_matrix = np.zeros((3,3))
for i in range(all_samples.shape[1]):
    # column i as a 3x1 vector, centred on the mean
    deviation = all_samples[:,i].reshape(3,1) - mean_vector
    scatter_matrix += deviation.dot(deviation.T)
print('Scatter Matrix:\n', scatter_matrix)

Scatter Matrix:
 [[ 52.61055245  15.06845363   4.07624697]
 [ 15.06845363  45.56755824   0.92729735]
 [  4.07624697   0.92729735  67.84643492]]


In [15]:
## Covariance matrix, an alternative to the scatter matrix.
## np.cov treats each row of its input as one variable, so the 3x40 sample
## matrix can be passed in directly.
cov_mat = np.cov(all_samples)
print('Covariance Matrix:\n', cov_mat)

Covariance Matrix:
 [[ 1.34898852  0.38637061  0.10451915]
 [ 0.38637061  1.16839893  0.02377686]
 [ 0.10451915  0.02377686  1.73965218]]


In [16]:
# Eigenvalues and eigenvectors of the scatter matrix.
# np.linalg.eig returns the eigenvectors as the COLUMNS of its second result.
eig_val_sc, eig_vec_sc = np.linalg.eig(scatter_matrix)

# Eigenvalues and eigenvectors of the covariance matrix.
eig_val_cov, eig_vec_cov = np.linalg.eig(cov_mat)

for i in range(len(eig_val_sc)):
    # i-th eigenvector of each decomposition as a 3x1 column vector
    eigvec_sc = eig_vec_sc[:,i].reshape(1,3).T
    eigvec_cov = eig_vec_cov[:,i].reshape(1,3).T
    # Compare the vectors numerically. The previous check,
    # `eigvec_sc.all() == eigvec_cov.all()`, only compared two booleans
    # ("are all entries non-zero?") and therefore passed for almost any input.
    # An eigenvector is only defined up to its sign, so either orientation
    # is accepted.
    assert (np.allclose(eigvec_sc, eigvec_cov)
            or np.allclose(eigvec_sc, -eigvec_cov)), 'Eigenvectors are not identical'

    print('Eigenvector {}: \n{}'.format(i+1, eigvec_sc))
    print('Eigenvalue {} from scatter matrix: {}'.format(i+1, eig_val_sc[i]))
    print('Eigenvalue {} from covariance matrix: {}'.format(i+1, eig_val_cov[i]))
    # Ratio between the two eigenvalues for the same eigenvector
    print('Scaling factor: ', eig_val_sc[i]/eig_val_cov[i])
    print(40 * '-')

Eigenvector 1: 
[[ 0.62563478]
 [-0.77829547]
 [-0.05326609]]
Eigenvalue 1 from scatter matrix: 33.51820750301696
Eigenvalue 1 from covariance matrix: 0.8594412180260762
Scaling factor:  39.0
----------------------------------------
Eigenvector 2: 
[[-0.63625553]
 [-0.54857976]
 [ 0.54243815]]
Eigenvalue 2 from scatter matrix: 62.12738429750416
Eigenvalue 2 from covariance matrix: 1.5930098537821578
Scaling factor:  39.0
----------------------------------------
Eigenvector 3: 
[[ 0.45139786]
 [ 0.30547733]
 [ 0.83840538]]
Eigenvalue 3 from scatter matrix: 70.37895381131396
Eigenvalue 3 from covariance matrix: 1.8045885592644608
Scaling factor:  39.0
----------------------------------------


In [17]:
## Sanity check of the decomposition: each eigenpair of the scatter matrix
## must satisfy  scatter_matrix . v == eigenvalue * v  (to 6 decimals).
for i in range(len(eig_val_sc)):
    eigv = eig_vec_sc[:, i].reshape(3, 1)
    transformed = scatter_matrix.dot(eigv)
    scaled = eig_val_sc[i] * eigv
    np.testing.assert_array_almost_equal(transformed, scaled, decimal=6)