# Mean/Covariance of a data set and effect of linear transformation

In [11]:
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
matplotlib.style.use('fivethirtyeight')
from sklearn.datasets import fetch_lfw_people, fetch_olivetti_faces
import time
import timeit

In [12]:
%matplotlib inline
from ipywidgets import interact

In [19]:
import numpy.testing as npt

## Data

In [20]:
#Let's create some data with five variables (D0, D1,...,D4) and two observations (n0,n1)

X = np.array([[1,2,4,6,6], [1,2,3,4,5]])
X

array([[1, 2, 4, 6, 6],
       [1, 2, 3, 4, 5]])

In [21]:
N, D = X.shape
X
D,N

(5, 2)

In [22]:
X_test = np.arange(6).reshape(2,3)
expected_test_mean = np.array([1., 4.]).reshape(-1, 1)
expected_test_cov = np.array([[2/3., 2/3.], [2/3.,2/3.]])
print('X:\n', X_test)
print('Expected mean:\n', expected_test_mean)
print('Expected covariance:\n', expected_test_cov)

X:
 [[0 1 2]
 [3 4 5]]
Expected mean:
 [[1.]
 [4.]]
Expected covariance:
 [[0.66666667 0.66666667]
 [0.66666667 0.66666667]]


In [23]:
D, N = X_test.shape
X
D,N

(2, 3)

##  Mean and Variance Functions

In [62]:
# Taking the mean of each variable involves averaging over the Ds
# e.g. for the first variable D=0 the mean is A(D=0, n=0)+ A(D=0, n=1)/2
def mean_naive(X):
    D, N = X.shape
    mean = np.zeros((D,1))
    for n in range(N):
      for m in range(D):
        mean[m] = mean[m]+ X[m,n]
    mean = mean / N
    return mean

def mean(X):
    D, N = X.shape
    ### Edit the code to compute a (D,1) array `mean` for the mean of dataset.
    mean = np.zeros((D,1))
    ### Update mean here
    mean = np.mean(X,axis=1)

    ###
    return mean

def cov_naive(X):
    D, N = X.shape
    covariance = np.zeros((D, D))    
    mean = np.mean(X,axis=1)
    
    for n in range(N):
        diff = np.asmatrix(X[:,n] - mean)
        covariance += diff.T @ diff 
    covariance = covariance/N
    return covariance


def cov(X):
    D, N = X.shape
    covariance_matrix = np.zeros((D, D))
    covariance_matrix = np.cov(X, rowvar=True, bias=True)
    return covariance_matrix

In [63]:
print(mean(X_test))
print( expected_test_mean)
print(npt.assert_almost_equal(mean_naive(X_test), expected_test_mean))

[1. 4.]
[[1.]
 [4.]]
None


In [66]:
#print(cov(X_test))
print(cov_naive(X_test))
print(cov(X_test))
print(expected_test_cov)

[[0.66666667 0.66666667]
 [0.66666667 0.66666667]]
[[0.66666667 0.66666667]
 [0.66666667 0.66666667]]
[[0.66666667 0.66666667]
 [0.66666667 0.66666667]]


In [74]:
X_test = np.arange(6).reshape(2,3)
expected_test_mean = np.array([1., 4.]).reshape(-1, 1)
expected_test_cov = np.array([[2/3., 2/3.], [2/3.,2/3.]])
print('X:\n', X_test)
print('Expected mean:\n', expected_test_mean)
print('Expected covariance:\n', expected_test_cov)
print('Calculated mean naive:\n', mean_naive(X_test))
print('Calculated mean np:\n', mean(X_test))
print('Calculated covariance naive:\n', cov_naive(X_test))
print('Calculated covariance np:\n', cov(X_test))


np.testing.assert_almost_equal(mean_naive(X_test), expected_test_mean)
#np.testing.assert_almost_equal(mean(X_test), expected_test_mean)

np.testing.assert_almost_equal(cov(X_test), expected_test_cov)
np.testing.assert_almost_equal(cov_naive(X_test), expected_test_cov)

X:
 [[0 1 2]
 [3 4 5]]
Expected mean:
 [[1.]
 [4.]]
Expected covariance:
 [[0.66666667 0.66666667]
 [0.66666667 0.66666667]]
Calculated mean naive:
 [[1.]
 [4.]]
Calculated mean np:
 [1. 4.]
Calculated covariance naive:
 [[0.66666667 0.66666667]
 [0.66666667 0.66666667]]
Calculated covariance np:
 [[0.66666667 0.66666667]
 [0.66666667 0.66666667]]
