In [103]:
# imports
import numpy as np

## Task 0: Mean and Covariance

In [104]:
def mean_cov(X):
    """Calculates the mean and covariance of a data set:
    Args:
        X (np.ndarray): shape of (n, d) where
        n is the number of data points
        d is the number of dimensions in each data point
    Returns:
        mean (np.ndarray): shape of (1, d), the mean of each dimension
        cov (np.ndarray): shape of (d, d), the sample covariance matrix
        (normalised by n - 1)
    Raises:
        TypeError: if X is not a 2D numpy.ndarray
        ValueError: if X contains fewer than two data points"""
    if type(X) is not np.ndarray or len(X.shape) != 2:
        raise TypeError("X must be a 2D numpy.ndarray")
    n = X.shape[0]
    if n < 2:
        raise ValueError("X must contain multiple data points")
    # keepdims leaves the mean as a (1, d) row so it broadcasts against X.
    mean = np.mean(X, axis=0, keepdims=True)
    # Centre the data once and reuse it on both sides of the product
    # instead of computing (X - mean) twice.
    centered = X - mean
    cov = np.matmul(centered.T, centered) / (n - 1)
    return mean, cov
    

## Task 1: Correlation

In [105]:
# Draw a reproducible 3-D Gaussian sample and print the estimated
# mean and covariance.
np.random.seed(0)
X = np.random.multivariate_normal(
    mean=[12, 30, 10],
    cov=[[36, -30, 15], [-30, 100, -20], [15, -20, 25]],
    size=10000,
)
mean, cov = mean_cov(X)
print(mean, cov, sep="\n")

[[12.04341828 29.92870885 10.00515808]]
[[ 36.2007391  -29.79405239  15.37992641]
 [-29.79405239  97.77730626 -20.67970134]
 [ 15.37992641 -20.67970134  24.93956823]]


In [106]:
def correlation(C):
    """Converts a covariance matrix into a correlation matrix:
    Args:
        C (np.ndarray): covariance matrix with shape of (d, d) where
        d is the number of dimensions
    Returns:
        np.ndarray: shape of (d, d), each entry C[i, j] divided by
        sqrt(C[i, i]) * sqrt(C[j, j])
    Raises:
        TypeError: if C is not a numpy.ndarray
        ValueError: if C is not a 2D square matrix"""
    if type(C) is not np.ndarray:
        raise TypeError("C must be a numpy.ndarray")
    is_square = C.ndim == 2 and C.shape[0] == C.shape[1]
    if not is_square:
        raise ValueError("C must be a 2D square matrix")
    # Square roots of the diagonal: one scale factor per dimension.
    std_dev = np.sqrt(np.diagonal(C))
    # Entry (i, j) of the scale matrix is std_dev[i] * std_dev[j].
    scale = std_dev[:, np.newaxis] * std_dev[np.newaxis, :]
    return C / scale

In [107]:
# Neither a 1-D array nor a non-square matrix is a valid covariance
# matrix; print the message of the ValueError each one triggers.
for bad_input in (
    np.array([1, 2, 3, 4]),
    np.array([[1, 2, 3, 4], [5, 6, 7, 8]]),
):
    try:
        correlation(bad_input)
    except ValueError as err:
        print(err)

C must be a 2D square matrix
C must be a 2D square matrix


In [112]:
class MultiNormal:
    """Represents a Multivariate Normal distribution"""

    def __init__(self, data):
        """Estimates the mean and covariance from a data set:
        Args:
            data (np.ndarray): shape of (d, n) where
            n is the number of data points
            d is the number of dimensions in each data point
        Sets:
            mean (np.ndarray): shape of (d, 1), the mean of data
            cov (np.ndarray): shape of (d, d), the sample covariance of
            data (normalised by n - 1)
        Raises:
            TypeError: if data is not a 2D numpy.ndarray
            ValueError: if data contains fewer than two data points"""
        if type(data) is not np.ndarray or len(data.shape) != 2:
            raise TypeError("data must be a 2D numpy.ndarray")
        n = data.shape[1]
        if n < 2:
            raise ValueError("data must contain multiple data points")
        # Data points are columns, so average across axis 1; keepdims
        # yields the (d, 1) column vector directly, with no transposes.
        self.mean = np.mean(data, axis=1, keepdims=True)
        # Centre once and reuse: (d, n) @ (n, d) -> (d, d).
        centered = data - self.mean
        self.cov = np.matmul(centered, centered.T) / (n - 1)

    def pdf(self, x):
        """Calculates the PDF at a data point:
        Args:
            x (np.ndarray): shape of (d, 1) containing the data point
            whose PDF should be calculated
            d is the number of dimensions of the MultiNormal instance
        Returns:
            the value of the PDF at x
        Raises:
            TypeError: if x is not a numpy.ndarray
            ValueError: if x does not have the shape (d, 1)"""
        d = self.cov.shape[0]
        if type(x) is not np.ndarray:
            raise TypeError("x must be a numpy.ndarray")
        if len(x.shape) != 2 or x.shape[1] != 1 or x.shape[0] != d:
            raise ValueError("x must have the shape ({}, 1)".format(d))
        x_m = x - self.mean
        # Solve cov @ y = x_m instead of forming the explicit inverse:
        # only the product inv(cov) @ x_m is needed.
        quad_form = np.matmul(x_m.T, np.linalg.solve(self.cov, x_m))[0][0]
        # Normalising constant sqrt((2 * pi)^d * |cov|).
        norm = np.sqrt(((2 * np.pi) ** d) * np.linalg.det(self.cov))
        return np.exp(-0.5 * quad_form) / norm

In [109]:
# Same seeded sample as before, transposed so each column is a data
# point, which is the layout MultiNormal is given here.
np.random.seed(0)
data = np.random.multivariate_normal(
    mean=[12, 30, 10],
    cov=[[36, -30, 15], [-30, 100, -20], [15, -20, 25]],
    size=10000,
).T
mn = MultiNormal(data)
print(mn.mean, mn.cov, sep="\n")

[[12.04341828]
 [29.92870885]
 [10.00515808]]
[[ 36.2007391  -29.79405239  15.37992641]
 [-29.79405239  97.77730626 -20.67970134]
 [ 15.37992641 -20.67970134  24.93956823]]


In [110]:
# Fit the distribution on a seeded sample, then draw one further point
# from the same generator state and evaluate the fitted PDF there.
np.random.seed(0)
data = np.random.multivariate_normal(
    mean=[12, 30, 10],
    cov=[[36, -30, 15], [-30, 100, -20], [15, -20, 25]],
    size=10000,
).T
mn = MultiNormal(data)
x = np.random.multivariate_normal(
    mean=[12, 30, 10],
    cov=[[36, -30, 15], [-30, 100, -20], [15, -20, 25]],
    size=1,
).T
print(x, mn.pdf(x), sep="\n")

[[ 8.20311936]
 [32.84231319]
 [ 9.67254478]]
0.0002293023620214375


In [113]:
# Fit a 3-D distribution, then show that pdf rejects points whose shape
# is not (3, 1): one with too many rows, one with too many columns.
np.random.seed(5)
X = np.random.multivariate_normal(
    mean=[5, -4, 2],
    cov=[[6, -3, 5], [-3, 10, -2], [5, -2, 5]],
    size=10000,
).T
mn = MultiNormal(X)
for bad_point in (
    np.array([[1], [2], [3], [4]]),
    np.array([[1, 1], [2, 2], [3, 3]]),
):
    try:
        mn.pdf(bad_point)
    except ValueError as err:
        print(err)

x must have the shape (3, 1)
x must have the shape (3, 1)
