In [4]:
def calculate_covariance_matrix(vectors: list[list[float]]) -> list[list[float]]:
    """Compute the sample covariance matrix of a set of equal-length vectors.

    Each inner list is treated as one variable and its elements as that
    variable's observations. Entry ``[i][j]`` of the result is the sample
    covariance (``n - 1`` denominator) between ``vectors[i]`` and
    ``vectors[j]``; the diagonal therefore holds the sample variances.

    Args:
        vectors: ``m`` vectors, each holding the same number ``n`` of
            numeric observations.

    Returns:
        An ``m x m`` symmetric matrix (list of lists) of floats.

    Raises:
        ValueError: If ``vectors`` is empty, if the vectors differ in
            length, or if they hold fewer than two observations (the
            ``n - 1`` denominator would be zero or negative).
    """
    if not vectors:
        raise ValueError("At least one vector is required")

    # number of vectors - m
    m = len(vectors)
    # length of each vector - n
    n = len(vectors[0])

    # check that every vector has the same length n
    if any(len(vector) != n for vector in vectors):
        raise ValueError("The vectors are not the same length")
    if n < 2:
        raise ValueError("Each vector must contain at least two observations")

    # Centre every vector once up front; the mean of a vector does not depend
    # on which other vector it is paired with.
    centered = []
    for vector in vectors:
        mean = sum(vector) / n
        centered.append([value - mean for value in vector])

    # build covariance matrix, which must be number of vectors x number of vectors
    covariance_matrix = [[0.0 for _ in range(m)] for _ in range(m)]

    # cov(i, j) == cov(j, i), so compute the upper triangle and mirror it.
    for i in range(m):
        for j in range(i, m):
            covariance = sum(a * b for a, b in zip(centered[i], centered[j])) / (n - 1)
            covariance_matrix[i][j] = covariance
            covariance_matrix[j][i] = covariance

    return covariance_matrix
    
# Demo: two 3-element vectors; the recorded output is [[1.0, 1.0], [1.0, 1.0]].
print(calculate_covariance_matrix([[1, 2, 3], [4, 5, 6]]))

[[1.0, 1.0], [1.0, 1.0]]


In [5]:
# Demo: two 3-element vectors; the recorded output is [[448.0, 520.0], [520.0, 700.0]].
print(calculate_covariance_matrix([[92, 60, 100], [80, 30, 70]]))

[[448.0, 520.0], [520.0, 700.0]]


In [6]:
# Demo: three 5-element vectors mixing ints and floats; the recorded output is a
# symmetric 3 x 3 matrix whose entries show floating-point rounding.
print(calculate_covariance_matrix([[75, 65, 22, 15, 18], [10.5, 12.8, 7.3, 2.1, 92], [45, 65, 74, 76, 56]]))

[[819.5, -348.925, -236.99999999999994], [-348.925, 1421.443, -181.185], [-236.99999999999994, -181.185, 166.70000000000002]]


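The ideal solution: compute each vector's mean once, then fill only the upper triangle of the covariance matrix and mirror it, since cov(i, j) = cov(j, i).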

In [None]:
def calculate_covariance_matrix(vectors: list[list[float]]) -> list[list[float]]:
    """Compute the sample covariance matrix of a set of features.

    Each inner list is one feature and its elements are that feature's
    observations. Entry ``[i][j]`` of the result is the sample covariance
    (``n_observations - 1`` denominator) between feature ``i`` and feature
    ``j``.

    Args:
        vectors: One list of numeric observations per feature; all lists
            must have the same length.

    Returns:
        A symmetric ``n_features x n_features`` matrix (list of lists) of
        floats.

    Raises:
        ValueError: If ``vectors`` is empty, if the features differ in
            length, or if there are fewer than two observations per feature.
    """
    if not vectors:
        raise ValueError("At least one vector is required")

    n_features = len(vectors)
    n_observations = len(vectors[0])

    # Without this check a longer feature would be silently mis-averaged
    # (full sum divided by the first feature's length) and a shorter one
    # would fail with an IndexError deep inside the loop.
    if any(len(feature) != n_observations for feature in vectors):
        raise ValueError("The vectors are not the same length")
    if n_observations < 2:
        raise ValueError("Each vector must contain at least two observations")

    covariance_matrix = [[0.0 for _ in range(n_features)] for _ in range(n_features)]

    # Each feature's mean is needed for every pair it takes part in; compute once.
    means = [sum(feature) / n_observations for feature in vectors]

    for i in range(n_features):
        # Covariance is symmetric, so only the upper triangle is computed
        # and each value is mirrored into the lower triangle.
        for j in range(i, n_features):
            covariance = sum(
                (x - means[i]) * (y - means[j])
                for x, y in zip(vectors[i], vectors[j])
            ) / (n_observations - 1)
            covariance_matrix[i][j] = covariance_matrix[j][i] = covariance

    return covariance_matrix