In [48]:
import numpy as np
import pandas as pd

In [49]:
# Create dummy data in this cell, to test the rest of the algorithm with for now

# target
t = np.array([0,0,0,1,1,1])

# base learner predictions
p = np.array([
    [0,0,0,1,1,1],
    [0,1,0,1,1,1],
    [0,0,0,1,0,1],
    [0,0,0,1,2,1]
])

In [143]:
# error is prediction minus target
e = p - t
print(e)

[[ 0  0  0  0  0  0]
 [ 0  1  0  0  0  0]
 [ 0  0  0  0 -1  0]
 [ 0  0  0  0  1  0]]


In [144]:
# calculate the mean  for each base learner
means = np.mean(e, axis=1)
print(means)

[ 0.          0.16666667 -0.16666667  0.16666667]


In [50]:
# calculate the covariance matrix of the errors
cov = np.cov(e)
print(cov)

0.044642857142857144

In [51]:
# define a function that gives the ensemble MSE for a given set of weights
def ensemble_mse(weights, errors):
    weights = np.array(weights)/np.sum(weights)
    error = np.dot(weights, errors)
    return np.mean(error**2)   
    

0.044642857142857144

In [52]:
print(ensemble_mse([0,0,0,1], e))

-0.03571428571428571 0.04336734693877552


0.04464285714285715

In [53]:
# define a function that approximates the ensemble mse for a given set of weights
# uses that mse is the same as the variance of the errors, assuming the errors have 
def approx_ensemble_error(weights, covariances, means):
    selection = np.array(weights) != 0
    cov = covariances[selection][:,selection]
    print(np.mean(cov))
    print(means[selection])
    return np.mean(cov) + np.mean(means[selection])**2

[[ 0.          0.          0.          0.        ]
 [ 0.          0.28571429  0.          0.14285714]
 [ 0.          0.          0.12244898 -0.14285714]
 [ 0.          0.14285714 -0.14285714  0.28571429]]


0.0433673469387755

In [54]:
print(approx_ensemble_error([0,0,0,1], cov, means))

In [55]:
# test the approx_ensemble_error function
# iterate over all combinations of weights (binary)
# and compare the approximated ensemble error with the actual ensemble error
for i in range(2**4):
    weights = [int(x) for x in list(bin(i)[2:].zfill(4))]
    real = ensemble_mse(weights, e)
    approx = approx_ensemble_error(weights, cov, means)
    isclose = np.isclose(real, approx)
    print(weights, real, approx, isclose)

[0, 0, 0, 0] 0.0 0.0 True
[0, 0, 0, 1] 0.017857142857142856 0.017857142857142856 True
[0, 0, 1, 0] 0.017857142857142856 0.017857142857142856 True
[0, 0, 1, 1] 0.07142857142857142 0.07142857142857142 True
[0, 1, 0, 0] 0.017857142857142856 0.017857142857142856 True
[0, 1, 0, 1] 0.07142857142857142 0.07142857142857142 True
[0, 1, 1, 0] 0.07142857142857142 0.07142857142857142 True
[0, 1, 1, 1] 0.16071428571428573 0.1607142857142857 True
[1, 0, 0, 0] 0.017857142857142856 0.017857142857142856 True
[1, 0, 0, 1] 0.07142857142857142 0.07142857142857142 True
[1, 0, 1, 0] 0.07142857142857142 0.07142857142857142 True
[1, 0, 1, 1] 0.16071428571428573 0.1607142857142857 True
[1, 1, 0, 0] 0.07142857142857142 0.07142857142857142 True
[1, 1, 0, 1] 0.16071428571428573 0.1607142857142857 True
[1, 1, 1, 0] 0.16071428571428573 0.1607142857142857 True
[1, 1, 1, 1] 0.2857142857142857 0.2857142857142857 True
