<a href="https://colab.research.google.com/github/MSimonFRA-UAS/LfDFra-UAS/blob/main/biasvariance_sol.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import numpy as np
import matplotlib.pyplot as plt
import math

In [7]:
# Sample from the uniform distribution on [lb, ub] 
def uniform_sample(lb, ub, num_samples):
    """Draw `num_samples` values uniformly at random between `lb` and `ub`.

    Parameters
    ----------
    lb, ub : float
        Lower and upper end of the sampling interval.
    num_samples : int
        Number of values to draw.

    Returns
    -------
    numpy.ndarray
        1-D array of shape ``(num_samples,)``. The values are draws from
        ``np.random.random_sample`` (NumPy's global generator), stretched by
        the interval width and shifted by `lb`.
    """
    interval_width = ub - lb
    unit_draws = np.random.random_sample((num_samples,))
    return lb + unit_draws * interval_width

In [8]:
# Analytical expression for the learned hypothesis (we consider only (i) here)
def g_D(x, x1, x2):
    """Evaluate at `x` the straight line fitted through two sine samples.

    The line passes through ``(x1, sin(pi*x1))`` and ``(x2, sin(pi*x2))``.

    Parameters
    ----------
    x : float or numpy.ndarray
        Point(s) at which the fitted line is evaluated.
    x1, x2 : float
        Abscissae of the two data points that make up the data set.
        The expression divides by ``x2 - x1``, so it is undefined for
        ``x1 == x2``.

    Returns
    -------
    float or numpy.ndarray
        ``slope * x + intercept`` of the fitted line.
    """
    y1 = np.sin(math.pi*x1)
    y2 = np.sin(math.pi*x2)
    dx = x2 - x1
    slope = (y2 - y1)/dx
    intercept = (x2*y1 - x1*y2)/dx
    return slope*x + intercept

In [9]:
# Estimate the average hypothesis, its variance and the squared bias at a given point x by Monte Carlo sampling of data sets D
def g_avg(x, _g_D, num_samples, targetfunc):
    """Estimate the average hypothesis, variance and squared bias at `x`.

    `num_samples` data sets are simulated; each consists of two points drawn
    with ``uniform_sample(-1, 1, 2)``. For every data set the learned
    hypothesis ``_g_D(x, x1, x2)`` is evaluated at `x`.

    Parameters
    ----------
    x : float or numpy.ndarray
        Point at which the hypotheses are evaluated.
    _g_D : callable
        Called as ``_g_D(x, x1, x2)``; returns the hypothesis learned from the
        data points `x1`, `x2`, evaluated at `x`.
    num_samples : int
        Number of simulated data sets.
    targetfunc : callable
        Target function, called as ``targetfunc(x)``.

    Returns
    -------
    tuple
        ``(average_g_at_x, variance_at_x, bias_at_x)`` where the first two are
        ``np.mean`` / ``np.var`` over all collected realizations (no axis
        argument, NumPy's default ``ddof``), and the third is the squared
        difference between the average hypothesis and ``targetfunc(x)``.
    """
    g_D_realizations = []
    for _ in range(num_samples):
        # One realization of the data set D: two sample points in [-1, 1].
        x1, x2 = uniform_sample(-1, 1, 2)
        g_D_realizations.append(_g_D(x, x1, x2))

    average_g_at_x = np.mean(g_D_realizations)
    variance_at_x = np.var(g_D_realizations)
    # "Bias" here is the squared deviation of the average hypothesis from the target.
    bias_at_x = (average_g_at_x - targetfunc(x))**2
    return average_g_at_x, variance_at_x, bias_at_x

In [13]:
# Compute the expected value averaged over all x of variance, bias and out-of-sample error
def calc_bias_variance_eout(_g_D, targetfunc, num_data_samples, num_x_samples):
    """Estimate bias, variance and expected out-of-sample error, averaged over x.

    For each of `num_x_samples` points ``x`` drawn with
    ``uniform_sample(-1, 1, 1)``, the variance and squared bias at ``x`` are
    obtained from ``g_avg``. The out-of-sample error at ``x`` is then estimated
    from a second, freshly drawn batch of `num_data_samples` two-point data
    sets, so it does not reuse the realizations behind the bias/variance
    figures. The three quantities are finally averaged over all ``x`` and
    printed.

    Parameters
    ----------
    _g_D : callable
        Called as ``_g_D(x, x1, x2)``; hypothesis learned from the data points
        `x1`, `x2`, evaluated at `x`.
    targetfunc : callable
        Target function, called as ``targetfunc(x)``.
    num_data_samples : int
        Number of simulated data sets per point ``x`` (used for both the
        ``g_avg`` call and the out-of-sample error estimate).
    num_x_samples : int
        Number of points ``x`` to average over.

    Returns
    -------
    tuple
        ``(bias, variance, eout)``: the averaged estimates that are also
        printed. Returned so that callers can use the numbers instead of only
        reading them from stdout.
    """
    variances, biases, eouts = [], [], []
    for _x_idx in range(num_x_samples):
        # uniform_sample(..., 1) yields a one-element array, not a bare scalar.
        x = uniform_sample(-1, 1, 1)
        _avg_g, variance_at_x, bias_at_x = g_avg(x, _g_D, num_data_samples, targetfunc)
        variances.append(variance_at_x)
        biases.append(bias_at_x)

        # Squared error of individual hypotheses at x, on newly drawn data sets.
        eout_realizations = []
        for _d_idx in range(num_data_samples):
            x1, x2 = uniform_sample(-1, 1, 2)
            v = _g_D(x, x1, x2)
            eout_realizations.append((v - targetfunc(x))**2)

        eouts.append(np.mean(eout_realizations))

    bias = np.mean(biases)
    variance = np.mean(variances)
    eout = np.mean(eouts)
    print('The bias is: ', bias)
    print('The variance is: ', variance)
    print('The expected out-of-sample error is: ', eout)
    print('The bias+variance is: ', variance+bias)
    return bias, variance, eout

In [14]:
# Seed NumPy's global generator so that re-running this cell reproduces the
# same estimates (the sampling helpers draw from np.random).
SEED = 0
np.random.seed(SEED)

num_data_samples = 1000  # simulated data sets per point x
num_x_samples = 1000     # points x over which the estimates are averaged
calc_bias_variance_eout(g_D, lambda x: np.sin(math.pi*x), num_data_samples, num_x_samples)

The bias is:  0.21355839726468484
The variance is:  1.6709736270390998
The expected out-of-sample error is:  1.893718547183065
The bias+variance is:  1.8845320243037846
