In [1]:
import numpy as np

<h2>Task 1</h2>

In [2]:
# Observed sample used throughout Task 1 (ten measurements).
X = np.array([
    4.17, 5.58, 5.18, 6.11, 4.50,
    4.61, 5.17, 4.53, 5.33, 5.14,
])

In [2]:
def biased_var(x):
    """Return the biased (divide-by-n) variance of ``x``.

    The value is computed as the mean squared deviation from the mean
    (``np.var`` with ``ddof=0``) instead of ``mean(x**2) - mean(x)**2``.
    The latter subtracts two nearly equal numbers whenever the mean is large
    relative to the spread, which destroys precision and can even produce a
    slightly negative "variance".

    Args:
        x: array-like of observations.

    Returns:
        The biased variance estimate (always non-negative).
    """
    return np.var(x, ddof=0)

In [3]:
def biased_sd(x):
    """Return the square root of the biased (divide-by-n) variance of ``x``.

    ``np.std`` with ``ddof=0`` is used so that the variance under the square
    root is computed from squared deviations and therefore can never be
    negative. Taking the square root of ``mean(x**2) - mean(x)**2`` instead
    may yield NaN for (nearly) constant data because of rounding.

    Args:
        x: array-like of observations.

    Returns:
        The biased standard deviation estimate.
    """
    return np.std(x, ddof=0)

In [4]:
def sample_var(x):
    """Return the sample variance of ``x`` (divisor ``n - 1``)."""
    # ddof=1 applies Bessel's correction.
    bessel_corrected = np.var(x, ddof=1)
    return bessel_corrected

In [5]:
def sample_sd(x):
    """Return the sample standard deviation: the square root of ``sample_var(x)``."""
    variance = sample_var(x)
    return np.sqrt(variance)

**a)**

In [7]:
# Part a): divide-by-n estimates versus the (n - 1)-corrected ones.
print(f'Biased estimate of population variance: {biased_var(X):.6f}')
print(f'Biased estimate of population SD: {biased_sd(X):.6f}')
print()
print(f'Sample variance (unbiased estimate of the population variance): {sample_var(X):.6f}')
# The square root of an unbiased variance estimate is not itself unbiased for
# the SD (Jensen's inequality), so the label must not claim unbiasedness.
print(f'Sample SD (square root of the sample variance; biased for the population SD): {sample_sd(X):.6f}')

Biased estimate of population variance: 0.305996
Biased estimate of population SD: 0.553169

Sample variance (unbiased estimate of the population variance): 0.339996
Sample SD (unbiased estimate of the population SD): 0.583091


**b)**

In [6]:
def jackknife(X, T, print_results=False, return_estimates=False):
    """Leave-one-out jackknife for the estimator ``T`` on the sample ``X``.

    Args:
        X: array-like of observations. The first axis indexes observations,
            so a 1-D sample or an ``(n, d)`` array with one row per
            observation are both accepted.
        T: callable that maps a sample (laid out like ``X``) to a scalar
            estimate.
        print_results: if True, print the original estimate, the jackknife
            estimate and the jackknife standard error.
        return_estimates: if True, return ``(jack_est, jack_est_se)``;
            otherwise nothing is returned.

    Returns:
        ``(jack_est, jack_est_se)`` when ``return_estimates`` is True,
        otherwise None.

    Raises:
        ValueError: if ``X`` contains fewer than two observations, because
            neither the leave-one-out estimates nor the standard error are
            defined in that case.
    """
    X = np.asarray(X)
    if X.ndim == 0 or X.shape[0] < 2:
        raise ValueError('jackknife requires at least two observations')

    # The number of data points
    n = X.shape[0]

    # Estimator for the original sample
    T_hat = T(X)

    # (\hat{T}^{j}, j=1,...,n)
    # Remove the jth observation, and compute the estimator with the subset.
    # axis=0 is essential: without it np.delete flattens the array and, for
    # multi-dimensional data, would drop a single scalar instead of a row.
    loo_estimates = np.array([T(np.delete(X, j, axis=0)) for j in range(n)])

    # Pseudo values
    pv = n * T_hat - (n - 1) * loo_estimates

    # The jackknife estimator is the mean of the pseudo values
    jack_est = np.mean(pv)

    # The jackknife estimate of the standard error of the jackknife estimator
    jack_est_se = np.std(pv, ddof=1) / np.sqrt(n)

    if print_results:
        print(f'The estimator for the original sample: {T_hat:.6f}')
        print(f'Jackknife estimator: {jack_est:.6f}')
        print(f'The jackknife estimate of the SE of the jackknife estimator: {jack_est_se:.6f}')

    if return_estimates:
        return jack_est, jack_est_se

In [9]:
# Part b): jackknife each of the four estimators on the Task 1 sample.
estimators = (biased_var, biased_sd, sample_var, sample_sd)
for estimator in estimators:
    print(estimator.__name__)
    jackknife(X, T=estimator, print_results=True)
    print()

biased_var
The estimator for the original sample: 0.305996
Jackknife estimator: 0.339996
The jackknife estimate of the SE of the jackknife estimator: 0.130452

biased_sd
The estimator for the original sample: 0.553169
Jackknife estimator: 0.598540
The jackknife estimate of the SE of the jackknife estimator: 0.126360

sample_var
The estimator for the original sample: 0.339996
Jackknife estimator: 0.339996
The jackknife estimate of the SE of the jackknife estimator: 0.146758

sample_sd
The estimator for the original sample: 0.583091
Jackknife estimator: 0.598517
The jackknife estimate of the SE of the jackknife estimator: 0.134025



<h2>Task 2</h2>

In [7]:
def simulation_experiment(n, m, T, true_value=1.0, seed=None):
    """Compare an estimator with its jackknife version on simulated N(0,1) data.

    Draws ``m`` independent samples of size ``n`` from N(0,1), evaluates
    ``T`` and its jackknife counterpart on each, and prints the empirical
    bias and standard error of both, together with the average jackknife
    standard-error estimate.

    Args:
        n: the number of random samples to draw from N(0,1)
        m: the number of simulations
        T: the estimator whose behaviour one wishes to investigate
        true_value: the true value of the quantity estimated by ``T``; the
            default 1.0 is both the variance and the SD of N(0,1).
        seed: optional seed for a dedicated random generator, making the
            run reproducible. With the default None the samples are drawn
            from NumPy's global random state.
    """
    if seed is None:
        draw = np.random.normal
    else:
        draw = np.random.default_rng(seed).normal

    orig_estimates = np.zeros(m)
    jack_estimates = np.zeros(m)
    jack_estimate_ses = np.zeros(m)

    for i in range(m):
        # Draw random samples from N(0,1)
        X = draw(loc=0.0, scale=1.0, size=n)

        # Compute the value of the original estimator with the current sample X
        orig_estimates[i] = T(X)

        # Jackknife
        jack_est, jack_est_se = jackknife(X, T=T, return_estimates=True)
        jack_estimates[i] = jack_est
        jack_estimate_ses[i] = jack_est_se

    # Calculate biases and standard errors relative to the known true value.
    bias_orig = np.mean(orig_estimates) - true_value
    se_orig = np.std(orig_estimates, ddof=1)
    bias_jack = np.mean(jack_estimates) - true_value
    se_jack = np.std(jack_estimates, ddof=1)
    mean_jack_se = np.mean(jack_estimate_ses)

    print(f'Original estimator: bias={bias_orig:.4f}, SE={se_orig:.4f}')
    print(f'Jackknife estimator: bias={bias_jack:.4f}, SE={se_jack:.4f}')
    print(f'Mean of the jackknife SE estimates: {mean_jack_se:.4f}')

In [8]:
# Seed NumPy's global random state so that re-running this cell reproduces
# the same simulated samples (and therefore the same printed figures).
np.random.seed(0)
for f in [biased_var, biased_sd, sample_var, sample_sd]:
    print(f'##### {f.__name__} #####')
    simulation_experiment(n=100, m=10000, T=f)
    print()

##### biased_var #####
Original estimator: bias=-0.0115, SE=0.1405
Jackknife estimator: bias=-0.0015, SE=0.1419
Mean of the jackknife SE estimates: 0.1389

##### biased_sd #####
Original estimator: bias=-0.0063, SE=0.0713
Jackknife estimator: bias=0.0012, SE=0.0718
Mean of the jackknife SE estimates: 0.0706

##### sample_var #####
Original estimator: bias=0.0001, SE=0.1421
Jackknife estimator: bias=0.0001, SE=0.1421
Mean of the jackknife SE estimates: 0.1407

##### sample_sd #####
Original estimator: bias=-0.0013, SE=0.0711
Jackknife estimator: bias=0.0012, SE=0.0712
Mean of the jackknife SE estimates: 0.0707



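- Of the four original estimators, `sample_var` is the only unbiased one: its simulated bias is indistinguishable from zero, whereas `biased_var`, `biased_sd` and `sample_sd` all underestimate the true value on average.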