In [6]:
import pickle
import os
import numpy as np

### Load in the simulation results

In [7]:
results_loc = os.path.abspath((os.path.join(os.getcwd(), '..', '..', 'Results', 'bootstrap')))
with open(results_loc + '/logistic_regression_accuracy.pkl', 'rb') as f:
    logreg_acc_list = np.array(pickle.load(f))
with open(results_loc + '/logistic_regression_f1.pkl', 'rb') as f:
    logreg_f1_list = np.array(pickle.load(f))
with open(results_loc + '/neural_network_accuracy.pkl', 'rb') as f:
    nn_acc_list = np.array(pickle.load(f))
with open(results_loc + '/neural_network_f1.pkl', 'rb') as f:
    nn_f1_list = np.array(pickle.load(f))

In [24]:
print(f'The mean accuracy of the logistic regression: {logreg_acc_list.mean()}.')
print(f'The mean f1-score of the logistic regression: {logreg_f1_list.mean()}.')
print(f'The mean accuracy of the neural network: {nn_acc_list.mean()}.')
print(f'The mean f1-score of the neural network: {nn_f1_list.mean()}.')

The mean accuracy of the logistic regression: 0.8944355555555555.
The mean f1-score of the logistic regression: 0.5717595584997391.
The mean accuracy of the neural network: 0.9531411111111111.
The mean f1-score of the neural network: 0.8442213634154173.


In [23]:
print(f'The standard error of the accuracy for the logistic regression: {logreg_acc_list.std()/np.sqrt(len(logreg_acc_list))}.')
print(f'The standard error of the f1-score for the logistic regression: {logreg_f1_list.std()/np.sqrt(len(logreg_f1_list))}.')
print(f'The standard error of the accuracy for the neural network: {nn_acc_list.std()/np.sqrt(len(nn_acc_list))}.')
print(f'The standard error of the f1-score for the neural network: {nn_f1_list.std()}.')

The standard error of the accuracy for the logistic regression: 0.00034573581579146195.
The standard error of the f1-score for the logistic regression: 0.0014204677116960932.
The standard error of the accuracy for the neural network: 0.0003201524926008206.
The standard error of the f1-score for the neural network: 0.038038346438302666.


In [14]:
def n_sim_mean(d: float, performance_list: np.ndarray):
    """This function computes the desired number of simulations for the mean performance measures based on a 
    prespecified Monte Carlo Standard Error d for the mean.

    ARGS:
        d: The desired Monte Carlo Standard Error.
        performance_list: a numpy array containing the simulation results.

    OUTPUT:
        The minimum number of simulations required to satisfy the prespecified MCSE.
    """ 
    numerator = (performance_list.std())**2
    denominator = d**2
    return (numerator/denominator)

In [15]:
def n_sim_se(d: float, performance_list: np.ndarray):
    """This function computes the desired number of simulations based on a prespecified Monte Carlo Standard Error 
    d for the emperical standard error of the simulation results.

    ARGS:
        d: The desired Monte Carlo Standard Error.
        performance_list: a numpy array containing the simulation results.

    OUTPUT:
        The minimum number of simulations required to satisfy the prespecified MCSE.
    """
    empse = performance_list.std()/np.sqrt(len(performance_list))
    n_sim = ((empse**2)/(d**2) + 2)/2
    return n_sim


In [18]:
d = 0.0005
print(n_sim_mean(d, logreg_acc_list))

478.13301728395084
