In [6]:
import pickle
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Load in the simulation results

In [7]:
# Number of bootstrap simulations; also part of every result file name.
n_sim = 1000
# Directory holding the pickled simulation results (two levels above the CWD).
results_loc = os.path.abspath(os.path.join(os.getcwd(), '..', '..', 'Results', 'nn_bootstrap'))


def _load_metric(model: str, metric: str) -> np.ndarray:
    """Load one pickled list of per-simulation scores as a numpy array.

    ARGS:
        model: file-name prefix of the model, e.g. 'logistic_regression'.
        metric: file-name part identifying the metric, e.g. 'accuracy', 'f1' or 'f1_w'.

    OUTPUT:
        The unpickled simulation results wrapped in a numpy array.
    """
    path = os.path.join(results_loc, f'{model}_{metric}_{n_sim}.pkl')
    # NOTE: pickle.load can execute arbitrary code. Only load files that were
    # produced by this project's own simulation runs.
    with open(path, 'rb') as f:
        return np.array(pickle.load(f))


logreg_acc_list = _load_metric('logistic_regression', 'accuracy')
logreg_f1_list = _load_metric('logistic_regression', 'f1')
logreg_f1_w_list = _load_metric('logistic_regression', 'f1_w')
nn_acc_list = _load_metric('neural_network', 'accuracy')
nn_f1_list = _load_metric('neural_network', 'f1')
nn_f1_w_list = _load_metric('neural_network', 'f1_w')

In [8]:
# Read the simulated population that the bootstrap results were evaluated on.
# NOTE(review): the displayed frame contains an 'Unnamed: 0' column, which looks
# like a saved row index; consider pd.read_csv(..., index_col=0) if no later
# cell relies on that column. TODO confirm.
population_csv = results_loc + '/new_population.csv'
df_new_population = pd.read_csv(population_csv)
df_new_population

Unnamed: 0,Churn,Call Failure,Complains,Subscription Length,Charge Amount,Seconds of Use,Frequency of use,Frequency of SMS,Distinct Called Numbers,Tariff Plan,Status,Age,Customer Value
0,0.0,8.0,0.0,38.0,0.0,4370.0,71.0,5.0,17.0,1.0,1.0,30.0,197.640
1,0.0,0.0,0.0,39.0,0.0,318.0,5.0,7.0,4.0,1.0,0.0,25.0,46.035
2,0.0,10.0,0.0,37.0,0.0,2453.0,60.0,359.0,24.0,1.0,1.0,30.0,1536.520
3,0.0,10.0,0.0,38.0,0.0,4198.0,66.0,1.0,35.0,1.0,1.0,15.0,240.020
4,0.0,3.0,0.0,38.0,0.0,2393.0,58.0,2.0,33.0,1.0,1.0,15.0,145.805
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3145,0.0,21.0,0.0,19.0,2.0,6697.0,147.0,92.0,44.0,2.0,1.0,25.0,721.980
3146,0.0,17.0,0.0,17.0,1.0,9237.0,177.0,80.0,42.0,1.0,1.0,55.0,261.210
3147,0.0,13.0,0.0,18.0,4.0,3157.0,51.0,38.0,21.0,1.0,1.0,30.0,280.320
3148,0.0,7.0,0.0,11.0,2.0,4695.0,46.0,222.0,12.0,1.0,1.0,30.0,1077.640


### Initial Quantitative Results:

In [9]:
# Report the mean of every performance measure, one line per (metric, model) pair.
_mean_report = (
    ('accuracy', 'logistic regression', logreg_acc_list),
    ('f1-score', 'logistic regression', logreg_f1_list),
    ('weighted f1-score', 'logistic regression', logreg_f1_w_list),
    ('accuracy', 'neural network', nn_acc_list),
    ('f1-score', 'neural network', nn_f1_list),
    ('weighted f1-score', 'neural network', nn_f1_w_list),
)
for _metric, _model_name, _scores in _mean_report:
    print(f'The mean {_metric} of the {_model_name}: {_scores.mean()}.')

The mean accuracy of the logistic regression: 0.9726066666666666.
The mean f1-score of the logistic regression: 0.9728159576009966.
The mean accuracy of the neural network: 0.9770277777777777.
The mean f1-score of the neural network: 0.9769182182270102.


In [10]:
# Report the standard error of the mean (std / sqrt(n)) for every performance
# measure. Each list is divided by the square root of ITS OWN length; the
# original divided the neural-network f1 lists by len(nn_acc_list), which is
# only correct while all lists happen to have the same length.
_se_report = (
    ('accuracy', 'logistic regression', logreg_acc_list),
    ('f1-score', 'logistic regression', logreg_f1_list),
    ('weighted f1-score', 'logistic regression', logreg_f1_w_list),
    ('accuracy', 'neural network', nn_acc_list),
    ('f1-score', 'neural network', nn_f1_list),
    ('weighted f1-score', 'neural network', nn_f1_w_list),
)
for _metric, _model_name, _scores in _se_report:
    # .std() uses numpy's default ddof=0, as in the rest of this notebook.
    _std_err = _scores.std() / np.sqrt(len(_scores))
    print(f'The standard error of the {_metric} for the {_model_name}: {_std_err}.')

The standard error of the accuracy for the logistic regression: 0.00017650336267912022.
The standard error of the f1-score for the logistic regression: 0.00017615207381752187.
The standard error of the accuracy for the neural network: 0.000217463988788381.
The standard error of the f1-score for the neural network: 0.00021719965932344434.


### Qualitative Results:

In [None]:
# TODO: still need to add plots here.

### Number of Simulations Justification:

In [9]:
def n_sim_mean(d: float, performance_list: np.ndarray) -> float:
    """This function computes the desired number of simulations for the mean performance measures based on a 
    prespecified Monte Carlo Standard Error d for the mean.

    The value is std(performance_list)**2 / d**2, where std is numpy's default
    (population, ddof=0) standard deviation. It is returned unrounded; round it
    up to obtain a whole number of simulations.

    ARGS:
        d: The desired Monte Carlo Standard Error. Must be strictly positive.
        performance_list: a numpy array containing the simulation results.

    OUTPUT:
        The (unrounded) number of simulations required to satisfy the prespecified MCSE.

    RAISES:
        ValueError: if d is not strictly positive.
    """
    # A zero or negative target would otherwise silently yield inf/nan or a
    # meaningless positive count (the sign is lost when d is squared).
    if d <= 0:
        raise ValueError(f'd must be strictly positive, got {d}.')
    variance = (performance_list.std())**2
    return variance / d**2

In [10]:
def n_sim_se(d: float, performance_list: np.ndarray) -> float:
    """This function computes the desired number of simulations based on a prespecified Monte Carlo Standard Error 
    d for the empirical standard error of the simulation results.

    The empirical standard error (EmpSE) is the standard deviation of the
    simulation results themselves. Its Monte Carlo SE is
    EmpSE / sqrt(2 * (n_sim - 1)); solving that for n_sim gives
    n_sim = (EmpSE**2 / d**2 + 2) / 2, which is what is returned (unrounded).

    ARGS:
        d: The desired Monte Carlo Standard Error. Must be strictly positive.
        performance_list: a numpy array containing the simulation results.

    OUTPUT:
        The (unrounded) number of simulations required to satisfy the prespecified MCSE.

    RAISES:
        ValueError: if d is not strictly positive.
    """
    if d <= 0:
        raise ValueError(f'd must be strictly positive, got {d}.')
    # EmpSE is the spread of the results, NOT the standard error of their mean:
    # dividing by sqrt(len(performance_list)) made the answer shrink with the
    # size of the pilot run. numpy's default ddof=0 is kept for consistency
    # with the other computations in this notebook.
    empse = performance_list.std()
    required_n_sim = ((empse**2)/(d**2) + 2)/2
    return required_n_sim

In [11]:
# Target Monte Carlo standard error for the mean accuracy.
d = 0.0005
required_n_sim_logreg_acc = n_sim_mean(d, logreg_acc_list)
print(required_n_sim_logreg_acc)

142.97481481481498
