In [1]:
# importing important libs
import random
import math
from statistics import mean, pstdev

In [2]:
def single_option_sampling(*, main_answers: list, options: list, population_size: int):
    """Resample single-choice answers with replacement and tally them per option.

    Args:
        main_answers: Observed answers; each element is one chosen option.
        options: Every possible option. Each one appears in the result,
            with a count of 0 if it is never drawn.
        population_size: Number of draws to perform.

    Returns:
        A dict mapping each option to the number of times it was drawn.
    """
    # Seed the tally with every option so undrawn ones still show up as 0.
    tally = dict.fromkeys(options, 0)

    for _ in range(population_size):
        # Draw one observed answer uniformly at random (with replacement).
        drawn = main_answers[random.randrange(len(main_answers))]
        tally[drawn] = tally[drawn] + 1

    return tally

In [3]:
def multiple_option_sampling(*, main_answers: list, options: list, population_size: int):
    """Resample multiple-choice answers with replacement and tally them per option.

    Args:
        main_answers: Observed answers; each element is an iterable of the
            options selected in one answer.
        options: Every possible option. Each one appears in the result,
            with a count of 0 if it is never drawn.
        population_size: Number of answers to draw.

    Returns:
        A dict mapping each option to the number of drawn answers' selections
        that included it.
    """
    # Seed the tally with every option so undrawn ones still show up as 0.
    totals = dict.fromkeys(options, 0)

    for _ in range(population_size):
        # Each drawn answer carries a collection of selected options.
        selected = main_answers[random.randrange(len(main_answers))]
        for choice in selected:
            totals[choice] = totals[choice] + 1

    return totals

In [4]:
def numerical_field_sampling(*, main_answers: list, population_size: int):
    """Resample observed values with replacement.

    Args:
        main_answers: Observed values to draw from.
        population_size: Number of draws to perform.

    Returns:
        A list of ``population_size`` values, each picked uniformly at random
        from ``main_answers``.
    """
    # Only values that were actually observed can appear in the resample.
    return [
        main_answers[random.randrange(len(main_answers))]
        for _ in range(population_size)
    ]

In [5]:
def bootstrapping(*, answers: list, all_options: list, replacements: int, population_size: int, question_type: str = 'single'):
    """Bootstrap categorical answers and report per-option percentages.

    Draws ``replacements`` resampled populations of ``population_size``
    answers each, then converts every option's count in each population into
    a percentage of ``population_size``.

    Args:
        answers: Observed answers. For ``'single'`` each element is one
            option; for ``'multiple'`` each element is an iterable of options.
        all_options: Every possible option.
        replacements: Number of resampled populations to generate.
        population_size: Number of draws per population. Must be non-zero
            when ``replacements`` is positive, because counts are divided
            by it.
        question_type: Either ``'single'`` or ``'multiple'``.

    Returns:
        A dict mapping each option to a list holding one percentage per
        resampled population.

    Raises:
        ValueError: If ``question_type`` is not ``'single'`` or ``'multiple'``.
    """
    # Fail fast with a clear message; otherwise an unknown type would leave
    # `population` unassigned and surface as an UnboundLocalError.
    if question_type not in ('single', 'multiple'):
        raise ValueError(
            f"question_type must be 'single' or 'multiple', got {question_type!r}"
        )

    populations = []
    for _ in range(replacements):
        if question_type == 'single':
            population = single_option_sampling(main_answers=answers, options=all_options,
                                                population_size=population_size)
        else:
            population = multiple_option_sampling(main_answers=answers, options=all_options,
                                                  population_size=population_size)
        populations.append(population)

    # Turn the raw counts of every population into percentages per option.
    population_metrics = {option: [] for option in all_options}
    for population in populations:
        for option, count in population.items():
            population_metrics[option].append((count / population_size) * 100)

    return population_metrics

In [6]:
def bootstrapping_numerical_fields(*, answers: list, population_size: int):
    """Return one resample of ``answers`` drawn with replacement.

    Delegates to ``numerical_field_sampling``.

    Args:
        answers: Observed values to draw from.
        population_size: Number of draws to perform.

    Returns:
        The list produced by ``numerical_field_sampling``.
    """
    return numerical_field_sampling(main_answers=answers, population_size=population_size)

In [7]:
def confidence_interval(*, data_points: list, confidence: float = 0.95):
    """Compute a normal-approximation confidence interval for the mean.

    The interval is ``mean ± z * (S / sqrt(n))``, where ``S`` is the
    population standard deviation of ``data_points``, ``n`` is their count
    and ``z`` is the two-sided standard-normal critical value for
    ``confidence`` (about 1.96 for 0.95).

    Formula reference:
    https://www.indeed.com/career-advice/career-development/how-to-calculate-confidence-interval

    Args:
        data_points: Numeric observations; must not be empty.
        confidence: Confidence level, strictly between 0 and 1.

    Returns:
        A tuple ``(lower_value, mean, upper_value)``.

    Raises:
        ValueError: If ``confidence`` is not strictly between 0 and 1.
        statistics.StatisticsError: If ``data_points`` is empty.
    """
    # Imported locally so the block does not depend on the file-level imports.
    from statistics import NormalDist

    if not 0 < confidence < 1:
        raise ValueError(
            f"confidence must be strictly between 0 and 1, got {confidence!r}"
        )

    sample_mean = mean(data_points)
    # Population standard deviation, as in the original implementation.
    std_dev = pstdev(data_points)
    standard_error = std_dev / math.sqrt(len(data_points))

    # The confidence level itself is not the multiplier: convert it to the
    # z-score that leaves (1 - confidence) / 2 probability in each tail.
    z_score = NormalDist().inv_cdf((1 + confidence) / 2)
    margin_error = z_score * standard_error

    lower_value = sample_mean - margin_error
    upper_value = sample_mean + margin_error

    return lower_value, sample_mean, upper_value