In [None]:
from joblib import Memory, Parallel, delayed
# joblib cache whose storage location is 'accuracy_vs_spam'; verbose=0 keeps
# joblib's own logging quiet. Used below via the @memory.cache decorator.
memory = Memory('accuracy_vs_spam', verbose=0)

@memory.cache
def calc_fraction_of_wrong_interval_estimates(
        num_samples: int,
        num_workers: int,
        confidence: float,
        evaluator_name: str,
        iter_count: int,
        error_rates: list[float],
        spammer_error_rates: list[float],
        spam_level: float):
    """Simulate ``iter_count`` synthetic datasets and collect evaluation statistics.

    In every iteration each worker gets a value ``p_true`` that is drawn from
    ``spammer_error_rates`` with probability ``spam_level`` and from
    ``error_rates`` otherwise. A ``SyntheticDataset`` is built from these
    values and the workers are evaluated with the evaluator selected by
    ``evaluator_name``:

    * ``'old'``        -- ``ConfidenceEvaluatorOld`` with ``method='exhaustive'``
    * ``'old greedy'`` -- ``ConfidenceEvaluatorOld`` with ``method='greedy'``
    * ``'majority'``   -- ``MajorityEvaluator`` (interval half-widths are all 0)
    * anything else    -- ``ConfidenceEvaluatorNew``

    The call is wrapped in ``memory.cache``, so results are memoized per
    argument combination.

    Returns a 5-tuple of flat arrays:

    1. per worker, 1 if ``p_true`` lies inside ``[ps - confs, ps + confs]``
       and 0 otherwise (note: despite the function name, 1 marks a *correct*
       interval);
    2. per worker, the interval width ``2 * confs``;
    3. per worker, ``|ps - p_true|``;
    4. per sample, whether the majority vote equals the ground truth;
    5. per sample, whether the weighted vote equals the ground truth.

    Arrays 1-3 have length ``iter_count * num_workers`` (float32), arrays 4-5
    have length ``iter_count * num_samples``.
    """
    total_workers = iter_count * num_workers
    total_samples = iter_count * num_samples

    # Per-worker statistics, one contiguous chunk per iteration.
    correct_interval_estimates = np.zeros(total_workers, dtype=np.float32)
    error_rate_estimation_errors = np.zeros(total_workers, dtype=np.float32)
    int_sizes = np.zeros(total_workers, dtype=np.float32)

    # Per-sample flags: 1 where the respective vote reproduces the ground
    # truth label, 0 where it does not.
    majority_vote_estimations = np.zeros(total_samples)
    weighted_vote_estimations = np.zeros(total_samples)

    for run in range(iter_count):
        # Draw both candidate values for every worker, then pick the spammer
        # one wherever the Bernoulli(spam_level) mask fires.
        spammer_p_true = np.random.choice(spammer_error_rates, size=num_workers)
        normal_p_true = np.random.choice(error_rates, size=num_workers)
        spam_flags = np.random.choice(
            [1, 0], num_workers, p=[spam_level, 1-spam_level])
        p_true = np.where(spam_flags == 1, spammer_p_true, normal_p_true)

        dataset = SyntheticDataset(
            num_samples=num_samples, num_workers=num_workers, p_true=p_true)

        if evaluator_name in ('old', 'old greedy'):
            # Both legacy variants share the evaluator and differ only in
            # the search method.
            search_method = 'exhaustive' if evaluator_name == 'old' else 'greedy'
            evaluator = ConfidenceEvaluatorOld(dataset)
            ps, confs = evaluator.evaluate_workers_with_confidence(
                dataset.workers,
                confidence=confidence,
                method=search_method
            )
        elif evaluator_name == 'majority':
            evaluator = MajorityEvaluator(dataset)
            ps = evaluator.evaluate_workers(dataset.workers)
            # The majority evaluator yields point estimates only.
            confs = np.zeros(num_workers)
        else:
            evaluator = ConfidenceEvaluatorNew(dataset)
            ps, confs = evaluator.evaluate_workers_with_confidence(
                dataset.workers,
                confidence=confidence,
            )

        ground_truth = dataset.get_ground_truth_for_samples(dataset.samples)
        maj_vote = VoteAggregator.majority_vote(dataset, dataset.samples)
        weighted_vote = VoteAggregator.weighted_vote(dataset, dataset.samples, ps, 0.5)

        worker_chunk = slice(run * num_workers, (run + 1) * num_workers)
        sample_chunk = slice(run * num_samples, (run + 1) * num_samples)

        min_limit = ps - confs
        max_limit = ps + confs
        inside_interval = (min_limit <= p_true) & (p_true <= max_limit)

        correct_interval_estimates[worker_chunk] = np.where(inside_interval, 1, 0)
        int_sizes[worker_chunk] = 2*confs
        error_rate_estimation_errors[worker_chunk] = np.abs(ps - p_true)

        majority_vote_estimations[sample_chunk] = (ground_truth == maj_vote)
        weighted_vote_estimations[sample_chunk] = (ground_truth == weighted_vote)

    return (
        correct_interval_estimates,
        int_sizes,
        error_rate_estimation_errors,
        majority_vote_estimations,
        weighted_vote_estimations,
    )

In [1]:
from datasets import SyntheticDataset
from crowd_evaluation import ConfidenceEvaluatorNew, ConfidenceEvaluatorOld, MajorityEvaluator, VoteAggregator
import numpy as np

import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
# Set matplotlib's default figure resolution to 300 dpi for all plots below.
mpl.rcParams['figure.dpi'] = 300