Writing Notes
1. Sampling is the same.
2. Bin choice is the same.
3. The only difference is in reporting: positive findings are increased by 50% and negative findings are decreased by 50%. This is meant to mimic selective reporting that exaggerates your findings without being a complete fabrication: the number of '1's is overreported and the number of '0's is underreported, each at a rate of 50%.

In [1]:
import pandas as pd
import matplotlib.pylab as plt
import numpy as np
import scipy
from scipy.stats import beta, binom, entropy
import random
import json
import copy
import math
import pickle
import statistics

# my modules
import scientist
import scientist_bad_agents
import evaluation
import helper
import settings
import publisher

# simulation-wide configuration shared by every cell below
num_bins = 30
num_draws = 10
num_participants = 10
num_generations = 100

# one probability per bin index, each drawn uniformly between 0.0 and 1.0
bins_to_probs = {
    bin_index: np.random.uniform(low=0.0, high=1.0)
    for bin_index in range(num_bins)
}

In [2]:
def make_participants_bad_actors(setting, alpha_value, percent):
    """Build one cohort of participants, a given share of them bad agents.

    Args:
        setting: Name handed to ``settings.ReportingSetting``; must be one of
            "rate", "data" or "subset".
        alpha_value: Forwarded as ``alpha`` to every participant.
        percent: Percentage of the ``num_participants`` participants that are
            created with ``bad_agent=True``.

    Returns:
        A list of ``scientist_bad_agents.Participant`` objects: the
        participants with ``bad_agent=False`` first, followed by those with
        ``bad_agent=True``.

    Raises:
        ValueError: If ``setting`` is not one of the supported names.
    """
    valid_settings = ("rate", "data", "subset")
    if setting not in valid_settings:
        # Previously an unknown setting surfaced as an UnboundLocalError on
        # `report_set`; fail early with a message that names the problem.
        raise ValueError(
            f"unknown reporting setting {setting!r}; expected one of {valid_settings}"
        )

    # Round only the bad-agent count and derive the other count from it.
    # Rounding both shares independently can lose or gain a participant,
    # because round() sends exact halves to the nearest even integer
    # (e.g. 5 participants at 50% gave 2 + 2 = 4).
    num_bad = round((percent / 100.0) * num_participants)
    num_good = num_participants - num_bad

    participants = []
    for is_bad, count in ((False, num_good), (True, num_bad)):
        for _ in range(count):
            # every participant gets its own ReportingSetting instance
            report_set = settings.ReportingSetting(setting)
            participants.append(
                scientist_bad_agents.Participant(
                    alpha=alpha_value,
                    reporting_setting=report_set,
                    bad_agent=is_bad,
                )
            )

    return participants

In [3]:
def run_experiment_gen_info_bad_actors(setting, alpha_value, rel_pl_data_val, rel_pl_surprise_val, rel_pl_bias_val, percent):
    """Simulate ``num_generations`` generations and score the resulting record.

    Args:
        setting: Reporting-setting name passed on to
            ``make_participants_bad_actors``.
        alpha_value: Passed on to ``make_participants_bad_actors``.
        rel_pl_data_val: Forwarded to ``publisher.peer_review``.
        rel_pl_surprise_val: Forwarded to ``publisher.peer_review``.
        rel_pl_bias_val: Forwarded to ``publisher.peer_review``.
        percent: Percentage of bad agents in each generation's cohort.

    Returns:
        A 3-tuple of
        ``evaluation.arm_parameter_score`` for the final record,
        ``evaluation.total_entropy_score`` for the final record, and a dict
        mapping each generation index to ``evaluation.total_entropy_score``
        of the record right after that generation's peer review.
    """
    # every bin starts with a count of 1 under both outcome keys (0 and 1)
    record = {bin_index: {0: 1, 1: 1} for bin_index in range(num_bins)}
    score_by_generation = {}

    for generation in range(num_generations):
        # a brand-new cohort is created for each generation
        cohort = make_participants_bad_actors(setting, alpha_value, percent)

        # each scientist draws samples, picks a bin, then files a report
        for member in cohort:
            for _ in range(num_draws):
                member.sample(record, num_bins, bins_to_probs)
            member.choose_bin(record, num_bins, num_draws)
            member.report(num_bins, num_draws)

        # peer review picks the reports to publish and hands back the updated record
        record = publisher.peer_review(
            cohort,
            record,
            rel_pl_data_val,
            rel_pl_surprise_val,
            rel_pl_bias_val,
            num_draws,
        )

        score_by_generation[generation] = evaluation.total_entropy_score(record, bins_to_probs)

    final_arm_score = evaluation.arm_parameter_score(record, bins_to_probs)
    final_entropy_score = evaluation.total_entropy_score(record, bins_to_probs)
    return (final_arm_score, final_entropy_score, score_by_generation)

In [4]:
def run_internal_experiment(
    num_runs=10,
    output_path="/Users/marinamancoridis/Thesis/Thesis_Simulations/percent_bad_to_kl_gens_25.p",
):
    """Sweep the bad-agent percentage and pickle the averaged per-generation scores.

    For each percentage in 0, 10, ..., 50 the experiment is run ``num_runs``
    times with setting "data", alpha 0, and peer-review weights
    data=1, surprise=1, bias=0. The per-generation scores returned by
    ``run_experiment_gen_info_bad_actors`` are averaged across the runs.
    Progress (the percentage, then each run index) is printed as it goes.

    Args:
        num_runs: Number of repetitions averaged for each percentage.
        output_path: File the result dict is pickled to. The dict maps each
            percentage to a dict of generation index -> averaged score.

    Raises:
        ValueError: If ``num_runs`` is less than 1.
    """
    if num_runs < 1:
        # averaging over zero runs would otherwise fail with ZeroDivisionError
        raise ValueError("num_runs must be at least 1")

    percent_bad = [0, 10, 20, 30, 40, 50]
    percent_bad_to_kl_gens = {}

    for percent in percent_bad:
        print(percent)

        # running sum per generation, turned into a mean after the runs
        average_KL_per_generation = {gen_no: 0 for gen_no in range(num_generations)}

        for i in range(num_runs):
            print(f"   {i}")
            _, _, kl_per_gen = run_experiment_gen_info_bad_actors("data", 0, 1, 1, 0, percent)
            for gen_no, score in kl_per_gen.items():
                average_KL_per_generation[gen_no] += score

        for gen_no in average_KL_per_generation:
            average_KL_per_generation[gen_no] /= num_runs

        percent_bad_to_kl_gens[percent] = average_KL_per_generation

    # save the results; the with-block guarantees the file is flushed and closed
    with open(output_path, "wb") as output_file:
        pickle.dump(percent_bad_to_kl_gens, output_file)

In [None]:
# Run the sweep over bad-agent percentages: progress is printed below and the
# averaged per-generation scores are pickled to disk.
run_internal_experiment()

0
   0
   1
   2
   3
