# Introduction
This notebook uses a multi-generational simulation of science to explore how false publications can emerge.

In [31]:
import seaborn as sns
import pandas as pd
import matplotlib.pylab as plt
import numpy as np
import scipy
from scipy.stats import beta, binom, entropy
import random
import json
import copy
import math
import pickle

# my modules
import scientist
import evaluation
import helper
import settings
import publisher

# global variables
# NOTE(review): no random seed is set in the visible cells, so results of the
# (presumably stochastic) simulation may differ between runs — confirm whether
# the project modules seed their own RNGs.
num_bins = 4  # number of bins; sizes bins_to_probs and the initial scientific record
num_draws = 10  # samples each participant takes before choosing a bin
num_participants = 10  # participants created for each generation
num_generations = 3  # generations simulated per experiment

In [32]:
# distribution of bins: every bin index 0..num_bins-1 maps to probability 0.5
bins_to_probs = {bin_index: 0.5 for bin_index in range(num_bins)}

## Initialize participants

In [33]:
def make_participants(setting, alpha_value):
    """Create a fresh list of participants for one generation.

    Parameters
    ----------
    setting : str
        Name of the reporting setting; one of "rate", "data" or "subset".
    alpha_value
        Forwarded to every ``scientist.Participant`` as ``alpha``.

    Returns
    -------
    list
        ``num_participants`` new ``scientist.Participant`` objects, each built
        with its own ``settings.ReportingSetting(setting)`` instance.

    Raises
    ------
    ValueError
        If ``setting`` is not one of the supported names.
    """
    valid_settings = ("rate", "data", "subset")

    # Fail fast with a clear message. Previously an unknown setting left
    # `report_set` unassigned and surfaced later as an UnboundLocalError.
    if setting not in valid_settings:
        raise ValueError(
            f"unknown reporting setting {setting!r}; expected one of {valid_settings}"
        )

    # A separate ReportingSetting is constructed per participant so that no
    # setting object is shared between participants.
    return [
        scientist.Participant(
            alpha=alpha_value,
            reporting_setting=settings.ReportingSetting(setting),
        )
        for _ in range(num_participants)
    ]

## Run an experiment

Run the multi-generational experiment for a given reporting setting, exaggeration values, and publishing-policy weights.

In [34]:
def run_experiment(setting, alpha_value, rel_pl_data_val, rel_pl_surprise_val, rel_pl_bias_val):
    """Run one multi-generational experiment and score the final record.

    Parameters
    ----------
    setting, alpha_value
        Forwarded to ``make_participants`` for every generation.
    rel_pl_data_val, rel_pl_surprise_val, rel_pl_bias_val
        Publishing-policy weights forwarded to ``publisher.peer_review``.

    Returns
    -------
    tuple
        ``(arm_parameter_score, total_entropy_score)`` as computed by the
        ``evaluation`` module on the final scientific record.
    """
    # a new experiment begins from a blank canon: a 1-1 prior on every bin
    scientific_record = {bin_num: {0: 1, 1: 1} for bin_num in range(num_bins)}

    for _generation in range(num_generations):
        # no participant carries over from one generation to the next
        cohort = make_participants(setting, alpha_value)

        for researcher in cohort:
            # exploration phase: num_draws samples per participant
            for _draw in range(num_draws):
                _bin_number, _value = researcher.sample(scientific_record, num_bins, bins_to_probs)

            # commit to a bin, then write up a report
            researcher.choose_bin(scientific_record, num_bins, num_draws)
            researcher.report(num_bins, num_draws)

        # peer review picks the reports to publish and hands back the updated record
        scientific_record = publisher.peer_review(
            cohort,
            scientific_record,
            rel_pl_data_val,
            rel_pl_surprise_val,
            rel_pl_bias_val,
        )

    # final metrics: the arm-parameter score and the entropy score of the record
    arm_score = evaluation.arm_parameter_score(scientific_record, bins_to_probs)
    entropy_score = evaluation.total_entropy_score(scientific_record, bins_to_probs)
    return arm_score, entropy_score

In [35]:
# single run with the "data" reporting setting and every numeric argument set to 0
run_experiment(
    "data",
    alpha_value=0,
    rel_pl_data_val=0,
    rel_pl_surprise_val=0,
    rel_pl_bias_val=0,
)

(0.15625, 0.07998783325514161)

## Searching over the space of publishing policies

Search across the relative weights given to how much data supports a report, how surprising the report is, and publication bias.

In [38]:
# weight on the amount of supporting data: 21 evenly spaced values over [0, 10]
rel_pl_data = np.linspace(start=0, stop=10, num=21)

# weight on how surprising the data is: the same 21-point grid over [0, 10]
rel_pl_surprise = np.linspace(start=0, stop=10, num=21)

# rate of bump for publication bias (0.01 = 1% publication bias):
# 20 evenly spaced values over [0, 1]
rel_pl_bias = np.linspace(start=0, stop=1, num=20)

In [None]:
# Sweep every (data, surprise, bias) weight combination and record the mean
# scores of repeated "data"-setting experiments for each combination.

# number of repeated runs averaged for every policy combination
num_runs_per_policy = 10

# maps (rel_pl_data_val, rel_pl_surprise_val, rel_pl_bias_val) ->
# [mean arm score, mean entropy score]
publishing_policies_space = {}
exp_no = 0  # running count of policy combinations evaluated so far

for rel_pl_data_val in rel_pl_data:
    print(f"examining value: {rel_pl_data_val}")
    for rel_pl_surprise_val in rel_pl_surprise:
        for rel_pl_bias_val in rel_pl_bias:
            total_arm_score = 0
            total_entropy_score = 0

            for i in range(num_runs_per_policy):
                arm_score, entropy_score = run_experiment("data", 0, rel_pl_data_val, rel_pl_surprise_val, rel_pl_bias_val)
                total_arm_score += arm_score
                total_entropy_score += entropy_score

            key = (rel_pl_data_val, rel_pl_surprise_val, rel_pl_bias_val)
            # average over the repeated runs of this combination
            publishing_policies_space[key] = [
                total_arm_score / num_runs_per_policy,
                total_entropy_score / num_runs_per_policy,
            ]
            exp_no += 1

# save the results
# NOTE(review): results are only written after the whole sweep finishes, and the
# destination is an absolute, machine-specific path — consider a configurable
# output directory.
# The `with` block guarantees the file is flushed and closed even if dump fails.
with open("/Users/marinamancoridis/Thesis/Thesis_Simulations/publishing_policies.p", "wb") as results_file:
    pickle.dump(publishing_policies_space, results_file)

examining value: 0.0
examining value: 0.5263157894736842
examining value: 1.0526315789473684
examining value: 1.5789473684210527
examining value: 2.1052631578947367
examining value: 2.631578947368421
examining value: 3.1578947368421053
examining value: 3.6842105263157894
examining value: 4.2105263157894735
examining value: 4.7368421052631575
examining value: 5.263157894736842
examining value: 5.789473684210526
examining value: 6.315789473684211
examining value: 6.842105263157895
examining value: 7.368421052631579
examining value: 7.894736842105263
examining value: 8.421052631578947
