# Introduction
This notebook uses a multi-generational simulation of science to explore how false publications can emerge.

In [1]:
import pandas as pd
import matplotlib.pylab as plt
import numpy as np
import scipy
from scipy.stats import beta, binom, entropy
import random
import json
import copy
import math
import pickle
import statistics

# my modules
import scientist
import evaluation
import helper
import settings
import publisher

# global variables (read as module-level names by the functions defined below)
num_bins = 30  # number of bins in the scientific record and in bins_to_probs
num_draws = 10  # number of times each participant samples before reporting
num_participants = 10  # participants created per generation by make_participants
num_generations = 15  # generations simulated per experiment

In [2]:
# distribution of bins: every bin index 0..num_bins-1 maps to the same value, 0.5
bins_to_probs = {bin_index: 0.5 for bin_index in range(num_bins)}

## Initialize participants

In [3]:
def make_participants(setting, alpha_value):
    """Create a fresh cohort of ``num_participants`` participants.

    Parameters
    ----------
    setting : str
        Name of the reporting setting; must be "rate", "data" or "subset".
    alpha_value
        Passed unchanged as ``alpha`` to every ``scientist.Participant``.

    Returns
    -------
    list
        ``num_participants`` new ``scientist.Participant`` objects, each
        holding its own ``settings.ReportingSetting(setting)`` instance.

    Raises
    ------
    ValueError
        If ``setting`` is not one of the recognised names. Previously an
        unknown name left ``report_set`` unassigned and failed with an
        UnboundLocalError inside the loop.
    """
    valid_settings = ("rate", "data", "subset")
    if setting not in valid_settings:
        raise ValueError(
            f"unknown reporting setting {setting!r}; expected one of {valid_settings}"
        )

    # one ReportingSetting per participant, as before, so no instance is shared
    return [
        scientist.Participant(
            alpha=alpha_value,
            reporting_setting=settings.ReportingSetting(setting),
        )
        for _ in range(num_participants)
    ]

## Run an experiment

Run the multi-generational experiment for a given reporting setting and exaggeration value.

In [4]:
def run_experiment(setting, alpha_value, rel_pl_data_val, rel_pl_surprise_val, rel_pl_bias_val):
    """Simulate ``num_generations`` generations and score the final scientific record.

    Each generation builds a new cohort with ``make_participants(setting,
    alpha_value)``. Every participant samples ``num_draws`` times, chooses a
    bin and files a report; ``publisher.peer_review`` then returns the updated
    record, using the three ``rel_pl_*`` policy weights.

    Reads the module-level ``num_bins``, ``num_draws``, ``num_generations``
    and ``bins_to_probs``.

    Returns
    -------
    tuple
        ``(arm_parameter_score, total_entropy_score)`` as computed by the
        ``evaluation`` module on the final record.
    """
    # each experiment starts with a blank canon (a 1-1 prior in every bin)
    scientific_record = {bin_num: {0: 1, 1: 1} for bin_num in range(num_bins)}

    for _generation in range(num_generations):
        # each generation gets an entirely new set of participants
        cohort = make_participants(setting, alpha_value)

        # scientists explore, pick a bin, and submit a report
        for member in cohort:
            for _draw in range(num_draws):
                member.sample(scientific_record, num_bins, bins_to_probs)
            member.choose_bin(scientific_record, num_bins, num_draws)
            member.report(num_bins, num_draws)

        # the peer review board selects reports and returns the updated record
        scientific_record = publisher.peer_review(
            cohort,
            scientific_record,
            rel_pl_data_val,
            rel_pl_surprise_val,
            rel_pl_bias_val,
            num_draws,
        )

    arm_score = evaluation.arm_parameter_score(scientific_record, bins_to_probs)
    entropy_score = evaluation.total_entropy_score(scientific_record, bins_to_probs)
    return (arm_score, entropy_score)

In [5]:
# single baseline run: "data" reporting, alpha 0, data and surprise weights 1, bias weight 0
run_experiment(
    setting="data",
    alpha_value=0,
    rel_pl_data_val=1,
    rel_pl_surprise_val=1,
    rel_pl_bias_val=0,
)

(0.21185185185185187, 0.13994660420474755)

## Searching over the space of publishing policies

Search across the relative weights given to how much data is associated with a report, how surprising the report is, and publication bias.

In [6]:
# Each policy axis is sampled at 21 evenly spaced points (endpoints included).

# weight on the amount of supporting data: 0, 0.5, ..., 10
rel_pl_data = np.linspace(start=0, stop=10, num=21)

# weight on how surprising the data is: 0, 0.5, ..., 10
rel_pl_surprise = np.linspace(start=0, stop=10, num=21)

# rate of bump for publication bias (0.01 = 1% publication bias): 0, 0.05, ..., 1
rel_pl_bias = np.linspace(start=0, stop=1, num=21)

In [20]:
# Grid search over every (data weight, surprise weight, bias) combination.
# For each combination the arm score and entropy score returned by
# run_experiment are averaged over num_runs_per_policy independent runs.
num_runs_per_policy = 10

publishing_policies_space = {}
exp_no = 0  # number of policy combinations evaluated so far

for rel_pl_data_val in rel_pl_data:
    print(f"examining value: {rel_pl_data_val}")
    for rel_pl_surprise_val in rel_pl_surprise:
        for rel_pl_bias_val in rel_pl_bias:
            total_arm_score = 0
            total_entropy_score = 0

            for _ in range(num_runs_per_policy):
                arm_score, entropy_score = run_experiment("data", 0, rel_pl_data_val, rel_pl_surprise_val, rel_pl_bias_val)
                total_arm_score += arm_score
                total_entropy_score += entropy_score

            key = (rel_pl_data_val, rel_pl_surprise_val, rel_pl_bias_val)
            publishing_policies_space[key] = [
                total_arm_score / num_runs_per_policy,
                total_entropy_score / num_runs_per_policy,
            ]
            exp_no += 1

# save the results; the context manager guarantees the file is flushed and closed
# NOTE(review): the file name says "3_bins" -- confirm it matches the num_bins in effect for this run
with open("/Users/marinamancoridis/Thesis/Thesis_Simulations/publishing_policies_3_bins.p", "wb") as results_file:
    pickle.dump(publishing_policies_space, results_file)

examining value: 0.0
examining value: 0.5
examining value: 1.0
examining value: 1.5
examining value: 2.0
examining value: 2.5
examining value: 3.0
examining value: 3.5
examining value: 4.0
examining value: 4.5
examining value: 5.0
examining value: 5.5
examining value: 6.0
examining value: 6.5
examining value: 7.0
examining value: 7.5
examining value: 8.0
examining value: 8.5
examining value: 9.0
examining value: 9.5
examining value: 10.0


In [7]:
# global variables
num_bins = 30
num_draws = 10
num_participants = 10
num_generations = 15

# distribution of bins: every bin gets the same value, 0.5
bins_to_probs = dict.fromkeys(range(num_bins), 0.5)

In [11]:
# Grid search over every (data weight, surprise weight, bias) combination.
# For each combination this stores the mean arm score, the mean entropy score
# and the sample standard deviation of the entropy score across runs.
num_runs_per_policy = 10

publishing_policies_space = {}
exp_no = 0  # number of policy combinations evaluated so far

for rel_pl_data_val in rel_pl_data:
    print(f"examining value: {rel_pl_data_val}")
    for rel_pl_surprise_val in rel_pl_surprise:
        print(f"   surprise value: {rel_pl_surprise_val}")
        for rel_pl_bias_val in rel_pl_bias:
            total_arm_score = 0
            total_entropy_score = 0
            entropy_scores = []  # one entry per run, used for the standard deviation

            for _ in range(num_runs_per_policy):
                arm_score, entropy_score = run_experiment("data", 0, rel_pl_data_val, rel_pl_surprise_val, rel_pl_bias_val)
                total_arm_score += arm_score
                total_entropy_score += entropy_score
                # record this run's own score; appending the running total would
                # make std_dev measure the spread of cumulative sums instead
                entropy_scores.append(entropy_score)

            std_dev = statistics.stdev(entropy_scores)

            key = (rel_pl_data_val, rel_pl_surprise_val, rel_pl_bias_val)
            publishing_policies_space[key] = [
                total_arm_score / num_runs_per_policy,
                total_entropy_score / num_runs_per_policy,
                std_dev,
            ]
            exp_no += 1

# save the results; the context manager guarantees the file is flushed and closed
with open("/Users/marinamancoridis/Thesis/Thesis_Simulations/publishing_policies_30_bins_d2.p", "wb") as results_file:
    pickle.dump(publishing_policies_space, results_file)

examining value: 0.0
   surprise value: 0.0
   surprise value: 0.5
   surprise value: 1.0
   surprise value: 1.5
   surprise value: 2.0
   surprise value: 2.5
   surprise value: 3.0
   surprise value: 3.5
   surprise value: 4.0
   surprise value: 4.5
   surprise value: 5.0
   surprise value: 5.5
   surprise value: 6.0
   surprise value: 6.5
   surprise value: 7.0
   surprise value: 7.5
   surprise value: 8.0
   surprise value: 8.5
   surprise value: 9.0
   surprise value: 9.5
   surprise value: 10.0
examining value: 0.5
   surprise value: 0.0
   surprise value: 0.5
   surprise value: 1.0
   surprise value: 1.5
   surprise value: 2.0
   surprise value: 2.5
   surprise value: 3.0
   surprise value: 3.5
   surprise value: 4.0
   surprise value: 4.5
   surprise value: 5.0
   surprise value: 5.5
   surprise value: 6.0
   surprise value: 6.5
   surprise value: 7.0
   surprise value: 7.5
   surprise value: 8.0
   surprise value: 8.5
   surprise value: 9.0
   surprise value: 9.5
   surprise va

KeyboardInterrupt: 

## Fix a setting and look at how the evaluation metric changes over generations

In [5]:
# global variables
num_bins = 30
num_draws = 10
num_participants = 10
num_generations = 50

# distribution of bins: each of the num_bins bins is assigned 0.5
bins_to_probs = {bin_index: 0.5 for bin_index in range(num_bins)}

In [6]:
def run_experiment_gen_info(setting, alpha_value, rel_pl_data_val, rel_pl_surprise_val, rel_pl_bias_val):
    """Run the multi-generational experiment and also track the entropy score per generation.

    Follows the same sample / choose / report / peer-review cycle as
    ``run_experiment``. After each generation's peer review it additionally
    stores ``evaluation.total_entropy_score`` of the updated record.

    Reads the module-level ``num_bins``, ``num_draws``, ``num_generations``
    and ``bins_to_probs``.

    Returns
    -------
    tuple
        ``(arm_parameter_score, total_entropy_score, kl_per_gen)``. The first
        two are computed on the final record; ``kl_per_gen`` maps each
        generation index to the entropy score recorded after that generation.
    """
    # blank record: a 1-1 prior in every bin
    scientific_record = {bin_num: {0: 1, 1: 1} for bin_num in range(num_bins)}

    kl_per_gen = {}

    for generation in range(num_generations):
        # each generation gets an entirely new set of participants
        cohort = make_participants(setting, alpha_value)

        # scientists explore, pick a bin, and submit a report
        for member in cohort:
            for _draw in range(num_draws):
                member.sample(scientific_record, num_bins, bins_to_probs)
            member.choose_bin(scientific_record, num_bins, num_draws)
            member.report(num_bins, num_draws)

        # the peer review board selects reports and returns the updated record
        scientific_record = publisher.peer_review(
            cohort,
            scientific_record,
            rel_pl_data_val,
            rel_pl_surprise_val,
            rel_pl_bias_val,
            num_draws,
        )

        kl_per_gen[generation] = evaluation.total_entropy_score(scientific_record, bins_to_probs)

    final_arm_score = evaluation.arm_parameter_score(scientific_record, bins_to_probs)
    final_entropy_score = evaluation.total_entropy_score(scientific_record, bins_to_probs)
    return (final_arm_score, final_entropy_score, kl_per_gen)

In [None]:
# Sweep the value assigned to every bin in bins_to_probs. For each value,
# record the per-generation entropy score averaged over num_repeats runs.
# NOTE(review): the saved output of this cell stops after 0.9 -- confirm the
# run for value 1 completed before relying on the pickled file.
num_repeats = 5
values = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]

# pre-populate so every swept value has an entry even if the sweep is interrupted
final_map = {value: {} for value in values}

for value in values:
    print(value)
    # distribution of bins; run_experiment_gen_info reads this module-level
    # name, so it is rebound here (and stays at the last value afterwards)
    bins_to_probs = {bin_index: value for bin_index in range(num_bins)}

    average_KL_per_generation = {gen_no: 0 for gen_no in range(num_generations)}

    for _ in range(num_repeats):
        arm_score, entropy_score, kl_per_gen = run_experiment_gen_info("data", 0, 1, 1, 0)
        for gen_no in kl_per_gen:
            average_KL_per_generation[gen_no] += kl_per_gen[gen_no]

    for key in average_KL_per_generation:
        average_KL_per_generation[key] /= num_repeats

    final_map[value] = average_KL_per_generation

# save the results; the context manager guarantees the file is flushed and closed
with open("/Users/marinamancoridis/Thesis/Thesis_Simulations/final_map_new.p", "wb") as results_file:
    pickle.dump(final_map, results_file)

0
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9


## Which parameters give us the publication crisis?

This analysis looks at the percentage of false publications and finds the settings that match the rate reported in science in 2015: around ⅓.
- Limit to one generation
- Define the false publication rate to be the average absolute deviation between the true and published rates across bins
- One interesting result would be to see that you need super high surprise values to get you there...

In [4]:
# global variables
num_bins = 3
num_draws = 10
num_participants = 10
num_generations = 3

# distribution of bins: every bin gets the same value, 0.5
bins_to_probs = dict.fromkeys(range(num_bins), 0.5)

In [5]:
# aim is to graph scalar for surprise against false publication rate (also graph y = 0.3)
# plot a line corresponding to different scalar values for the amount of data...
# ... this will tell you how well an increase in data is able to curtail false publication values
# surprise weights 0, 10, ..., 100 and data weights 0, 1, ..., 10
surprise_values = list(range(0, 101, 10))
data_values = list(range(11))

In [6]:
# false publication rate = false publications / (false + true publications),
# using the counts returned by publisher.peer_review_with_fpr

def run_experiment_publication_crisis(setting, alpha_value, rel_pl_data_val, rel_pl_surprise_val, rel_pl_bias_val):
    """Run the multi-generational experiment and return its false publication rate.

    Follows the same sample / choose / report cycle as ``run_experiment``, but
    reviews with ``publisher.peer_review_with_fpr``, which also returns how
    many true and false publications it counted for the generation. The
    current record is printed with ``helper.print_record`` at the start of
    every generation.

    Reads the module-level ``num_bins``, ``num_draws``, ``num_generations``
    and ``bins_to_probs``.

    Returns
    -------
    float
        Total false publications divided by total publications, summed over
        all generations.

    Raises
    ------
    ZeroDivisionError
        If no publication at all was counted (for example when
        ``num_generations`` is 0).
    """
    # blank record: a 1-1 prior in every bin
    scientific_record = {bin_num: {0: 1, 1: 1} for bin_num in range(num_bins)}
    false_count = 0
    true_count = 0

    for _generation in range(num_generations):
        helper.print_record(scientific_record, num_bins)

        # each generation gets an entirely new set of participants
        cohort = make_participants(setting, alpha_value)

        # scientists explore, pick a bin, and submit a report
        for member in cohort:
            for _draw in range(num_draws):
                member.sample(scientific_record, num_bins, bins_to_probs)
            member.choose_bin(scientific_record, num_bins, num_draws)
            member.report(num_bins, num_draws)

        # peer review returns the updated record plus this generation's counts
        scientific_record, gen_true, gen_false = publisher.peer_review_with_fpr(
            cohort,
            scientific_record,
            rel_pl_data_val,
            rel_pl_surprise_val,
            rel_pl_bias_val,
            num_draws,
            bins_to_probs,
        )
        false_count += gen_false
        true_count += gen_true

    # overall false publication rate across all generations
    return (false_count / (false_count + true_count))

In [7]:
# single run: "data" reporting, alpha 0, data weight 1, surprise weight 10, bias weight 0
run_experiment_publication_crisis(
    setting="data",
    alpha_value=0,
    rel_pl_data_val=1,
    rel_pl_surprise_val=10,
    rel_pl_bias_val=0,
)

Scientific record
   bin 0: 1 zero(s), 1 one(s)
   bin 1: 1 zero(s), 1 one(s)
   bin 2: 1 zero(s), 1 one(s)

REPORTED BIN: 2 ZEROS, 8 ONES
p_zero: 0.5
p value: 0.0546875
it was a false publication
REPORTED BIN: 4 ZEROS, 1 ONES
p_zero: 0.5
p value: 0.96875
it was a true publication
Scientific record
   bin 0: 7 zero(s), 10 one(s)
   bin 1: 1 zero(s), 1 one(s)
   bin 2: 1 zero(s), 1 one(s)

REPORTED BIN: 5 ZEROS, 1 ONES
p_zero: 0.5
p value: 0.984375
it was a true publication
REPORTED BIN: 8 ZEROS, 2 ONES
p_zero: 0.5
p value: 0.9892578125
it was a true publication
Scientific record
   bin 0: 7 zero(s), 10 one(s)
   bin 1: 9 zero(s), 3 one(s)
   bin 2: 6 zero(s), 2 one(s)

REPORTED BIN: 2 ZEROS, 4 ONES
p_zero: 0.5
p value: 0.3437500000000001
it was a true publication
REPORTED BIN: 3 ZEROS, 7 ONES
p_zero: 0.5
p value: 0.171875
it was a true publication


0.16666666666666666

In [76]:
# Average false publication rate for every (surprise weight, data weight)
# pair, taken over num_runs_per_setting experiments per pair.
num_runs_per_setting = 10

fpr = {}

for surprise_val in surprise_values:
    print(surprise_val)
    for data_val in data_values:
        # start each pair from zero: without this the name is undefined on a
        # fresh kernel and the sum carries over from one pair to the next
        false_pub_rate = 0
        # take the average across ten experiments
        for _ in range(num_runs_per_setting):
            false_pub_rate += run_experiment_publication_crisis("data", 0, data_val, surprise_val, 0)
        key = (surprise_val, data_val)
        fpr[key] = false_pub_rate / num_runs_per_setting

# save the results; the context manager guarantees the file is flushed and closed
with open("/Users/marinamancoridis/Thesis/Thesis_Simulations/fpr.p", "wb") as results_file:
    pickle.dump(fpr, results_file)

0
10
20
30
40
50
60
70
80
90
100


## Tying in behavioral findings for cogsci paper 

In [11]:
import pandas as pd
import json
import matplotlib.pyplot as plt
import numpy as np
# my modules
import scientist
import evaluation
import helper
import settings
import publisher
import pandas as pd
import matplotlib.pylab as plt
import numpy as np
import scipy
from scipy.stats import beta, binom, entropy
import random
import json
import copy
import math
import pickle

# CSV of behavioural participant data, located relative to the notebook's working directory.
# The parsing cell below reads its 'id', 'report', 'drug_to_report' and 'final_reported_answer' columns.
file_path = '../cos360_experiment/participant_data_ratbandit.csv'
df = pd.read_csv(file_path)

In [12]:
# one (initially empty) list of participants per reporting style
full_participants, subset_participants, headline_participants = [], [], []

In [13]:
# Rebuild one scientist.Participant per row of the behavioural data and file it
# under the reporting style ("full", "subset" or "headline") recorded in the row.
for index, row in df.iterrows():
    participant_id = row['id']  # read from the row but not used further in this cell

    # both JSON columns are decoded once per row and reused below
    report = json.loads(row['report'])
    reporting_type = report['report']['style']
    bin_choice = int(row['drug_to_report'])
    results_dict = json.loads(row['final_reported_answer'])
    participant = scientist.Participant(0, reporting_type)

    if reporting_type != "headline":
        draws = report['data']

        # first element of every recorded draw, in the order the draws were made
        participant.bin_sample_order = [draw[0] for draw in draws]

        # outcomes coded as 0 = died, 1 = survived; any other label is skipped
        participant.values_sampled = [
            0 if draw[1] == 'died' else 1
            for draw in draws
            if draw[1] in ('died', 'survived')
        ]

        # 'drug_to_report' is shifted down by one before being stored as the bin choice
        participant.bin_choice = bin_choice - 1

        reported_results = {}

        if reporting_type == "subset":
            selected = results_dict['Please select a subset of your data that you would like to report (you can select more than one item).']

            # an item counts as "survived" if it mentions it, otherwise as "died" if it mentions that
            survived_count = sum(1 for item in selected if "survived" in item)
            died_count = sum(1 for item in selected if "survived" not in item and "died" in item)

            reported_results['1'] = survived_count
            reported_results['0'] = died_count

        elif reporting_type == "full":
            reported_results['1'] = results_dict["Report the number of rats that survived for the chosen container:"]
            reported_results['0'] = results_dict["Report the number of rats that died for the chosen container:"]

        participant.reported_results = reported_results

    # file the participant under its reporting style; unknown styles are not stored
    for style_name, bucket in (
        ("full", full_participants),
        ("subset", subset_participants),
        ("headline", headline_participants),
    ):
        if reporting_type == style_name:
            bucket.append(participant)
            break

In [14]:
# global variables
num_bins = 3
num_draws = 10
num_participants = 10
num_generations = 1

# distribution of bins: each of the num_bins bins is assigned 0.5
bins_to_probs = {bin_index: 0.5 for bin_index in range(num_bins)}

In [15]:
# Publishing-policy weights shared by both peer-review passes below.
rel_pl_data_val = 1
rel_pl_surprise_val = 1
rel_pl_bias_val = 0

# Counters reset in this cell; nothing in the cell reads them afterwards.
total_false_published = 0
total_true_published = 0

# Run peer review once per reporting style, each time on a blank record.
for heading, cohort in (
    ("FULL REPORTING", full_participants),
    ("SUBSET REPORTING", subset_participants),
):
    # blank record: a 1-1 prior in every bin
    scientific_record = {bin_num: {0: 1, 1: 1} for bin_num in range(num_bins)}

    print(heading)
    scientific_record = publisher.peer_review(
        cohort,
        scientific_record,
        rel_pl_data_val,
        rel_pl_surprise_val,
        rel_pl_bias_val,
        num_draws,
    )
    print(scientific_record)

FULL REPORTING
{0: {0: 1, 1: 1}, 1: {0: 3, 1: 3}, 2: {0: 3, 1: 6}}
SUBSET REPORTING
{0: {0: 1, 1: 1}, 1: {0: 1, 1: 4}, 2: {0: 4, 1: 1}}
