# Introduction
This notebook uses a multi-generational simulation of science to explore how false publications can emerge.

In [9]:
import seaborn as sns
import pandas as pd
import matplotlib.pylab as plt
import numpy as np
from scipy.stats import beta, binom
import random
import json

# simulation-wide global variables
num_bins = 3            # number of bins; bin indices are range(num_bins)
num_draws = 10          # draws each participant takes before choosing a bin and reporting
num_participants = 100  # number of participants built by make_participants
num_generations = 3     # number of generations looped over in run_experiment

##  Reporting Settings
Each participant is assigned one of three settings that determine how they are allowed to report their data:
1. **Rate**: Pick a single bin and report the survival rate of its pill contents.
2. **Data**: Pick a single bin and report the total number of rats that died and the total number that stayed alive.
3. **Subset**: Pick a single bin and choose a subset of its data to publish.

In [18]:
class ReportingSetting:
    """The reporting format a participant is restricted to.

    The only accepted names are "rate", "data" and "subset"; anything else
    raises ValueError.
    """

    def __init__(self, name):
        allowed_names = {"rate", "data", "subset"}
        if name in allowed_names:
            self.name = name
        else:
            raise ValueError("Improper setting name")

## Strategy to sample a bin
To gather data, participants make a soft-selection over the options that they expect to yield the highest KL divergence from the scientific record.

In [11]:
def draw(draw_number, bin_sample_order, values_sampled):
    """Placeholder sampling strategy: always yields bin 0 with value 0.

    The three arguments are accepted but not used yet.

    Returns:
        A ``(bin_number, value)`` tuple.
    """
    return 0, 0

## Strategy to select a bin
Participants use the following strategy to select a bin.

In [19]:
def choose_bin(bin_sample_order, values_sampled):
    """Placeholder bin-selection strategy: always picks bin 0.

    Both arguments are accepted but not used yet.
    """
    return 0

## Strategy to exaggerate findings
We hypothesize that the participants will report their results with some degree $\alpha$ of exaggeration. When $\alpha = 0$, this reduces to the strategy of reporting honest, unmanipulated results. When $\alpha = 1$, this reduces to the strategy of reporting maximum values.

In [21]:
class ReportingStrategy():
    """Turns the observations for one bin into a (possibly exaggerated) report."""

    def __init__(self):
        pass

    def report(self, reporting_setting, alpha, bin_history):
        """Build the report for one bin under the given setting.

        Args:
            reporting_setting: One of "rate", "data" or "subset".
            alpha: Degree of exaggeration in [0, 1]; 0 reports honestly.
            bin_history: Sequence of observed values; entries equal to 1 and 0
                are counted, anything else is ignored.

        Returns:
            For "rate", a float: the observed rate of 1s moved a fraction
            ``alpha`` of the way towards 1 (0.5 is used as the observed rate
            when there are no observations). For "data" and "subset", a dict
            ``{"0": reported_zeros, "1": reported_ones}``.

        Raises:
            ValueError: If ``alpha`` is outside [0, 1] or ``reporting_setting``
                is not recognised.
        """
        if alpha < 0 or alpha > 1:
            raise ValueError("Alpha must be between 0 and 1")

        num_zeros = bin_history.count(0)
        num_ones = bin_history.count(1)

        # overreport by a proportion of alpha of the remaining rate to get to a value of 1
        if reporting_setting == "rate":
            total = num_ones + num_zeros
            accurate_rate = 0.5 if total == 0 else num_ones / total
            return accurate_rate + alpha * (1 - accurate_rate)

        # overreport the number of '1's and underreport the number of '0's by a rate of alpha
        if reporting_setting == "data":
            return {
                "0": round(num_zeros * (1 - alpha)),
                "1": round(num_ones * (1 + alpha)),
            }

        # remove (100 * alpha)% of the '0' results
        if reporting_setting == "subset":
            return {"0": round(num_zeros * (1 - alpha)), "1": num_ones}

        # Previously an unknown setting fell through and returned None, which
        # only surfaced later as an obscure error in the review step.
        raise ValueError(f"Unknown reporting setting: {reporting_setting!r}")

## Participants
Each participant keeps a personal record of their draws and results.

In [13]:
class Participant:
    """A simulated scientist who samples bins, picks one bin and reports on it.

    Sampling and bin selection are delegated to the module-level ``draw`` and
    ``choose_bin`` functions; reporting is delegated to ``strategy_report``.
    """

    next_id = 1  # Class variable to keep track of the next available ID

    def __init__(self, strategy_report, reporting_setting):
        """Create a participant with an empty sampling record.

        Args:
            strategy_report: Object whose ``report(setting_name, alpha, bin_history)``
                method produces the reported results.
            reporting_setting: Object whose ``name`` attribute is passed to
                ``strategy_report.report``.
        """
        self.id = Participant.next_id  # Assign a unique ID to the participant
        Participant.next_id += 1  # Update the next available ID for the next participant

        self.strategy_report = strategy_report      # how exaggerated the findings are
        self.reporting_setting = reporting_setting  # type of report they can make
        self.bin_sample_order = []                  # order of bins sampled
        self.values_sampled = []                    # values received across draws
        self.bin_choice = -1                        # the bin chosen to be reported
        # Stored on the instance (it used to be a discarded local), so the
        # attribute exists even before report() has been called.
        self.reported_results = None                # the results reported

    def sample(self):
        """Take one draw and append the sampled bin and value to the record."""
        bin_number, value = draw(len(self.values_sampled), self.bin_sample_order, self.values_sampled)
        self.bin_sample_order.append(bin_number)
        self.values_sampled.append(value)

    def choose_bin(self, bin_sample_order=None, values_sampled=None):
        """Select the bin to report on and store it in ``self.bin_choice``.

        The two parameters are kept for compatibility with existing callers but
        are ignored: the participant's own record is always used.
        """
        self.bin_choice = choose_bin(self.bin_sample_order, self.values_sampled)

    def report(self, alpha):
        """Compute and store ``self.reported_results`` for the chosen bin.

        Args:
            alpha: Exaggeration level forwarded to the reporting strategy.
        """
        history = get_full_history(self.bin_sample_order, self.values_sampled)
        # NOTE(review): only the snapshot at index num_draws - 1 is read, so any
        # draws taken beyond the first num_draws are not reflected in the
        # report -- confirm this is intended for multi-generation runs.
        bin_history = history[num_draws - 1][self.bin_choice]
        self.reported_results = self.strategy_report.report(self.reporting_setting.name, alpha, bin_history)

In [14]:
# returns a data structure that shows, on each draw, the values seen in each bin at that point
def get_full_history(bin_sample_order, values_sampled, n_bins=None, n_draws=None):
    """Build the cumulative per-bin observations after every draw.

    Args:
        bin_sample_order: Bin index sampled on each draw, in draw order.
        values_sampled: Value observed on each draw, aligned with
            ``bin_sample_order``.
        n_bins: Number of bins; defaults to the global ``num_bins``.
        n_draws: Minimum number of draw slots in the result; defaults to the
            global ``num_draws``.

    Returns:
        ``{draw_index: {bin_index: [values seen in that bin so far]}}``.
        The result has ``max(n_draws, len(bin_sample_order))`` draw slots, so
        records longer than ``n_draws`` no longer raise KeyError. Slots after
        the last recorded draw keep empty lists (they are not carried forward).
    """
    if n_bins is None:
        n_bins = num_bins
    if n_draws is None:
        n_draws = num_draws

    total_draws = max(n_draws, len(bin_sample_order))
    history = {
        draw_index: {bin_index: [] for bin_index in range(n_bins)}
        for draw_index in range(total_draws)
    }

    # ``draw_index`` (rather than ``draw``) avoids shadowing the draw() function.
    for draw_index in range(len(bin_sample_order)):
        sampled_bin = bin_sample_order[draw_index]
        if draw_index == 0:
            history[draw_index][sampled_bin].append(values_sampled[draw_index])
            continue

        previous = history[draw_index - 1]
        for bin_index, seen in previous.items():
            if bin_index == sampled_bin:
                history[draw_index][bin_index] = seen + [values_sampled[draw_index]]
            else:
                # copy so later snapshots never alias earlier ones
                history[draw_index][bin_index] = seen[:]
    return history

## Initializing collection of empty participants

In [22]:
def make_participants(reporting_strategy, setting, alpha_value):
    """Create ``num_participants`` fresh participants sharing one configuration.

    Args:
        reporting_strategy: Strategy identifier; only "rs" (ReportingStrategy)
            is supported.
        setting: Reporting setting name, validated by ReportingSetting.
        alpha_value: Accepted for interface compatibility; not used here (the
            exaggeration level is passed to ``Participant.report`` instead).

    Returns:
        A list of Participant objects, each with its own ReportingStrategy and
        ReportingSetting instance.

    Raises:
        ValueError: If ``reporting_strategy`` is not "rs", or (from
            ReportingSetting) if ``setting`` is not a valid setting name.
            Previously either case surfaced as an unbound-local error.
    """
    if reporting_strategy != "rs":
        raise ValueError(f"Unknown reporting strategy: {reporting_strategy!r}")

    participants = []
    for _ in range(num_participants):
        participant = Participant(
            strategy_report=ReportingStrategy(),
            reporting_setting=ReportingSetting(setting),
        )
        participants.append(participant)

    return participants

## Defining the peer review layer
This layer takes in reports from scientists, selects reports for publication, and thereby updates the scientific record. The scientific record consists of the number of positive and negative draws associated with each bin. We do this because we assume that published data is given fully (not just publishing some sort of aggregation of the submitted results).

In [23]:
def _percentile_scores(values):
    """Score each value from 0 to 10 by its percentile rank within ``values``."""
    cutoffs = np.percentile(values, np.arange(0, 101, 1))
    return [np.searchsorted(cutoffs, value) / len(cutoffs) * 100 / 10 for value in values]


def _apply_publication_bias(score, is_positive):
    """Move a score 5% of the way towards 10 if positive, else shrink it by 5%."""
    if is_positive:
        return score + (10 - score) * 0.05
    return 0.95 * score


def _average_percent_deviation(final_reports, num_published):
    """Mean percent deviation from a 0.5 rate among the top-scoring reports, rounded."""
    ranked = sorted(final_reports.items(), key=lambda item: item[1]["score"], reverse=True)
    published = ranked[:num_published]

    # TODO: change this to just the reported values
    deviations = [abs(report["reportedRate"] - 0.5) / 0.5 * 100 for _, report in published]
    return round(np.mean(deviations))


# TODO: update the published scientific record
def peer_review(participants, scientific_record=None, num_published=20):
    """Score submitted reports, publish the best ones and summarise their bias.

    If at least one participant uses the "rate" setting, only those rate
    reports are reviewed; otherwise every participant is expected to have a
    ``{"0": n, "1": m}`` report ("data" or "subset" settings).

    Args:
        participants: Objects exposing ``id``, ``reported_results`` and
            ``reporting_setting.name``.
        scientific_record: Currently unused (see TODO above); optional so that
            callers that only need the summary can omit it.
        num_published: How many top-scoring reports are selected.

    Returns:
        The rounded mean, over the selected reports, of the percent deviation
        of the reported rate from 0.5.

    Raises:
        ValueError: If ``participants`` is empty.
    """
    if not participants:
        raise ValueError("peer_review requires at least one participant")

    final_reports = {}
    rate_reporters = [p for p in participants if p.reporting_setting.name == "rate"]

    # if participants were only allowed to report a single rate as their finding
    if rate_reporters:
        rates = [p.reported_results for p in rate_reporters]
        for participant, reported_rate, score in zip(rate_reporters, rates, _percentile_scores(rates)):
            # A rate of exactly 0.5 counts as a non-positive finding in this branch.
            final_reports[participant.id] = {
                "reportedRate": reported_rate,
                "score": _apply_publication_bias(score, reported_rate > 0.5),
            }

    # if there is data associated with each participant (subset or full reporting)
    else:
        reported_sums = [p.reported_results['0'] + p.reported_results['1'] for p in participants]
        for participant, reported_sum, score in zip(participants, reported_sums, _percentile_scores(reported_sums)):
            reported_rate = 0
            if reported_sum != 0:
                reported_rate = participant.reported_results['1'] / reported_sum

            # Surprise factor first, then publishing bias on top of it. The
            # surprise-adjusted score used to be overwritten and had no effect.
            surprised = score + (10 - score) * abs(reported_rate - 0.5)
            # A rate of exactly 0.5 counts as a positive finding in this branch.
            final_reports[participant.id] = {
                "reportedSum": reported_sum,
                "reportedRate": reported_rate,
                "score": _apply_publication_bias(surprised, reported_rate >= 0.5),
            }

    return _average_percent_deviation(final_reports, num_published)

## Run experiments

The multi-generational experiment is run for a given reporting setting and exaggeration value.

In [None]:
def run_experiment(reporting_strategy, setting, alpha_value):
    """Run ``num_generations`` rounds of sampling, reporting and peer review.

    Args:
        reporting_strategy: Strategy identifier forwarded to make_participants.
        setting: Reporting setting name forwarded to make_participants.
        alpha_value: Exaggeration level every participant reports with.

    Returns:
        A list with one peer_review result per generation (previously the
        results were discarded and None was returned).
    """
    # each experiment starts with a blank canon
    scientific_record = {bin_num: {} for bin_num in range(num_bins)}

    generation_results = []
    for generation in range(num_generations):
        # A fresh cohort per generation: reusing the same participants makes
        # their records grow past num_draws, which get_full_history could not
        # handle from the second generation on.
        # NOTE(review): confirm that generations are meant to be new cohorts
        # rather than the same scientists continuing to sample.
        participants = make_participants(reporting_strategy, setting, alpha_value)

        # scientists explore and submit reports
        for participant in participants:
            # sample
            for _ in range(num_draws):
                participant.sample()

            # choose the bin
            participant.choose_bin(participant.bin_sample_order, participant.values_sampled)

            # specify alpha value
            participant.report(alpha_value)

        # the peer review board selects reports for publication and updates the scientific record
        generation_results.append(peer_review(participants, scientific_record))

    return generation_results

In [None]:
# Parameter grid for the sweep: every strategy x setting x alpha combination is run once.
reporting_strategies = ["rs"]
reporting_settings = ["rate", "data", "subset"]
alpha_values = [0, 0.25, 0.5, 0.75, 1]

results = [] # TODO: fix this into whatever you want

for reporting_strategy in reporting_strategies:
    for reporting_setting in reporting_settings:
        for alpha_value in alpha_values:
            # Pass the loop variable; the previous `setting` name was undefined
            # in this cell and raised NameError on a fresh kernel.
            result = run_experiment(reporting_strategy, reporting_setting, alpha_value)
            results.append(result)

            # TODO: save the results

## Analyze the results

In [None]:
# Initialize the dictionary to store mean percent errors, keyed by
# (reporting_strategy, setting, alpha_value)
mean_percent_errors_dict = {}

# Number of runs for each key
num_runs = 10

# Only the dimensions that exist in this notebook are swept; the previous
# gathering/bin-choosing loops referenced lists that are never defined.
for reporting_strategy in reporting_strategies:
    for setting in reporting_settings:
        for alpha_value in alpha_values:
            # Initialize a list to store MPE for each run
            mpe_list = []

            for _ in range(num_runs):
                participants = make_participants(reporting_strategy, setting, alpha_value)

                # Participants must sample, choose a bin and report before
                # review; otherwise they have no reported results to score.
                for participant in participants:
                    for _draw in range(num_draws):
                        participant.sample()
                    participant.choose_bin(participant.bin_sample_order, participant.values_sampled)
                    participant.report(alpha_value)

                mean_percent_error = peer_review(participants, {})
                mpe_list.append(mean_percent_error)

            # Calculate the average MPE
            avg_mpe = np.mean(mpe_list)

            # Create a key based on the variable names
            key = (reporting_strategy, setting, alpha_value)

            # Store the average MPE in the dictionary
            mean_percent_errors_dict[key] = avg_mpe

print(mean_percent_errors_dict)

In [None]:
# JSON object keys must be strings, so stringify the tuple keys first
string_keys_dict = dict((str(key), value) for key, value in mean_percent_errors_dict.items())

# Destination of the JSON dump (relative to the working directory)
json_file_path = 'mean_percent_errors.json'

# Serialize once, then write the text out in a single call
with open(json_file_path, 'w') as json_file:
    json_file.write(json.dumps(string_keys_dict))

print(f"Mean percent errors saved to {json_file_path}")

## Find best 15 and worst 15 settings

In [None]:
# Rank settings from lowest to highest MPE. The tuple keys are used directly:
# the previous str() -> eval() round trip was unnecessary and eval is best avoided.
ranked_settings = sorted(mean_percent_errors_dict.items(), key=lambda item: item[1])

# Print the best fifteen settings with their MPE
print("Best 15 Settings:")
for setting_tuple, mpe in ranked_settings[:15]:
    print(f"{setting_tuple}: {mpe}")

# Print the worst fifteen settings with their MPE
print("\nWorst 15 Settings:")
for setting_tuple, mpe in ranked_settings[-15:]:
    print(f"{setting_tuple}: {mpe}")

In [24]:
# Filter out settings where alpha (the last element of the key tuple) is equal to 1.
# Keys are used as tuples directly, so no str()/eval() round trip is needed.
filtered_dict = {key: value for key, value in mean_percent_errors_dict.items() if key[-1] != 1}

# Sort the filtered settings by MPE in descending order
sorted_filtered_items = sorted(filtered_dict.items(), key=lambda item: item[1], reverse=True)

# Print the worst settings with alpha not equal to 1
# (a stray bare `r` statement that raised NameError used to sit above this loop)
print("Worst Settings (alpha not equal to 1) in Order:")
for setting_tuple, value in sorted_filtered_items:
    print(f"{setting_tuple}: {value}")

NameError: name 'mean_percent_errors_dict' is not defined

In [None]:
from collections import defaultdict

# Collect the MPE values for each unique setting component (strategy name,
# setting name or alpha value). Tuple keys are iterated directly instead of
# being stringified and passed through eval().
component_mpes = defaultdict(list)

for setting_tuple, value in mean_percent_errors_dict.items():
    # Iterate over all components in the setting tuple
    for component in setting_tuple:
        component_mpes[component].append(value)

# Calculate average MPE for each unique setting component
average_mpes = {component: np.mean(mpe_values) for component, mpe_values in component_mpes.items()}

# Print the average MPE for each setting component
print("Average MPE for Each Setting Component:")
for component, average_mpe in average_mpes.items():
    print(f"{component}: {average_mpe}")