# Introduction

This notebook explores how different simulation settings affect the frequency of false results being published in a pseudo-scientific setting. It demonstrates that several hypothesized effects emerged in single-generation simulations.

In [111]:
import seaborn as sns
import pandas as pd
import matplotlib.pylab as plt
import numpy as np
from scipy.stats import beta, binom
import random

# simulation-wide global variables (read as module-level globals by the classes and helpers below)
num_bins = 3             # number of bins a participant can draw from; bins are numbered 0..num_bins-1
num_draws = 10           # number of samples each participant takes
num_participants = 100   # intended population size per simulation run

###  Reporting Settings
A participant is in one of three settings that determine how they are allowed to report their data:
1. **Rate**: Pick a single bin and report the survival rate of its pill contents.
2. **Data**: Pick a single bin and report the total number of rats that died and rats that stayed alive.
3. **Subset**: Pick a single bin and choose a set of data to publish.

In [112]:
class ReportingSetting:
    """Names the kind of report a participant is allowed to make.

    The name must be one of "rate", "data" or "subset"; any other value
    raises ValueError.
    """

    def __init__(self, name):
        recognised_names = {"rate", "data", "subset"}
        if name in recognised_names:
            self.name = name
        else:
            raise ValueError("Improper setting name")

### Participants
A participant implements a given strategy for how they gather data, a strategy for choosing which bin to report, and a strategy for how they report data.

In [113]:
class Participant:
    """A simulated participant who gathers samples, picks one bin, and reports on it.

    Parameters
    ----------
    strategy_gather : object with ``draw(draw_number, bin_sample_order, values_sampled)``
        returning a ``(bin_number, value)`` pair for the next sample.
    strategy_bin : object with ``choose_bin(bin_sample_order, values_sampled)``
        returning the bin to report.
    strategy_report : object with ``report(setting_name, alpha, bin_history)``
        returning the reported result.
    reporting_setting : object exposing a ``name`` attribute that is passed to
        ``strategy_report.report``.
    """

    def __init__(self, strategy_gather, strategy_bin, strategy_report, reporting_setting):
        self.strategy_gather = strategy_gather                               # strategy to collect data
        self.strategy_bin = strategy_bin                                     # strategy to select bin to report
        self.strategy_report = strategy_report                               # strategy to report data in the chosen bin
        self.reporting_setting = reporting_setting                           # type of report a participant can make
        self.bin_sample_order = []                                           # order of bins sampled
        self.values_sampled = []                                             # values received across draws
        self.bin_choice = -1                                                 # the bin chosen to be reported
        # Must be an instance attribute so it can be read before report() is called.
        self.reported_results = None                                         # the results reported

    def sample(self):
        """Take one more sample using the gathering strategy and record it."""
        # The draw number is simply how many samples have been taken so far.
        draw_number = len(self.values_sampled)
        bin_number, value = self.strategy_gather.draw(draw_number, self.bin_sample_order, self.values_sampled)
        self.bin_sample_order.append(bin_number)
        self.values_sampled.append(value)

    def choose_bin(self, bin_sample_order, values_sampled):
        """Store the bin-choosing strategy's pick in ``self.bin_choice``.

        The two parameters are kept for backward compatibility with existing
        callers; the choice is always made from this participant's own
        recorded samples.
        """
        self.bin_choice = self.strategy_bin.choose_bin(self.bin_sample_order, self.values_sampled)

    def report(self, alpha):
        """Report on the chosen bin and store the result in ``self.reported_results``.

        ``alpha`` is forwarded unchanged to the reporting strategy. The data
        reported on is the chosen bin's contents as of the final draw
        (index ``num_draws - 1``) of the full history.
        """
        history = get_full_history(self.bin_sample_order, self.values_sampled)
        bin_history = history[num_draws - 1][self.bin_choice]
        self.reported_results = self.strategy_report.report(self.reporting_setting.name, alpha, bin_history)

In [114]:
def get_full_history(bin_sample_order, values_sampled):
    """Build a per-draw snapshot of the values seen in each bin.

    Returns a dict keyed by draw number (0..num_draws-1). Each entry is a dict
    keyed by bin number (0..num_bins-1) holding the list of values that had
    been drawn from that bin up to and including that draw. Draws that have
    not happened yet keep empty lists for every bin.
    """
    history = {}
    for draw_number in range(num_draws):
        history[draw_number] = {bin_number: [] for bin_number in range(num_bins)}

    for draw, sampled_bin in enumerate(bin_sample_order):
        if draw == 0:
            # nothing to carry forward on the very first draw
            history[draw][sampled_bin].append(values_sampled[draw])
            continue

        # carry the previous snapshot forward, extending only the sampled bin
        earlier = history[draw - 1]
        for bin_num in earlier:
            seen = earlier[bin_num]
            if bin_num == sampled_bin:
                history[draw][bin_num] = seen + [values_sampled[draw]]
            else:
                history[draw][bin_num] = seen[:]
    return history

# Hypothesized Participant Strategies

### Gathering Strategies
There are six hypothesized strategies that participants will use to gather data:
1. **Epsilon-Greedy**: It explores with probability epsilon and exploits with probability 1-epsilon. It's a basic strategy that balances exploration and exploitation. If your data shows a relatively uniform exploration of different options, with no clear bias toward exploiting a specific action, it might align with the epsilon-greedy pattern. If your data reflects a balance between trying out different options to gather information and exploiting the currently best-known option, it may be consistent with epsilon-greedy. (Look for uniform exploration and balanced trade-offs between exploring and exploiting.)
2. **Thompson Sampling**: Thompson Sampling is a Bayesian approach to the multi-armed bandit problem. It models uncertainty using a probability distribution over the parameters and samples from this distribution to make decisions. It tends to perform well in various scenarios.
3. **Win-Stay, Lose-Shift**: The idea is to exploit actions that have been successful in the past (win-stay) and explore alternative actions when faced with failures (lose-shift).
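4. **Adaptive** (`AdaptiveGatheringStrategy`): Sample bins uniformly at random for the first half of the draws; on each remaining draw, sample from the bin with the highest observed success rate so far.
5. **Conservative** (`ConservativeGatheringStrategy`): Sample bins uniformly at random for the first half of the draws, then commit to the bin with the highest observed success rate at the halfway point for all remaining draws.
6. **Random** (`Random`): Sample a bin uniformly at random on every draw.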

In [115]:
class EpsilonGreedy():
    """Gathering strategy that explores with probability epsilon, else exploits.

    Parameters
    ----------
    epsilon : float, default 0.5
        Probability of sampling a uniformly random bin on any draw after the
        first. Must lie in [0, 1].

    Raises
    ------
    ValueError
        If ``epsilon`` is outside [0, 1].
    """

    def __init__(self, epsilon=0.5):
        if not 0 <= epsilon <= 1:
            raise ValueError("Epsilon must be between 0 and 1")
        self.epsilon = epsilon

    def draw(self, draw_number, bin_sample_order, values_sampled):
        """Return ``(bin_number, value)`` for the next sample.

        ``draw_number`` is the zero-based index of the draw being made;
        ``bin_sample_order`` and ``values_sampled`` are the bins and values of
        all earlier draws. The returned value is 0 or 1, chosen uniformly at
        random regardless of the bin.
        """
        # chooses random bin first
        if draw_number == 0:
            return (random.randint(0, num_bins - 1), random.choice([0, 1]))

        # finds the best bin: highest observed proportion of 1s after the previous draw
        seen_so_far = get_full_history(bin_sample_order, values_sampled)[draw_number - 1]
        best_bin = 0
        best_ratio = 0

        for bin_num, observations in seen_so_far.items():
            num_zeros = observations.count(0)
            num_ones = observations.count(1)

            # you've sampled from the bin and it's the best so far
            # (strict '>' keeps the earlier bin on ties, and bin 0 if no bin has a positive rate)
            if (num_ones + num_zeros != 0) and (num_ones/(num_ones + num_zeros) > best_ratio):
                best_ratio = num_ones/(num_ones + num_zeros)
                best_bin = bin_num

        # explore with probability epsilon, exploit otherwise
        random_number = random.uniform(0, 1)
        if random_number <= self.epsilon:
            bin_number = random.randint(0, num_bins - 1)
        else:
            bin_number = best_bin

        value = random.choice([0, 1])
        return (bin_number, value)

In [116]:
class ThompsonSampling():
    """Gathering strategy that picks bins by Thompson sampling over Beta beliefs.

    Each bin has a Beta(alpha, beta) belief that starts at Beta(1, 1). The
    beliefs are kept on the instance and updated after every draw; the
    history arguments passed to ``draw`` are not read.
    """

    def __init__(self):
        # step 1: initialize prior beliefs
        self.alpha_priors = np.ones(num_bins)
        self.beta_priors = np.ones(num_bins)

    def draw(self, draw_number, bin_sample_order, values_sampled):
        """Return ``(bin_number, value)`` for the next sample and update the beliefs.

        ``bin_number`` is always a plain ``int`` so that recorded sample orders
        have one consistent element type across all draws.
        """
        # edge case: choose a random first bin
        if draw_number == 0:
            bin_num = random.randint(0, num_bins - 1)
            choice = random.choice([0, 1])
            if choice == 1:
                self.alpha_priors[bin_num] += 1
            else:
                self.beta_priors[bin_num] += 1
            return (bin_num, choice)

        # step 2: action selection - sample one plausible rate per bin, take the largest
        bin_samples = []
        for bin_number in range(0, num_bins):
            sample = beta.rvs(self.alpha_priors[bin_number], self.beta_priors[bin_number])
            bin_samples.append(sample)
        # np.argmax returns a numpy integer; convert to match the int returned on the first draw
        best_bin = int(np.argmax(bin_samples))

        # step 3: observe reward
        value = random.choice([0, 1])

        # step 4: update probability distribution
        if value == 1:
            self.alpha_priors[best_bin] += 1
        else:
            self.beta_priors[best_bin] += 1

        return best_bin, value

In [117]:
class AdaptiveGatheringStrategy():
    """Gathering strategy: random for the first half of the draws, greedy afterwards.

    In the second half, every draw goes to the bin with the highest observed
    proportion of 1s after the previous draw.
    """

    def __init__(self):
        pass

    def draw(self, draw_number, bin_sample_order, values_sampled):
        """Return ``(bin_number, value)`` for the next sample."""
        still_exploring = draw_number < num_draws/2

        if still_exploring:
            chosen_bin = random.randint(0, num_bins - 1)
        else:
            snapshot = get_full_history(bin_sample_order, values_sampled)[draw_number - 1]

            # scan for the bin with the greatest share of positive values seen so far
            chosen_bin, top_rate = 0, 0
            for candidate, observations in snapshot.items():
                ones = observations.count(1)
                total = ones + observations.count(0)

                # only sampled bins compete; ties keep the earlier bin
                if total != 0 and ones/total > top_rate:
                    top_rate = ones/total
                    chosen_bin = candidate

        outcome = random.choice([0, 1])
        return (chosen_bin, outcome)

In [118]:
class ConservativeGatheringStrategy():
    """Gathering strategy: random for the first half, then commit to one bin.

    On the first draw of the second half, the bin with the highest observed
    proportion of 1s is selected and remembered; every remaining draw goes to
    that same bin.
    """

    def __init__(self):
        # -1 means the halfway-point bin has not been selected yet
        self.best_bin_at_halfway = -1

    def draw(self, draw_number, bin_sample_order, values_sampled):
        """Return ``(bin_number, value)`` for the next sample."""
        # first half: random
        if draw_number < num_draws/2:
            bin_number = random.randint(0, num_bins - 1)

        # second half: choose best bin at halfway point
        else:
            # the best bin has not been fixed yet: pick the one with the greatest
            # proportion of positive values and remember it for all later draws
            if self.best_bin_at_halfway == -1:
                history = get_full_history(bin_sample_order, values_sampled)
                # defaults to bin 0 when only negative results have been seen,
                # so no separate fallback is needed afterwards
                best_bin = 0
                best_ratio = 0
                for bin_num in history[draw_number - 1]:
                    num_zeros = history[draw_number - 1][bin_num].count(0)
                    num_ones = history[draw_number - 1][bin_num].count(1)

                    # you've sampled from the bin and it's the best so far
                    if (num_ones + num_zeros != 0) and (num_ones/(num_ones + num_zeros) > best_ratio):
                        best_ratio = num_ones/(num_ones + num_zeros)
                        best_bin = bin_num
                self.best_bin_at_halfway = best_bin

            bin_number = self.best_bin_at_halfway

        value = random.choice([0, 1])
        return (bin_number, value)

In [119]:
class WinStayLoseShift():
    """Gathering strategy: repeat the last bin after a 1, re-pick at random otherwise."""

    def __init__(self):
        pass

    def draw(self, draw_number, bin_sample_order, values_sampled):
        """Return ``(bin_number, value)`` for the next sample.

        The first draw picks a uniformly random bin. Afterwards, if the
        previous value was 1 the previous bin is sampled again (win-stay);
        any other previous value triggers a fresh uniformly random bin
        (lose-shift), which may land on the same bin again.
        """
        # drawn before the bin so the order of random calls is unchanged
        choice = random.choice([0, 1])

        if draw_number == 0:
            bin_num = random.randint(0, num_bins - 1)
            return (bin_num, choice)

        last_sample = values_sampled[draw_number - 1]
        if last_sample == 1:
            bin_num = bin_sample_order[draw_number - 1]
        else:
            # plain else so bin_num is always bound, whatever the previous value was
            bin_num = random.randint(0, num_bins - 1)

        return (bin_num, choice)

In [120]:
class Random():
    """Gathering strategy that samples a uniformly random bin on every draw."""

    def __init__(self):
        pass

    def draw(self, draw_number, bin_sample_order, values_sampled):
        """Return ``(bin_number, value)``; the history arguments are not used."""
        # the value is drawn before the bin
        value = random.choice([0, 1])
        return (random.randint(0, num_bins - 1), value)

### Bin-Choosing Strategies
We hypothesize that participants could use one of the following strategies for how to choose the single bin whose results they will be asked to report.
1. **Maximum Data**: Choose the bin for which you have collected the most data
2. **Maximum Success Rate**: Choose the bin for which you have the highest success rate

In [121]:
class MaximumDataChoosingStrategy():
    """Bin-choosing strategy that reports the bin with the most collected data."""

    def __init__(self):
        pass

    # bin for which you've collected the most data
    def choose_bin(self, bin_sample_order, values_sampled):
        """Return the bin number that appears most often in ``bin_sample_order``.

        The choice is computed from the arguments passed in, not from any
        global participant. Ties go to the lowest-numbered bin. Returns -1
        when no samples have been taken. ``values_sampled`` is accepted for
        interface parity with the other bin-choosing strategies and is not
        needed to count draws.
        """
        draws = list(bin_sample_order)
        if not draws:
            return -1

        # max() keeps the first maximal element, so sorting the candidates
        # ascending makes the lowest-numbered bin win ties
        candidate_bins = sorted(set(draws))
        bin_with_most_data = max(candidate_bins, key=draws.count)
        return int(bin_with_most_data)

In [122]:
class MaximumSuccessChoosingStrategy():
    """Bin-choosing strategy that reports the bin with the highest success rate."""

    def __init__(self):
        pass

    def choose_bin(self, bin_sample_order, values_sampled):
        """Return the bin with the largest proportion of 1s as of the final draw.

        Bins that were never sampled do not compete. Ties keep the earlier
        bin, and bin 0 is returned when no bin has a positive success rate.
        """
        final_snapshot = get_full_history(bin_sample_order, values_sampled)[num_draws - 1]

        top_bin, top_rate = 0, 0
        for candidate, observations in final_snapshot.items():
            ones = observations.count(1)
            total = ones + observations.count(0)

            # a sampled bin that strictly beats the best rate seen so far
            if total != 0 and ones/total > top_rate:
                top_rate = ones/total
                top_bin = candidate

        return top_bin

### Reporting Strategies
We hypothesize that the participants will report their results with some degree $\alpha$ of exaggeration. When $\alpha = 0$, this reduces to the strategy of reporting honest, unmanipulated results. When $\alpha = 1$, this reduces to the strategy of reporting maximum values.
- Softmax over the utility function?

In [123]:
class ReportingStrategy():
    """Reports a bin's results with a degree ``alpha`` of exaggeration."""

    def __init__(self):
        pass

    def report(self, reporting_setting, alpha, bin_history):
        """Return the (possibly exaggerated) report for one bin.

        reporting_setting -- "rate", "data" or "subset"; any other value yields None.
        alpha             -- exaggeration level in [0, 1]; ValueError otherwise.
        bin_history       -- list of 0/1 values observed in the chosen bin.

        "rate" returns a float; "data" and "subset" return a dict with the
        keys "0" and "1" holding the reported counts.
        """
        zeros_seen = bin_history.count(0)
        ones_seen = bin_history.count(1)

        if alpha < 0 or alpha > 1:
            raise ValueError("Alpha must be between 0 and 1")

        if reporting_setting == "rate":
            # close a fraction alpha of the gap between the true rate and 1;
            # an unsampled bin is treated as having a rate of 0.5
            total_seen = ones_seen + zeros_seen
            accurate_rate = 0.5 if total_seen == 0 else ones_seen / total_seen
            return accurate_rate + alpha * (1 - accurate_rate)

        if reporting_setting == "data":
            # shrink the count of 0s and inflate the count of 1s, each by alpha
            return {
                "0": round(zeros_seen * (1 - alpha)),
                "1": round(ones_seen * (1 + alpha)),
            }

        if reporting_setting == "subset":
            # drop (100 * alpha)% of the 0 results and keep every 1
            return {"0": round(zeros_seen * (1 - alpha)), "1": ones_seen}

        return None

# Simulations

In [127]:
def make_participants(gathering_strategy, bin_choosing_strategy, reporting_strategy, setting, alpha_value):
    """Create, run, and return one population of simulated participants.

    Each participant gets fresh strategy objects, takes ``num_draws`` samples,
    chooses a bin, and reports on it with exaggeration ``alpha_value``.

    Parameters
    ----------
    gathering_strategy : str
        One of "random", "eg", "ts", "ag", "cg", "wsls".
    bin_choosing_strategy : str
        One of "maxd", "maxs".
    reporting_strategy : str
        Currently only "rs".
    setting : str
        Reporting setting name, validated by ``ReportingSetting``.
    alpha_value : float
        Exaggeration level forwarded to ``Participant.report``.

    Returns
    -------
    list of Participant, of length ``num_participants``.

    Raises
    ------
    ValueError
        If any strategy key or the setting name is not recognised.
    """
    gathering_classes = {
        "random": Random,
        "eg": EpsilonGreedy,
        "ts": ThompsonSampling,
        "ag": AdaptiveGatheringStrategy,
        "cg": ConservativeGatheringStrategy,
        "wsls": WinStayLoseShift,
    }
    bin_choosing_classes = {
        "maxd": MaximumDataChoosingStrategy,
        "maxs": MaximumSuccessChoosingStrategy,
    }
    reporting_classes = {
        "rs": ReportingStrategy,
    }

    # fail early with a clear message instead of hitting an unbound local later
    if gathering_strategy not in gathering_classes:
        raise ValueError("Unknown gathering strategy: {!r}".format(gathering_strategy))
    if bin_choosing_strategy not in bin_choosing_classes:
        raise ValueError("Unknown bin choosing strategy: {!r}".format(bin_choosing_strategy))
    if reporting_strategy not in reporting_classes:
        raise ValueError("Unknown reporting strategy: {!r}".format(reporting_strategy))

    participants = []

    for _ in range(num_participants):
        # every participant gets its own strategy instances because some
        # strategies keep per-participant state between draws
        participant = Participant(
            strategy_gather=gathering_classes[gathering_strategy](),
            strategy_bin=bin_choosing_classes[bin_choosing_strategy](),
            strategy_report=reporting_classes[reporting_strategy](),
            reporting_setting=ReportingSetting(setting),
        )

        # sample
        for _ in range(num_draws):
            participant.sample()

        # choose the bin
        participant.choose_bin(participant.bin_sample_order, participant.values_sampled)

        # report with the specified alpha value
        participant.report(alpha_value)

        participants.append(participant)

    return participants

In [128]:
# Full parameter grid: every key understood by make_participants, plus the alpha levels to sweep.
gathering_strategies = ["random", "eg", "ts", "ag", "cg", "wsls"]
bin_choosing_strategies = ["maxd", "maxs"]
reporting_strategies = ["rs"]
reporting_setting = ["rate", "data", "subset"]
alpha_values = [0, 0.25, 0.5, 1]

# Reduced single-condition grid. Because these assignments come second they
# replace the full grid above, so only this one combination is simulated.
# NOTE(review): presumably a temporary override for quick runs; remove it to sweep the full grid.
gathering_strategies = ["random"]
bin_choosing_strategies = ["maxd"]
reporting_strategies = ["rs"]
reporting_setting = ["rate"]
alpha_values = [0.5]

In [130]:
def peer_review(participants):
    """Placeholder peer-review step: ignores ``participants`` and always returns 1."""
    verdict = 1
    return verdict

In [131]:
# Run one simulation for every combination of gathering strategy, bin-choosing
# strategy, reporting strategy, reporting setting, and alpha value.
# NOTE: `participants` and `results` are overwritten on every iteration, so only
# the values from the last combination remain once the loops finish.
for gathering_strategy in gathering_strategies:
    for bin_choosing_strategy in bin_choosing_strategies:
        for reporting_strategy in reporting_strategies:
            for setting in reporting_setting:
                for alpha_value in alpha_values:
                    participants = make_participants(gathering_strategy, bin_choosing_strategy, reporting_strategy, setting, alpha_value)
                    results = peer_review(participants)

# Extra Scratch Code

In [78]:
# Scratch walkthrough: build a single participant by hand and print each stage.
# Uncomment exactly one alternative in each group below to try a different strategy.

# gathering strategy
strat_gather = Random()
# strat_gather = EpsilonGreedy()
# strat_gather = ThompsonSampling()
# strat_gather = AdaptiveGatheringStrategy()
# strat_gather = ConservativeGatheringStrategy()
# strat_gather = WinStayLoseShift()

# bin-choosing strategy
strat_bin = MaximumDataChoosingStrategy()
# strat_bin = MaximumSuccessChoosingStrategy()

strat_report = ReportingStrategy()

# reporting setting
report_set = ReportingSetting("rate")
# report_set = ReportingSetting("data")
# report_set = ReportingSetting("subset")

# NOTE: this defines a module-level name `participant` that stays in the kernel namespace.
participant = Participant(strategy_gather=strat_gather, strategy_bin=strat_bin, strategy_report=strat_report, reporting_setting=report_set)

# take all num_draws samples
for i in range(0, num_draws):
    participant.sample()
    
print('Participant History')
print(get_full_history(participant.bin_sample_order, participant.values_sampled))
print()
print('Bins sampled, in order')
print(participant.bin_sample_order)
print()
print('Values Sampled, in order')
print(participant.values_sampled)
print()
print('Bin Chosen')
participant.choose_bin(participant.bin_sample_order, participant.values_sampled)
print(participant.bin_choice)
print()
print("Reported results")
# alpha = 0 is passed here, i.e. no exaggeration is requested
participant.report(0)
print(participant.reported_results)

Participant History
{0: {0: [], 1: [], 2: [1]}, 1: {0: [], 1: [0], 2: [1]}, 2: {0: [], 1: [0, 0], 2: [1]}, 3: {0: [], 1: [0, 0, 1], 2: [1]}, 4: {0: [1], 1: [0, 0, 1], 2: [1]}, 5: {0: [1, 0], 1: [0, 0, 1], 2: [1]}, 6: {0: [1, 0], 1: [0, 0, 1], 2: [1, 0]}, 7: {0: [1, 0, 0], 1: [0, 0, 1], 2: [1, 0]}, 8: {0: [1, 0, 0, 0], 1: [0, 0, 1], 2: [1, 0]}, 9: {0: [1, 0, 0, 0], 1: [0, 0, 1, 1], 2: [1, 0]}}

Bins sampled, in order
[2, 1, 1, 1, 0, 0, 2, 0, 0, 1]

Values Sampled, in order
[1, 0, 0, 1, 1, 0, 0, 0, 0, 1]

Bin Chosen
0

Reported results
0.25
