<a href="https://www.kaggle.com/code/mihailferaru/primer-coinflip-study?scriptVersionId=92640915" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [136]:
seed = 42

import numpy as np
np.random.seed(seed)
np.random.RandomState(seed)

import random
random.seed(seed)

from sklearn.metrics import classification_report

from collections import Counter

In [137]:
HEADS, TAILS = 0, 1

def make_coin_flipper(p_heads):
    return lambda n: [HEADS if p < p_heads else TAILS for p in np.random.random(n)] 
        
def make_flippers(n):
    fair_coin = make_coin_flipper(0.5)
    return [[fair_coin, 0.5] if p > 0.5 else [make_coin_flipper(0.5 + p), 0.5 + p] for p in np.random.random(n)]

def new_game_state():
    return {
        'flips': 100,
        'reward': 15,
        'penalty': 30,
        'score': 0,
    }

In [138]:
# 1000 flippers x 100 flips
games = [[flipper(100), p_heads] for flipper, p_heads in make_flippers(1000)]

def check(games):
    x = [y[1] if y[1] > 0.5 else None for y in games]
    x = filter(lambda y: y is not None, x)
    x = list(x)
    assert np.abs(np.mean(x) - 0.75) < 0.03
    
check(games)

In [139]:
NEXT_FLIP, IS_FAIR, IS_CHEATER = 0, 1, 2

def default_strategy(flips, game_state, debug = False):
    return IS_FAIR

def random_strategy(flips, game_state, debug = False):
    if game_state['flips'] == 0:
        return np.random.choice([IS_FAIR, IS_CHEATER])
    return np.random.choice([NEXT_FLIP, IS_FAIR, IS_CHEATER], p=[0.7, 0.15, 0.15])

def invalid_strategy(flips, game_state, debug = False):
    return NEXT_FLIP

def naive_strategy(flips, game_state, debug = False):
    if game_state['flips'] == 0 or len(flips) >= 10:
        c = Counter(flips)
        if c[HEADS] > 6:
            return IS_CHEATER
        else:
            return IS_FAIR
        
    return NEXT_FLIP

def naive_dynamic_strategy(flips, game_state, debug = False):
    if game_state['flips'] == 0 or len(flips) >= 10: return naive_strategy(flips, game_state, debug)
    
    if len(flips) < 5: return NEXT_FLIP
    
    c = Counter(flips)
    if c[HEADS] / len(flips) > 0.7:
        return IS_CHEATER
    if c[TAILS] / len(flips) > 0.7:
        return IS_FAIR
    
    return NEXT_FLIP

def play(games, strategy, debug = False):
    if debug: print("Debug is ON.")
        
    state = new_game_state()
    y_true, y_predicted = [], []
    
    for flips, p_heads in games:
        
        i = 0
        action = NEXT_FLIP
        while state['flips'] >= 0 and action == NEXT_FLIP:
            action = strategy(flips[:i], state, debug)
            assert state['flips'] > 0 or action != NEXT_FLIP
            
            if action == NEXT_FLIP:
                i += 1
            
            if debug: print(f"Remaining flips: {state['flips']}. History: {flips[:i]}. Action: {action}.")
            state['flips'] -= 1
        
        correct = IS_FAIR if p_heads == 0.5 else IS_CHEATER
        y_true.append(correct)
        y_predicted.append(action)
        
        if action == correct:
            state['flips'] += state['reward']
            state['score'] += 1
        else:
            state['flips'] -= state['penalty']
        
        if debug: print(f"Prediction: {action}. True label: {correct}. Score: {state['score']}")
            
        if state['flips'] <= 0:
            break
    
    return state, y_true, y_predicted

In [140]:
state, y_true, y_predicted = play(games, default_strategy, debug = True)
print(f"Score: {state['score']}")
print(classification_report(y_true, y_predicted))

Debug is ON.
Remaining flips: 100. History: []. Action: 1.
Prediction: 1. True label: 2. Score: 0
Remaining flips: 69. History: []. Action: 1.
Prediction: 1. True label: 1. Score: 1
Remaining flips: 83. History: []. Action: 1.
Prediction: 1. True label: 1. Score: 2
Remaining flips: 97. History: []. Action: 1.
Prediction: 1. True label: 1. Score: 3
Remaining flips: 111. History: []. Action: 1.
Prediction: 1. True label: 2. Score: 3
Remaining flips: 80. History: []. Action: 1.
Prediction: 1. True label: 2. Score: 3
Remaining flips: 49. History: []. Action: 1.
Prediction: 1. True label: 2. Score: 3
Remaining flips: 18. History: []. Action: 1.
Prediction: 1. True label: 1. Score: 4
Remaining flips: 32. History: []. Action: 1.
Prediction: 1. True label: 1. Score: 5
Remaining flips: 46. History: []. Action: 1.
Prediction: 1. True label: 1. Score: 6
Remaining flips: 60. History: []. Action: 1.
Prediction: 1. True label: 2. Score: 6
Remaining flips: 29. History: []. Action: 1.
Prediction: 1. T

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [141]:
state, y_true, y_predicted = play(games, random_strategy, debug = True)
print(f"Score: {state['score']}")
print(classification_report(y_true, y_predicted))

Debug is ON.
Remaining flips: 100. History: [0]. Action: 0.
Remaining flips: 99. History: [0, 0]. Action: 0.
Remaining flips: 98. History: [0, 0]. Action: 1.
Prediction: 1. True label: 2. Score: 0
Remaining flips: 67. History: []. Action: 1.
Prediction: 1. True label: 1. Score: 1
Remaining flips: 81. History: [1]. Action: 0.
Remaining flips: 80. History: [1, 0]. Action: 0.
Remaining flips: 79. History: [1, 0]. Action: 1.
Prediction: 1. True label: 1. Score: 2
Remaining flips: 93. History: []. Action: 2.
Prediction: 2. True label: 1. Score: 2
Remaining flips: 62. History: []. Action: 1.
Prediction: 1. True label: 2. Score: 2
Remaining flips: 31. History: [0]. Action: 0.
Remaining flips: 30. History: [0]. Action: 1.
Prediction: 1. True label: 2. Score: 2
Score: 2
              precision    recall  f1-score   support

           1       0.40      0.67      0.50         3
           2       0.00      0.00      0.00         3

    accuracy                           0.33         6
   macro a

In [142]:
state, y_true, y_predicted = play(games, naive_strategy, debug = False)
print(f"Score: {state['score']}")
print(classification_report(y_true, y_predicted))

Score: 38
              precision    recall  f1-score   support

           1       0.73      1.00      0.84        19
           2       1.00      0.73      0.84        26

    accuracy                           0.84        45
   macro avg       0.87      0.87      0.84        45
weighted avg       0.89      0.84      0.84        45



In [143]:
state, y_true, y_predicted = play(games, naive_dynamic_strategy, debug = False)
print(f"Score: {state['score']}")
print(classification_report(y_true, y_predicted))

Score: 45
              precision    recall  f1-score   support

           1       0.81      0.79      0.80        28
           2       0.79      0.82      0.81        28

    accuracy                           0.80        56
   macro avg       0.80      0.80      0.80        56
weighted avg       0.80      0.80      0.80        56

