# Dice Roller - Initial Calculations and Tests

## Section 1 - Pen & paper

In [7]:
# average value of the last dice roll: the mean of the six faces 1..6
sum(range(1,7)) / 6

3.5

In [8]:
# at position 1 (one roll remaining), the expected value of the remaining dice roll, if we choose to reroll, is 3.5
# decision: reroll if the current value is < 3.5, which is the same as rerolling on 1, 2 or 3

In [9]:
# Expected value of the dice roll if we reroll with 2 rolls remaining (position n-2):
#   <probability of stopping at n-1> * <expected value at n-1, assuming we stop there>
# + <probability of not stopping at n-1> * <expected value rolled at n>
# The number in each name counts the rolls still remaining after that roll.
prob_of_stopping_at_1 = 3 / 6  # at n-1 we keep a 4, 5 or 6
expected_value_at_1 = sum((4, 5, 6)) / 3  # mean of the kept faces
expected_value_at_0 = 3.5  # plain average of the final roll

# putting it together: the value if we reroll at n-2
(
    prob_of_stopping_at_1 * expected_value_at_1
    + (1 - prob_of_stopping_at_1) * expected_value_at_0
)

4.25

In [10]:
# if at n - 2 we decide to reroll, and so make an (n-1)th and potentially an nth roll, we can expect 4.25
# which means that at n - 2 we reroll if we rolled 1, 2, 3, or 4

In [11]:
# Expected value of the dice roll if we reroll with 3 rolls remaining (position n-3):
#   <probability of stopping at n-2> * <expected value at n-2, assuming we stop there>
# + <probability of not stopping at n-2> * <expected value assuming we go into n-1>
prob_of_stopping_at_2 = 2 / 6  # at n-2 we keep only a 5 or a 6
expected_value_at_2 = sum((5, 6)) / 2  # mean of the kept faces
# NOTE: this name now holds the value of carrying on past n-2 (4.25),
# not the mean of the kept faces at n-1 as in the previous step.
expected_value_at_1 = 4.25

(
    prob_of_stopping_at_2 * expected_value_at_2
    + (1 - prob_of_stopping_at_2) * expected_value_at_1
)

4.666666666666667

In [12]:
# once again: at n - 3 we do not reroll on a 5 or 6, and reroll otherwise (the threshold of about 4.67 lies between 4 and 5)

In [13]:
# question: at which point do we start rerolling on a 5?

## Section 2 - Recursive formula

In [14]:
import numpy as np

def R(i):
    """
    Determines the expected outcome of i remaining dicerolls,
    assuming we follow the strategy that maximizes expected value.

    i must be a whole number >= 1; anything else raises ValueError
    (a recursive formulation would never reach its base case for such input).

    The value is built up from the last roll backwards, one step per
    additional roll, so the work grows linearly with i.
    """
    rolls = int(i)
    if rolls != i or rolls < 1:
        raise ValueError("i must be a whole number >= 1")

    # with a single roll left there is nothing to decide: plain die average
    expected = np.mean(range(1, 7))

    for _ in range(rolls - 1):
        # we re-roll on 1, ..., k, where k is the last integer that is smaller
        # than the expected result of the remaining rolls.
        # k is capped at 5 so that a 6 is always kept: for very large i the
        # expectation can round up to exactly 6.0, which would otherwise leave
        # no outcome to keep and turn the mean below into nan.
        reroll_up_to = min(int(expected), 5)
        prob_reroll = reroll_up_to / 6

        # if we do not re-roll, the current roll is one of k+1, ..., 6
        curr_expected_assuming_no_reroll = np.mean(range(reroll_up_to + 1, 7))

        expected = \
            prob_reroll * expected + \
            (1 - prob_reroll) * curr_expected_assuming_no_reroll

    return expected

In [15]:
# one roll left: the plain die average
R(1)

3.5

In [16]:
# two rolls left: should reproduce the 4.25 worked out by hand in Section 1
R(2)

4.25

In [17]:
# three rolls left: should reproduce the hand-computed value of about 4.667 from Section 1
R(3)

4.666666666666666

In [18]:
# four rolls left: beyond what was computed by hand
R(4)

4.944444444444445

In [19]:
# five rolls left: the first case here where the expected value exceeds 5
R(5)

5.12962962962963

## Section 3 - Completely Random Simulation

In [20]:
import random

In [21]:
def dice_roll(side=6):
    """
    Rolls one die with `side` faces (6 by default).

    Returns a random integer i with 1 <= i <= side, drawn with
    np.random.randint.
    """
    lowest_face = 1
    highest_face = side
    # randint excludes its upper bound, hence the + 1
    return np.random.randint(lowest_face, highest_face + 1)

def random_player_decision_reroll():
    """
    Decides at random whether the player rerolls.

    Picks True or False with random.choice; the game state is not consulted.
    """
    options = [True, False]
    decision = random.choice(options)
    return decision

# for testing: make the player always reroll by replacing the function body with
#     return True
    
# bool(random.getrandbits(1)) is a supposedly faster option

In [41]:
def game(number_of_rolls):
    """
    Runs one game and records what happened.

    The game opens with one unconditional dice_roll. After that, at most
    number_of_rolls - 1 times, random_player_decision_reroll decides whether
    the player rolls again (True) or stops (False); the first False ends
    the game.

    Assumes that number_of_rolls >= 2.

    Returns a tuple (state, decision) of two lists:
      - state: one string per roll made, in the form
        "Rem. rolls: <rolls left> - Die value: <value rolled>"
      - decision: the decisions drawn, in order: k times True, followed by
        one False unless every available reroll was used
    The last element of state holds the roll the game ended on.
    """
    rolls_left = number_of_rolls - 1
    state = [f"Rem. rolls: {rolls_left} - Die value: {dice_roll()}"]
    decision = []

    while rolls_left > 0:
        wants_reroll = random_player_decision_reroll()
        decision.append(wants_reroll)
        if not wants_reroll:
            break
        rolls_left -= 1
        state.append(f"Rem. rolls: {rolls_left} - Die value: {dice_roll()}")

    return (state, decision)
    

In [42]:
def game_series(number_of_games, number_of_rolls):
    """
    Plays number_of_games independent games, each with number_of_rolls rolls.

    Returns a list with one entry per game, in the order played; each entry
    is whatever game(number_of_rolls) returned for that game.
    """
    return [game(number_of_rolls) for _ in range(number_of_games)]
    

In [43]:
# simulate 10000 games of 3 rolls each
game_series_result = game_series(number_of_games=10000, number_of_rolls=3)

In [44]:
def get_dice_result(states):
    """
    Returns the die value recorded in the last element of the state list,
    that is where the game stopped.

    Each state is a string ending in the rolled value, e.g.
    "Rem. rolls: 0 - Die value: 4". The whole trailing run of digits is read,
    so values of 10 or more (dice with more than nine sides) come back intact
    instead of being cut down to their final digit.

    Raises IndexError if states is empty and ValueError if the last state
    does not end in a digit.
    """
    final_state = states[-1]
    # whatever rstrip removes from the end is exactly the trailing number
    prefix_length = len(final_state.rstrip("0123456789"))
    return int(final_state[prefix_length:])

In [45]:
# loop through everything
# combine the different state - decision pairs
# e.g. Roll 1 - 2 & True
# puts those in a list
# in another list, put the final results

def create_summary_lists(game_series_result_to_process):
    """
    Flattens a series of games into two parallel lists.

    Assumes a game_series_result that is a list of
    (state list, decision list) tuples. For every decision taken in every
    game, one entry is added to each list:
      - first list: "<state>, Decision: <decision>" for the state the
        decision was taken in
      - second list: the final dice result of the game the decision belongs to
    Both lists are as long as the total number of decisions over all games;
    a final state that was not followed by a decision gets no entry.

    Returns the tuple (state_decision, outcome).
    """
    state_decision = []
    outcome = []

    for single_game in game_series_result_to_process:
        states, decisions = single_game[0], single_game[1]
        final_value = get_dice_result(states)

        for position, choice in enumerate(decisions):
            state_decision.append(states[position] + f", Decision: {choice}")
            outcome.append(final_value)

    return (state_decision, outcome)
    

In [46]:
import pandas as pd

In [47]:
# flatten the simulated games and load both summary lists into one DataFrame
result_list = create_summary_lists(game_series_result)
simulation_df = pd.DataFrame(
    {
        'state_decision': result_list[0],
        'outcome': result_list[1],
    }
)

In [48]:
# What is the average of the outcomes? I think it is going to be 3.5:
# with random decisions it is ultimately just the average of random dice rolls.

simulation_df['outcome'].mean()

3.4924318196972726

In [49]:
# pretty simple query from this point on:
# we want to know the average outcome for each state_decision value
simulation_df.groupby('state_decision')['outcome'].mean()

state_decision
Rem. rolls: 1 - Die value: 1, Decision: False    1.000000
Rem. rolls: 1 - Die value: 1, Decision: True     3.362025
Rem. rolls: 1 - Die value: 2, Decision: False    2.000000
Rem. rolls: 1 - Die value: 2, Decision: True     3.508475
Rem. rolls: 1 - Die value: 3, Decision: False    3.000000
Rem. rolls: 1 - Die value: 3, Decision: True     3.520833
Rem. rolls: 1 - Die value: 4, Decision: False    4.000000
Rem. rolls: 1 - Die value: 4, Decision: True     3.505800
Rem. rolls: 1 - Die value: 5, Decision: False    5.000000
Rem. rolls: 1 - Die value: 5, Decision: True     3.533654
Rem. rolls: 1 - Die value: 6, Decision: False    6.000000
Rem. rolls: 1 - Die value: 6, Decision: True     3.438679
Rem. rolls: 2 - Die value: 1, Decision: False    1.000000
Rem. rolls: 2 - Die value: 1, Decision: True     3.545018
Rem. rolls: 2 - Die value: 2, Decision: False    2.000000
Rem. rolls: 2 - Die value: 2, Decision: True     3.444828
Rem. rolls: 2 - Die value: 3, Decision: False    3.000000

In [57]:
# the issue is that the player's steps are random, so whenever we say "go ahead" (reroll), the expected outcome will be 3.5
# trying to group a different way now: by outcome

In [50]:
# count how many rows there are for each (outcome, state_decision) combination
grouped_table = simulation_df.groupby(['outcome','state_decision'])['state_decision'].count()

In [51]:
# raise pandas' display.max_rows option to 100 before printing the grouped table
pd.set_option('display.max_rows', 100)

In [52]:
# display the counts per outcome and state_decision
grouped_table

outcome  state_decision                               
1        Rem. rolls: 1 - Die value: 1, Decision: False    438
         Rem. rolls: 1 - Die value: 1, Decision: True      70
         Rem. rolls: 1 - Die value: 2, Decision: True      66
         Rem. rolls: 1 - Die value: 3, Decision: True      80
         Rem. rolls: 1 - Die value: 4, Decision: True      71
         Rem. rolls: 1 - Die value: 5, Decision: True      73
         Rem. rolls: 1 - Die value: 6, Decision: True      71
         Rem. rolls: 2 - Die value: 1, Decision: False    786
         Rem. rolls: 2 - Die value: 1, Decision: True     147
         Rem. rolls: 2 - Die value: 2, Decision: True     146
         Rem. rolls: 2 - Die value: 3, Decision: True     154
         Rem. rolls: 2 - Die value: 4, Decision: True     159
         Rem. rolls: 2 - Die value: 5, Decision: True     127
         Rem. rolls: 2 - Die value: 6, Decision: True     136
2        Rem. rolls: 1 - Die value: 1, Decision: True      70
         Rem. r