# Analysing brute force solution

In [2]:
# This cell just makes it easier to import modules from the parent directory. It's redundant at the moment.
import sys 
sys.path.append('..')

In [9]:
import pickle 
from itertools import islice

# These are all third-party packages, which you will have to install first.
import numpy as np 
import plotly.graph_objects as go

In [5]:
# The policy produced by the brute force method was saved with pickle, so
# reading it back is a single load call.
# NOTE: pickle.load can execute arbitrary code, so only load files you trust.
with open('brute_force_solution.pkl', 'rb') as policy_file:
    policy = pickle.load(policy_file)

We can look at the first few entries to see what the file contains.

In [11]:
# Peek at the first five entries only; islice avoids copying the whole dictionary.
for state, action in islice(policy.items(), 5):
    print(f'Dictionary item with key: {state} and value: {action}')

Dictionary item with key: (0, 0, 0) and value: 1
Dictionary item with key: (0, 0, 1) and value: 1
Dictionary item with key: (0, 0, 2) and value: 1
Dictionary item with key: (0, 0, 3) and value: 1
Dictionary item with key: (0, 0, 4) and value: 1


Hence, we see it is a dictionary keyed by every possible state, where each key is a tuple `(opponents_score, player_score, points_unbanked)`. The value stored for each state is binary: hit (1) or stick (0).

In [None]:
def _highest_hit_before_hold(V: dict, x: int, y: int, max_turn: int):
    """
    Scan turn totals 0..max_turn-1 for state (x, y) and return the turn total
    just before the first hold, i.e. ``z - 1`` for the first ``z`` whose policy
    value is 0. Returns -1 when the policy holds at z == 0, and None when no
    hold is found anywhere in the scanned range.
    """
    for z in range(max_turn):
        # A state missing from the policy is treated the same as an explicit hold (0).
        if V.get((x, y, z), 0) == 0:
            return z - 1
    return None


def plot_highest_hit_surface(V: dict, target_score: int = 100) -> None:
    """
    Plots the highest value of z (turn total) for which V[x,y,z] == 1 before flipping to 0.
    This shows how aggressive the policy is — how far you're willing to roll.

    Parameters
    ----------
    V : dict
        Policy keyed by (opponent score, player score, turn total); a value of
        1 means hit and 0 means hold. Missing keys are treated as hold.
    target_score : int
        Both score axes cover 0..target_score-1, and turn totals are scanned
        over 0..2*target_score-1.

    Notes
    -----
    Only the first hold along z is considered, so any region where the policy
    switches back to hitting at a higher turn total is not shown. Cells where
    the policy holds at z == 0, or never holds within the scanned range, are
    left as NaN and appear as gaps in the surface.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    n_opponent = n_player = target_score
    max_turn = target_score * 2  # extra margin

    opponent_scores = np.arange(n_opponent)
    player_scores = np.arange(n_player)

    # Axis 0 is the opponent score and axis 1 the player score, which matches
    # both the Z[x, y] writes below and the indexing='ij' coordinate grids.
    Z = np.full((n_opponent, n_player), np.nan)

    for x in range(n_opponent):
        for y in range(n_player):
            z_hit = _highest_hit_before_hold(V, x, y, max_turn)
            if z_hit is not None and z_hit >= 0:
                Z[x, y] = z_hit

    X_grid, Y_grid = np.meshgrid(opponent_scores, player_scores, indexing='ij')

    surface = go.Surface(
        z=Z,
        x=X_grid,
        y=Y_grid,
        colorscale='RdYlGn',
        colorbar=dict(title="Highest Hit Turn Total"),
        showscale=True
    )
    fig = go.Figure(data=[surface])

    fig.update_layout(
        scene=dict(
            xaxis=dict(title='Opponent Score'),
            yaxis=dict(title='Player Score'),
            zaxis=dict(title='Highest Turn Total Rolled'),
        ),
        title="Aggressiveness Surface: Highest Hit Value Before Hold",
        width=900,
        height=750
    )

    fig.show()


In [13]:
# Draw the surface for the loaded policy, using the default target score of 100.
plot_highest_hit_surface(policy)

We can see here that this looks close; however, we do not have any of the nice overhang behaviour which we see in the paper. This is due to a simplification in the plotting method, which only plots up to the first non-hit value.