In [None]:
import numpy as np
from dataclasses import dataclass

@dataclass
class SolveResult:
    """Outcome of the search over p: the best p found and the q it yields."""

    p_opt: float  # value of p at which the search found the largest q
    q_opt: float  # q obtained at p_opt

def _solve_stage_game(ball_val, strike_val, hit_val):
    """Solve one count's 2x2 zero-sum stage game.

    The batter (maximiser) swings with probability x and the pitcher
    (minimiser) throws a strike with probability y.  Payoffs to the batter:

                     pitcher: ball    pitcher: strike
        wait           ball_val         strike_val
        swing          strike_val       hit_val

    Returns a tuple ``(value, x, y)``: the batter's maximin value and the
    strategies selected for both players.
    """
    tol = 1e-15

    def vs_ball(x):
        # Batter's payoff against a ball when swinging with probability x.
        return ball_val + x * (strike_val - ball_val)

    def vs_strike(x):
        # Batter's payoff against a strike when swinging with probability x.
        return strike_val + x * (hit_val - strike_val)

    # Swing probability at which the two lines above intersect, if any.
    denom = 2 * strike_val - ball_val - hit_val
    x_mix = None
    if abs(denom) > tol:
        xm = (strike_val - ball_val) / denom
        if -1e-12 <= xm <= 1.0 + 1e-12:
            x_mix = 0.0 if xm < 0 else 1.0 if xm > 1 else xm

    # Maximise min(vs_ball, vs_strike) over the endpoints and the intersection.
    candidates = [0.0, 1.0]
    if x_mix is not None:
        candidates.append(x_mix)

    best_val = -1e18
    best_x = best_y = 0.0
    for idx, xc in enumerate(candidates):
        v_ball = vs_ball(xc)
        v_strk = vs_strike(xc)
        if idx == 2 and 0.0 < xc < 1.0:
            # Interior intersection: the two payoffs are equal up to rounding.
            # The matrix is symmetric, so the pitcher's equalising mix has the
            # same value as the batter's.  Assign it by construction rather
            # than relying on the float tie test below, which rounding error
            # can defeat and which would then hand the pitcher a pure strategy.
            val, y = v_ball, x_mix
        elif v_ball < v_strk - tol:
            val, y = v_ball, 0.0      # pitcher's best response: ball
        elif v_strk < v_ball - tol:
            val, y = v_strk, 1.0      # pitcher's best response: strike
        else:
            # Pitcher is indifferent at this endpoint.
            val = v_ball
            y = x_mix if x_mix is not None else 0.5
        if val > best_val + tol:
            best_val, best_x, best_y = val, float(xc), float(y)

    return best_val, best_x, best_y


def solve_for_p(p: float) -> float:
    """Return the probability of reaching the (3, 2) count from (0, 0).

    States are counts (b, s) with b = 0..3 balls and s = 0..2 strikes.  At
    each count both players use the strategies found by solving the zero-sum
    stage game whose payoff is the batter's expected points:

    * wait on a ball: count moves to (b+1, s); from b == 3 this pays 1.0 and
      the at-bat ends;
    * wait on a strike, or swing at a ball: count moves to (b, s+1); from
      s == 2 this pays 0.0 and the at-bat ends;
    * swing at a strike: pays 4 points with probability ``p``, otherwise it
      counts as a strike.

    Args:
        p: probability that a swing at a strike scores the 4 points.

    Returns:
        q[0][0], the probability that play started at (0, 0) passes through
        (3, 2) under the selected strategies.
    """
    value = [[0.0] * 3 for _ in range(4)]       # batter's expected points
    reach = [[0.0] * 3 for _ in range(4)]       # P(reach (3, 2)) from (b, s)

    # Backward induction: (b, s) depends only on (b+1, s) and (b, s+1).
    for b in range(3, -1, -1):
        for s in range(2, -1, -1):
            ball_val = 1.0 if b == 3 else value[b + 1][s]
            strike_val = 0.0 if s == 2 else value[b][s + 1]
            hit_val = 4.0 * p + (1.0 - p) * strike_val

            value[b][s], x, y = _solve_stage_game(ball_val, strike_val, hit_val)

            if b == 3 and s == 2:
                reach[b][s] = 1.0   # already at the full count
                continue

            # Ball thrown and batter waits: count moves to (b+1, s).
            p_ball_inc = (1.0 - x) * (1.0 - y)
            # Strike added: strike & wait, ball & swing, or strike & swing
            # that does not score (probability 1 - p).
            p_strike_inc = y * (1.0 - x) + (1.0 - y) * x + x * y * (1.0 - p)

            prob = 0.0
            if b < 3:               # from b == 3 a further ball ends the at-bat
                prob += p_ball_inc * reach[b + 1][s]
            if s < 2:               # from s == 2 a further strike ends the at-bat
                prob += p_strike_inc * reach[b][s + 1]
            reach[b][s] = prob

    return reach[0][0]

def _argmax_on_grid(lo, hi, step):
    """Evaluate solve_for_p on lo, lo+step, ... (not beyond hi); return (best_q, best_p)."""
    # Integer point count instead of repeated `p += step`, so that rounding
    # drift cannot decide whether the last grid point is evaluated.  The small
    # slack keeps an exact multiple (e.g. 20.0 computed as 19.999...) intact.
    n_steps = int((hi - lo) / step + 1e-9)
    best_q, best_p = -1.0, None
    for k in range(n_steps + 1):
        p = min(lo + k * step, hi)
        qv = solve_for_p(p)
        if qv > best_q:             # strict: the first maximiser wins ties
            best_q, best_p = qv, p
    return best_q, best_p


def maximize_q() -> "SolveResult":
    """Search p in (0, 1) for the value that maximises ``solve_for_p(p)``.

    A coarse scan with spacing 1e-3 is followed by twelve refinement passes.
    Each pass re-scans a window around the incumbent with a smaller step and
    then narrows the window to +/- 10 steps around the new best point.

    NOTE(review): the refinement only looks near the best coarse grid point,
    so it presumably relies on the objective having a single peak there.

    Returns:
        SolveResult with the best p found and the q value at that p.
    """
    # Exclude the exact endpoints (p = 0 or 1) to avoid tie degeneracy.
    best_q, best_p = _argmax_on_grid(1e-6, 1.0 - 1e-6, 1e-3)

    lo = max(1e-9, best_p - 5e-3)
    hi = min(1.0 - 1e-9, best_p + 5e-3)
    steps = (5e-4, 2e-4, 1e-4, 5e-5, 2e-5, 1e-5,
             5e-6, 2e-6, 1e-6, 5e-7, 2e-7, 1e-7)
    for step in steps:
        best_q, best_p = _argmax_on_grid(lo, hi, step)
        lo = max(1e-9, best_p - 10 * step)
        hi = min(1.0 - 1e-9, best_p + 10 * step)
    return SolveResult(p_opt=best_p, q_opt=best_q)

# Run the search once and report the optimum; `res` stays bound at module level.
res = maximize_q()
print(
    f"Optimal p ≈ {res.p_opt:.12f}",
    f"Max q     ≈ {res.q_opt:.12f}",
    f"q (10 d.p.) = {res.q_opt:.10f}",
    sep="\n",
)


Optimal p ≈ 0.226973200000
Max q     ≈ 0.295967993374
q (10 d.p.) = 0.2959679934
