<a href="https://colab.research.google.com/github/LegendaryAKx3/algoverse-tsma/blob/main/KuhnPokerRiskEstimation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### 1. Setup

In [None]:
!pip install open_spiel

In [None]:
import pyspiel
from open_spiel.python.algorithms import outcome_sampling_mccfr
from collections import defaultdict

In [None]:
# Load the Kuhn poker game through pyspiel using its registered name.
game = pyspiel.load_game("kuhn_poker")

# Outcome-sampling MCCFR solver bound to this game; it is trained in a later
# cell by repeatedly calling `cfr_solver.iteration()`.
cfr_solver = outcome_sampling_mccfr.OutcomeSamplingSolver(game)

### 2. Training

In [None]:
# Run MCCFR so that the solver's average policy approaches a Nash equilibrium.
# The iteration count and progress interval are named here so the printed
# banner can never drift from the number of iterations actually executed.
iterations = 10000
report_every = 2000

print(f"Running MCCFR for {iterations:,} iterations...")
for i in range(iterations):
    cfr_solver.iteration()
    if (i + 1) % report_every == 0:
        print(f"  ...completed {i + 1} iterations")
print("Training complete.")

# The average policy (not the current one) is what later cells inspect.
policy = cfr_solver.average_policy()

### 3. Print Policy Function

In [None]:
def print_policy(policy, game):
    """Print the policy's action probabilities for every information set.

    The game tree is walked depth-first from `game.new_initial_state()`.
    The first state encountered for each (information-state string, player)
    pair is kept as that information set's representative, and `policy` is
    then queried on those representatives in sorted key order.

    Args:
        policy: Object exposing `action_probabilities(state)` returning a
            mapping from action to probability.
        game: Game object exposing `new_initial_state()` and
            `action_to_string(player, action)`.

    Returns:
        None. All output goes to stdout.
    """
    print("\n## Nash Equilibrium Strategies")

    # (info-state string, player) -> first state seen in that information set
    representatives = {}

    def collect(node):
        """Depth-first walk that records one state per information set."""
        if node.is_terminal():
            return

        if node.is_chance_node():
            # Chance nodes carry no policy; just descend into every outcome.
            successors = (node.child(outcome) for outcome, _ in node.chance_outcomes())
        else:
            key = (node.information_state_string(), node.current_player())
            if key not in representatives:
                representatives[key] = node.clone()
            successors = (node.child(move) for move in node.legal_actions())

        # Generators keep child creation interleaved with the recursion.
        for successor in successors:
            collect(successor)

    collect(game.new_initial_state())

    # Sorted keys give a stable, reproducible print order.
    for key in sorted(representatives):
        info_set, player = key
        probabilities = policy.action_probabilities(representatives[key])
        labels = {move: game.action_to_string(player, move) for move in probabilities}

        print(f"\n**Player {player} | Info Set: `{info_set}`**")
        for move, probability in probabilities.items():
            print(f"  - Action `{labels[move]}`: {probability:.3f}")


### 4. Risk Calculation Function

In [None]:
def _expected_outcome(state, player, policy):
    """Return (expected payoff, loss probability) for `player` from `state`.

    The subtree below `state` is evaluated recursively until terminal states
    are reached. Decision nodes are weighted by
    `policy.action_probabilities(state)` and chance nodes by
    `state.chance_outcomes()`. A terminal state counts as a loss when the
    player's return is strictly negative.
    """
    if state.is_terminal():
        payoff = state.returns()[player]
        return payoff, (1.0 if payoff < 0 else 0.0)

    if state.is_chance_node():
        weighted_actions = state.chance_outcomes()
    else:
        weighted_actions = policy.action_probabilities(state).items()

    expected_payoff = 0.0
    loss_prob = 0.0
    for next_action, prob in weighted_actions:
        child_payoff, child_loss = _expected_outcome(
            state.child(next_action), player, policy
        )
        expected_payoff += prob * child_payoff
        loss_prob += prob * child_loss
    return expected_payoff, loss_prob


def calculate_risk(info_set_str, action_str, policy, game):
    """Estimate expected payoff and loss probability of an action at an info set.

    For every card the opponent could hold, the concrete game state is rebuilt
    (deal, then the action history), the evaluated action is applied, and the
    rest of the hand is played out under `policy` until a terminal state.
    Following the tree all the way to terminal states matters: after e.g.
    "pass, opponent bets" the hand is not over yet, and reading `returns()`
    from such an intermediate state would not give the final payoff.

    NOTE: each possible opponent card is weighted uniformly. The belief is
    not updated from the actions the opponent has already taken in the
    history, so the result is not a Bayesian posterior expectation.

    Args:
        info_set_str: Player's card ('J', 'Q' or 'K') followed by the action
            history, one character per action ('p' = pass, 'b' = bet).
            The acting player is the history length modulo 2.
        action_str: Action to evaluate: 'Pass', 'Bet', 'p' or 'b'.
        policy: Object exposing `action_probabilities(state)`; used for all
            decisions (both players) after the evaluated action.
        game: Game object exposing `new_initial_state()`.

    Returns:
        Tuple `(expected_payoff, loss_probability)` from the acting player's
        point of view.

    Raises:
        KeyError: If the card, a history character, or `action_str` is not
            one of the recognised symbols.
        IndexError: If `info_set_str` is empty.
    """
    # Lookup tables are loop-invariant, so build them once up front.
    card_map = {'J': 0, 'Q': 1, 'K': 2}
    history_action_map = {'p': 0, 'b': 1}
    action_map = {'Pass': 0, 'Bet': 1, 'p': 0, 'b': 1}

    # Deconstruct the info set string: first char is the card, rest is history.
    player_card = card_map[info_set_str[0]]
    history_str = info_set_str[1:]

    # Players alternate starting with player 0, so parity gives who acts now.
    player = len(history_str) % 2
    action_to_eval = action_map[action_str]

    opponent_cards = [c for c in card_map.values() if c != player_card]
    belief = 1.0 / len(opponent_cards)

    total_expected_payoff = 0.0
    total_loss_prob = 0.0

    # Iterate through each "possible world" (each possible opponent hand).
    for opp_card in opponent_cards:
        # Deal order is always player 0's card first, then player 1's.
        if player == 0:
            deal = [player_card, opp_card]
        else:
            deal = [opp_card, player_card]

        state = game.new_initial_state()
        for card in deal:
            state.apply_action(card)
        for act_char in history_str:
            state.apply_action(history_action_map[act_char])

        state.apply_action(action_to_eval)

        world_payoff, world_loss_prob = _expected_outcome(state, player, policy)

        total_expected_payoff += belief * world_payoff
        total_loss_prob += belief * world_loss_prob

    return total_expected_payoff, total_loss_prob

### 5. Execution

In [None]:
print_policy(policy, game)

print("\n" + "="*40)
print("## Risk Calculation Example")
print("="*40)

# Info set format: [Card][Action History]
# First character: J (Jack), Q (Queen), or K (King) - the acting player's card.
# Remaining characters: p (pass) or b (bet) - the actions taken so far,
# alternating Player 0, Player 1, Player 0, ...
# Example: "Kp" = the acting player holds a King and the opponent has passed.
info_set = "Jp"
action = "Bet"  # "Bet" or "Pass"

# calculate_risk treats the history length modulo 2 as the acting player;
# derive the label the same way so the message stays correct when
# `info_set` is changed.
acting_player = (len(info_set) - 1) % 2

# Announce the computation before running it.
print(f"\nCalculating risk for Player {acting_player} with Info Set `{info_set}` and Action `{action}`...")
expected_payoff, loss_prob = calculate_risk(info_set, action, policy, game)

print(f"\n  - **Expected Payoff:** ${expected_payoff:.3f}")
print(f"  - **Probability of Loss:** {loss_prob:.3f} ({loss_prob:.1%})")