In [6]:
import WesBotTWO as wb
import HandOddsCalcWes as hoc

import texasholdem as th
import texasholdem.evaluator as eval
import numpy as np
import pandas as pd
import random
from tqdm import trange

In [7]:
# smort_decision(our_bot:PokerBot, game:th.TexasHoldEm)
# actually make a goddamn decision using my scuffed bot thingy, and record
# all the necessary information for Shro' to use in his thing
# INPUTS:
#   our_bot: a PokerBot object, supposed to be intelligent or something
#   game:    a TexasHoldEm game object that we're playing
# OUTPUT:
#   a list containing:
#     [hand_phase:str,
#      my_hand:list of th.Card,
#      board:list of th.Card,
#      EV:float,
#      my_chips_betting:int,
#      their_chips_betting:int,
#      my_decision:Decision]
# SIDE EFFECT:
#   makes the decision in the TexasHoldEm game object passed in
def smort_decision(our_bot:wb.PokerBot, game:th.TexasHoldEm):
  # get information regarding our hand, the game board, our win prob,
  # their chips bet, and our EV
  win_prob =  hoc.estimate_win_prob(game, game.current_player, 2, 1000)
  loss_prob = 1 - win_prob
  my_chips_betting = game.player_bet_amount(0)
  their_chips_betting = game.player_bet_amount(1)
  EV = win_prob*their_chips_betting - loss_prob*(my_chips_betting + \
                                                 game.chips_to_call(0))
  # get features for Shro'
  my_decision = our_bot.make_decision(EV, game)
  my_hand = game.get_hand(game.current_player)
  board = []
  for card in game.board:
    board.append(card)
  # record hand phase
  hand_phase = game.hand_phase.name
  # actually make the decision
  if (my_decision.type == "RAISE"):
    if (game.validate_move(action = th.ActionType.RAISE,\
                           value = my_decision.size)):
      game.take_action(th.ActionType.RAISE, my_decision.size)
    else:
      game.take_action(th.ActionType.ALL_IN)
  elif (my_decision.type == "CALL/CHECK"):
    if (game.validate_move(action = th.ActionType.CALL)):
      game.take_action(th.ActionType.CALL)
    else:
      game.take_action(th.ActionType.CHECK)
  elif (my_decision.type == "FOLD"):
    game.take_action(th.ActionType.FOLD)
  else:
    game.take_action(th.ActionType.ALL_IN)
  output = [hand_phase, my_hand, board, EV, my_chips_betting, their_chips_betting, my_decision]
  return output

# baby_decision(game:th.TexasHoldEm)
# make a random decision for the opponent, just to get someone to play against
# INPUTS:
#   game: a th.TexasHoldEm object that we're playing
# SIDE EFFECT:
#   makes the random decision in the game object passed in
def baby_decision(game:th.TexasHoldEm):
  # make random decision
  if (np.random.rand() > 0.5):
    # baby gonna raise
    min_raise = int(game.get_available_moves().raise_range.start)
    max_raise = int(np.min([game.players[0].chips,
                            game.get_available_moves().raise_range.stop]))
    # make sure we only raise by some reasonable amount
    max_raise = int((3*min_raise + max_raise) // 4)
    if (max_raise - min_raise > 20):
      max_raise = min_raise + 20
    decision = \
      wb.Decision("RAISE", int(np.random.uniform(min_raise, max_raise)))
  else:
    # baby gonna call/check
    decision = wb.Decision("CALL/CHECK")
  # ensure validity of decision and actually make decision
  if (decision.type == "RAISE" and
      game.validate_move(action = th.ActionType.RAISE, value = decision.size)):
    game.take_action(th.ActionType.RAISE, decision.size)
    return -1
  elif (game.validate_move(action = th.ActionType.CALL)):
    game.take_action(th.ActionType.CALL)
    return -1
  elif (game.validate_move(action = th.ActionType.CHECK)):
    game.take_action(th.ActionType.CHECK)
    return -1
  game.take_action(th.ActionType.FOLD)
  return 0
  
      

In [36]:
epochs = 5000
our_bot = wb.PokerBot(maturity=epochs//4)

rounds_list = []

for i in trange(epochs):
  game = th.TexasHoldEm(buyin=500, big_blind=5, small_blind=2, max_players=2)
  game.start_hand()
  current_round = []
  while (game.is_hand_running()):
    # branch on whose turn
    if (game.current_player == 0):
      our_decision = smort_decision(our_bot, game)
      current_round.append(our_decision)
      if (our_decision[6].type == "FOLD"):
        who_won = 1
      else:
        who_won = -1
    else:
      who_won = baby_decision(game)
    if (who_won == -1 and len(game.board) == 5):
      jerry_hand_rank = eval.evaluate(game.get_hand(0), game.board)
      tom_hand_rank = eval.evaluate(game.get_hand(1), game.board)
      who_won = int(jerry_hand_rank >= tom_hand_rank)
  # now round is done, close round and go again
  won_or_lost_chips = game._get_last_pot().amount*((-1)**who_won)
  our_bot.log_round(won_or_lost_chips)
  rounds_list.append([current_round, won_or_lost_chips])


100%|██████████| 5000/5000 [2:24:22<00:00,  1.73s/it]  


In [37]:
outcomes = np.zeros(len(rounds_list))

for i in range(len(rounds_list)):
  outcomes[i] = rounds_list[i][1]

print("Avg. outcomes:", np.mean(outcomes))
print("Avg. win rate:", np.mean(outcomes > 0))

Avg. outcomes: 13.913
Avg. win rate: 0.295


In [43]:
class Node:
    def __init__(self, decision, children=None):
        self.decision = decision
        self.children = children if children else []

    def add_child(self, child_node):
        self.children.append(child_node)

class DecisionTree:
    def __init__(self, root, outcome):
        self.root = root
        self.outcome = outcome

# Create decision trees from rounds_list
decision_trees = []
for round_entry in rounds_list:
    decisions = round_entry[0]  # The list of decisions
    outcome = round_entry[1]    # The outcome (tail of the entry)

    # Build the tree structure
    root = Node(decisions[0])  # First decision as the root
    current_node = root
    for decision in decisions[1:]:
        new_node = Node(decision)
        current_node.add_child(new_node)
        current_node = new_node

    decision_trees.append(DecisionTree(root, outcome))



# Print the structure of the first tree
def print_tree(node, depth=0):
    print("  " * depth + f"Decision: {node.decision}")
    for child in node.children:
        print_tree(child, depth + 1)


print_tree(decision_trees[0].root)
print("Outcome:", decision_trees[0].outcome)

Decision: ['PREFLOP', [Card("4h"), Card("6h")], [], np.float64(-2.6400000000000006), 5, 20, <WesBotTWO.Decision object at 0x00000126D15F2750>]
  Decision: ['FLOP', [Card("4h"), Card("6h")], [Card("Ac"), Card("Qd"), Card("5h")], np.float64(0.0), 0, 0, <WesBotTWO.Decision object at 0x00000126C83FEA50>]
    Decision: ['TURN', [Card("4h"), Card("6h")], [Card("Ac"), Card("Qd"), Card("5h"), Card("4d")], np.float64(0.0), 0, 0, <WesBotTWO.Decision object at 0x00000126C83FE270>]
      Decision: ['TURN', [Card("4h"), Card("6h")], [Card("Ac"), Card("Qd"), Card("5h"), Card("4d")], np.float64(-1.6820000000000022), 10, 29, <WesBotTWO.Decision object at 0x00000126D15F1340>]
        Decision: ['RIVER', [Card("4h"), Card("6h")], [Card("Ac"), Card("Qd"), Card("5h"), Card("4d"), Card("Th")], np.float64(0.0), 0, 0, <WesBotTWO.Decision object at 0x00000126D15ED1F0>]
          Decision: ['RIVER', [Card("4h"), Card("6h")], [Card("Ac"), Card("Qd"), Card("5h"), Card("4d"), Card("Th")], np.float64(-3.1320000000

In [60]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from gymnasium import spaces, Env  # Use gymnasium instead of gym
import numpy as np

# Define a custom environment for decision trees
class DecisionTreeEnv(Env):
    metadata = {'render.modes': ['human']}
    def __init__(self, decision_trees):
        self.decision_trees = decision_trees
        self.current_tree_idx = 0
        self.current_node = None
        self.action_space = spaces.Discrete(3)  # Example: 3 actions (RAISE, CALL, FOLD)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(7,), dtype=np.float32)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)  # Ensure compatibility with gymnasium
        self.current_tree_idx = np.random.randint(len(self.decision_trees))
        self.current_node = self.decision_trees[self.current_tree_idx].root
        return self._get_observation(), {}

    def step(self, action):
        # Map action to decision
        if action == 0:
            decision = "RAISE"
        elif action == 1:
            decision = "CALL/CHECK"
        else:
            decision = "FOLD"

        info = {}  # Additional information can be added here
        reward = self.decision_trees[self.current_tree_idx].outcome
        done = True  # End after one decision for simplicity
        return self._get_observation(), reward, done, {"decision": decision}, info

    def _get_observation(self):
        # Convert the current node's decision into a numerical observation
        hand_phase = self.current_node.decision[0]
        my_hand = self.current_node.decision[1]
        board = self.current_node.decision[2]
        EV = self.current_node.decision[3]
        my_chips_betting = self.current_node.decision[4]
        their_chips_betting = self.current_node.decision[5]
        decision_type = self.current_node.decision[6].type

        # Map hand_phase and decision_type to numerical values
        hand_phase_map = {"PREFLOP": 0, "FLOP": 1, "TURN": 2, "RIVER": 3}
        decision_type_map = {"RAISE": 0, "CALL/CHECK": 1, "FOLD": 2}

        hand_phase_num = hand_phase_map.get(hand_phase, -1)
        decision_type_num = decision_type_map.get(decision_type, -1)

        # Create the observation array
        return np.array([
            hand_phase_num,
            len(my_hand),  # Number of cards in hand
            len(board),    # Number of cards on the board
            EV,
            my_chips_betting,
            their_chips_betting,
            decision_type_num
        ], dtype=np.float32)

# Wrap the environment
env = DummyVecEnv([lambda: DecisionTreeEnv(decision_trees)])

# Create the PPO model
model = PPO("MlpPolicy", env, verbose=1)

# Train the model
model.learn(total_timesteps=10000)

# Save the model
model.save("ppo_decision_tree_model")


Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1575 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1005        |
|    iterations           | 2           |
|    time_elapsed         | 4           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.010596716 |
|    clip_fraction        | 0.0535      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | -0.00024    |
|    learning_rate        | 0.0003      |
|    loss                 | 8.5e+04     |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00427    |
|    value_loss           | 1.13e+05    |
-----------------------------------------
-----------------

In [63]:
from tqdm import tqdm  # tqdm is already imported, but this ensures clarity

# Reset metrics
ppo_bot_wins = 0
ppo_bot_chips = 0
ppo_bot_decisions = []

smort_bot_wins = 0
smort_bot_chips = 0
smort_bot_decisions = []

total_games = 100  # Number of games to simulate

for _ in tqdm(range(total_games), desc="Simulating games"):
    # Initialize a new game
    game = th.TexasHoldEm(buyin=500, big_blind=5, small_blind=2, max_players=2)
    game.start_hand()

    while game.is_hand_running():
        if game.current_player == 0:  # SMORT bot's turn
            smort_decision_result = smort_decision(our_bot, game)
            smort_bot_decisions.append(smort_decision_result)
            if smort_decision_result[6].type == "FOLD":
                who_won = 1  # PPO bot wins
            else:
                who_won = -1
        else:  # PPO bot's turn
            obs = env.reset()
            action, _ = model.predict(obs)
            decision = None
            if action == 0:
                EV = obs[0][3]  # Extract EV from observation
                min_raise = int(game.get_available_moves().raise_range.start)
                max_raise = int(np.min([game.players[0].chips, game.get_available_moves().raise_range.stop]))
                raise_amount = int(min_raise + (max_raise - min_raise) * max(0, min(1, EV)))
                decision = (th.ActionType.RAISE, raise_amount)
            elif action == 1:
                decision = (th.ActionType.CALL, None)
            else:
                decision = (th.ActionType.FOLD, None)

            if decision[0] == th.ActionType.RAISE and game.validate_move(action=decision[0], value=decision[1]):
                game.take_action(decision[0], decision[1])
            elif decision[0] == th.ActionType.CALL and game.validate_move(action=decision[0]):
                game.take_action(decision[0])
            elif decision[0] == th.ActionType.FOLD:
                game.take_action(decision[0])
                who_won = 0  # SMORT bot wins
            else:
                # Ensure CHECK is only attempted when valid
                if game.validate_move(action=th.ActionType.CHECK):
                    game.take_action(th.ActionType.CHECK)
                else:
                    game.take_action(th.ActionType.FOLD)

    # Determine the winner if the hand reaches showdown
    if who_won == -1 and len(game.board) == 5:
        jerry_hand_rank = eval.evaluate(game.get_hand(0), game.board)
        tom_hand_rank = eval.evaluate(game.get_hand(1), game.board)
        who_won = int(jerry_hand_rank >= tom_hand_rank)

    # Update metrics
    won_or_lost_chips = game._get_last_pot().amount * ((-1) ** who_won)
    if who_won == 0:
        ppo_bot_wins += 1
        ppo_bot_chips += won_or_lost_chips
    else:
        smort_bot_wins += 1
        smort_bot_chips += -won_or_lost_chips

# Print metrics
print("PPO Bot Wins:", ppo_bot_wins)
print("PPO Bot Total Chips:", ppo_bot_chips)
print("SMORT Bot Wins:", smort_bot_wins)
print("SMORT Bot Total Chips:", smort_bot_chips)

Simulating games: 100%|██████████| 100/100 [02:26<00:00,  1.47s/it]

PPO Bot Wins: 47
PPO Bot Total Chips: 15323
SMORT Bot Wins: 53
SMORT Bot Total Chips: 15008.0



