In [26]:
import WesBot as wb
import HandOddsCalcWes as hoc

import texasholdem as th
import texasholdem.evaluator as eval
import numpy as np
import pandas as pd
import random
from tqdm import trange

In [27]:
class Round:
  def __init__(self):
    self.decisions = []
    self.outcome = int(0)
  
  def add_decision(self, decision:list):
    self.decisions.append(decision)
  
  def close_round(self, outcome:int):
    self.outcome = outcome
  
  def get_decisions(self):
    return self.decisions
  
  def get_one_decision(self, index:int):
    return self.decisions[index]
  
  def get_outcome(self):
    return self.outcome
  
  def copy(self):
    output = Round()
    for decision in self.decisions:
      output.add_decision(decision)
    output.close_round(self.outcome)
    return output

In [28]:
# smort_decision(our_bot:PokerBot, game:th.TexasHoldEm)
# actually make a goddamn decision using my scuffed bot thingy, and record
# all the necessary information for Shro' to use in his thing
# INPUTS:
#   our_bot: a PokerBot object, supposed to be intelligent or something
#   game:    a TexasHoldEm game object that we're playing
# OUTPUT:
#   a list containing:
#     [hand_phase:str,
#      my_hand:list of th.Card,
#      board:list of th.Card,
#      EV:float,
#      my_chips_betting:int,
#      their_chips_betting:int,
#      my_decision:Decision]
# SIDE EFFECT:
#   makes the decision in the TexasHoldEm game object passed in
def smort_decision(our_bot:wb.PokerBot, game:th.TexasHoldEm):
  # get information regarding our hand, the game board, our win prob,
  # their chips bet, and our EV
  win_prob, my_hand_odds = \
    hoc.estimate_win_and_hand_probs(game, game.current_player, 2, 1000)
  loss_prob = 1 - win_prob
  my_chips_betting = game.player_bet_amount(0) + game.chips_to_call(0)
  their_chips_betting = game.player_bet_amount(1)
  EV = win_prob*their_chips_betting - loss_prob*my_chips_betting
  # get features for Shro'
  my_decision = our_bot.make_decision(EV, my_hand_odds, game)
  my_hand = game.get_hand(game.current_player)
  board = []
  for card in game.board:
    board.append(card)
  # record hand phase
  hand_phase = game.hand_phase.name
  # actually make the decision
  if (my_decision.type == "RAISE"):
    game.take_action(th.ActionType.RAISE, my_decision.size)
  elif (my_decision.type == "CALL/CHECK"):
    if (game.validate_move(action = th.ActionType.CALL)):
      game.take_action(th.ActionType.CALL)
    else:
      game.take_action(th.ActionType.CHECK)
  elif (my_decision.type == "FOLD"):
    game.take_action(th.ActionType.FOLD)
  else:
    game.take_action(th.ActionType.ALL_IN)
  output = [hand_phase, my_hand, board, EV, my_chips_betting, their_chips_betting, my_decision]
  return output

# baby_decision(game:th.TexasHoldEm)
# make a random decision for the opponent, just to get someone to play against
# INPUTS:
#   game: a th.TexasHoldEm object that we're playing
# SIDE EFFECT:
#   makes the random decision in the game object passed in
def baby_decision(game:th.TexasHoldEm):
  # opponent makes random decision (reused baby code)
  babys_decision = np.random.choice(3)
  if (babys_decision == 0):
    # baby will call/check if possible
    if (game.validate_move(action = th.ActionType.CALL) or
        game.validate_move(action = th.ActionType.CHECK)):
      decision = wb.Decision("CALL/CHECK")
    elif (game.validate_move(action = th.ActionType.ALL_IN)):
      decision = wb.Decision("ALLIN")
    else:
      decision = wb.Decision("FOLD")
  elif (babys_decision == 1):
    # baby will fold
    decision = wb.Decision("FOLD")
  else:
    # baby will raise if possible
    min_raise = game.get_available_moves().raise_range.start
    max_raise = int(np.min([game.players[game.current_player].chips,
                            game.get_available_moves().raise_range.stop]))
    if (min_raise <= max_raise and
        game.validate_move(action = th.ActionType.RAISE, value = min_raise) and
        game.validate_move(action = th.ActionType.RAISE, value = max_raise)):
      decision = \
        wb.Decision("RAISE", int(np.random.uniform(min_raise, max_raise)))
    elif (game.validate_move(action = th.ActionType.CALL) or
          game.validate_move(action = th.ActionType.CHECK)):
      decision = wb.Decision("CALL/CHECK")
    elif (game.validate_move(action = th.ActionType.ALL_IN)):
      decision = wb.Decision("ALLIN")
    else:
      decision = wb.Decision("FOLD")
  # actually make the decision
  if (decision.type == "RAISE"):
    game.take_action(th.ActionType.RAISE, decision.size)
  elif (decision.type == "CALL/CHECK"):
    if (game.validate_move(action = th.ActionType.CALL)):
      game.take_action(th.ActionType.CALL)
    else:
      game.take_action(th.ActionType.CHECK)
  elif (decision.type == "FOLD"):
    game.take_action(th.ActionType.FOLD)
    return 0
  else:
    game.take_action(th.ActionType.ALL_IN)
  return 1

# This loop is outdated now.

In [None]:
# game = th.TexasHoldEm(buyin=500, big_blind=5, small_blind=2, max_players=2)
# game.start_hand()
# current_round = Round()


# epochs = 1000
# our_bot = wb.PokerBot(k=50, EV_weight=10.0, maturity=epochs//4)



# rounds_list = [] # a list of Round objects

# for i in trange(epochs):

#   if (game.is_hand_running()):
#     # control flow time! we are player 0.
#     if (game.current_player == 0):
#       # make decision and record decision
#       our_decision = smort_decision(our_bot, game)
#       current_round.add_decision(our_decision)
#       if (our_decision[6].type == "FOLD"):
#         who_won = 1
#     else:
#       # the opponent (who is baby) makes a decision
#       # this also sees if baby folded. If he did, it sets who_won to 0
#       who_won = baby_decision(game)

#   # ensure a game and hand is running
#   if (not game.is_game_running()):
#     game = th.TexasHoldEm(buyin=500, big_blind=5, small_blind=2, max_players=2)
#     game.start_hand()
#     current_round = Round()
#   # start a new hand if needed
#   if (not game.is_hand_running()):
#     current_round.close_round(game._get_last_pot().amount*((-1)**who_won))
#     rounds_list.append(current_round.copy())
#     game.start_hand()
#     current_round = Round()

100%|██████████| 1000/1000 [00:46<00:00, 21.65it/s]


# Trying the loop again, without the Round object. Also, try using more epochs.

In [38]:
game = th.TexasHoldEm(buyin=500, big_blind=5, small_blind=2, max_players=2)
game.start_hand()
current_round = []


epochs = 100000
our_bot = wb.PokerBot(k=500, EV_weight=10.0, maturity=epochs//4)



rounds_list = [] # a list of Round objects

for i in trange(epochs):

  if (game.is_hand_running()):
    # control flow time! we are player 0.
    if (game.current_player == 0):
      # make decision and record decision
      our_decision = smort_decision(our_bot, game)
      # current_round.add_decision(our_decision)
      current_round.append(our_decision)
      if (our_decision[6].type == "FOLD"):
        who_won = 1
    else:
      # the opponent (who is baby) makes a decision
      # this also sees if baby folded. If he did, it sets who_won to 0
      who_won = baby_decision(game)

  # ensure a game and hand is running
  if (not game.is_game_running()):
    game = th.TexasHoldEm(buyin=500, big_blind=5, small_blind=2, max_players=2)
    game.start_hand()
    current_round = []
  # start a new hand if needed
  if (not game.is_hand_running()):
    # current_round.close_round(game._get_last_pot().amount*((-1)**who_won))
    won_or_lost_chips = game._get_last_pot().amount*((-1)**who_won)
    rounds_list.append([current_round, won_or_lost_chips])
    game.start_hand()
    current_round = []

100%|██████████| 100000/100000 [1:27:58<00:00, 18.95it/s] 


In [39]:
# Create a DataFrame with decisions and outcomes
data = {
    "decisions": [round_obj[0] for round_obj in rounds_list],  # Extract decisions (first element of each sublist)
    "outcomes": [round_obj[1] for round_obj in rounds_list]   # Extract outcomes (second element of each sublist)
}
decisions_outcomes_df = pd.DataFrame(data)

# Display the DataFrame
print(decisions_outcomes_df)

# Calculate total earnings
total_earnings = decisions_outcomes_df['outcomes'].sum()
print(f"Total Earnings: {total_earnings}")



                                               decisions  outcomes
0      [[PREFLOP, [Qh, 7h], [], 0.7699999999999996, 5...       -10
1      [[PREFLOP, [Ad, Ts], [], 1.21, 5, 5, <WesBot.D...       476
2                                                     []         7
3      [[PREFLOP, [2s, 9s], [], -0.56, 5, 5, <WesBot....        10
4                                                     []         7
...                                                  ...       ...
37624  [[PREFLOP, [7s, 5c], [], -0.77, 5, 5, <WesBot....       626
37625  [[PREFLOP, [Js, 3d], [], -2.309999999999995, 3...       -70
37626  [[PREFLOP, [5d, 9d], [], 0.03000000000000025, ...       -70
37627  [[PREFLOP, [Td, Ad], [], 29.84800000000001, 91...      -161
37628  [[PREFLOP, [3d, 2d], [], -1.21, 5, 5, <WesBot....        -7

[37629 rows x 2 columns]
Total Earnings: -1857342


In [40]:
class DecisionNode:
    def __init__(self, decision=None, outcome=None):
        self.decision = decision
        self.outcome = outcome
        self.children = []

    def add_child(self, child_node):
        self.children.append(child_node)

    def __repr__(self):
        if self.outcome is not None:
            return f"Outcome: {self.outcome}"
        return f"Decision: {self.decision}"


def format_decision(decision):
    return {
        "hand_phase": decision[0],
        "my_hand": decision[1],
        "board": decision[2],
        "EV": round(float(decision[3]), 4),
        "my_chips_betting": decision[4],
        "their_chips_betting": decision[5],
        "my_decision": decision[6]
    }

def build_decision_tree_with_format(row):
    # Start with the outcome as the root node
    root = DecisionNode(outcome=row['outcomes'])
    current_node = root

    # Traverse the decisions in order
    for decision in row['decisions']:
        formatted_decision = format_decision(decision)
        new_node = DecisionNode(decision=formatted_decision)
        current_node.add_child(new_node)
        current_node = new_node  # Move to the new node for the next decision

    return root

# Build decision trees with formatted decisions for all rows in the DataFrame
formatted_decision_trees = [build_decision_tree_with_format(row) for _, row in decisions_outcomes_df.iterrows()]

def print_tree(node, level=0):
    indent = "  " * level
    print(f"{indent}{node}")
    for child in node.children:
        print_tree(child, level + 1)

# Print the entire tree for the first formatted decision tree
print_tree(formatted_decision_trees[3])

Outcome: 10
  Decision: {'hand_phase': 'PREFLOP', 'my_hand': [Card("2s"), Card("9s")], 'board': [], 'EV': -0.56, 'my_chips_betting': 5, 'their_chips_betting': 5, 'my_decision': <WesBot.Decision object at 0x0000019D242D4FE0>}


In [41]:
from stable_baselines3 import PPO

# Import stable_baselines3 without manually defining a __version__ attribute
import stable_baselines3
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env
from gymnasium import spaces, Env  # Ensure gymnasium is used for spaces and Env
import numpy as np

# Define a custom environment for reinforcement learning
class PokerEnv(Env):
    def __init__(self, decision_trees):
        super(PokerEnv, self).__init__()
        self.decision_trees = decision_trees
        self.current_tree_index = 0
        self.current_node = None

        # Define action and observation space
        # Actions: 0 = Fold, 1 = Call/Check, 2 = Raise
        self.action_space = spaces.Discrete(3)

        # Observations: EV, my_chips_betting, their_chips_betting
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float32)

    def reset(self, seed=None, options=None):
        # Handle the seed argument if provided
        if seed is not None:
            np.random.seed(seed)
        # Reset to the root of the next decision tree
        self.current_tree_index = (self.current_tree_index + 1) % len(self.decision_trees)
        self.current_node = self.decision_trees[self.current_tree_index]
        return self._get_observation(), {}

    def step(self, action):
        # Simulate the action and move to the next node
        if len(self.current_node.children) > 0:
            self.current_node = self.current_node.children[0]  # Move to the first child for simplicity
        else:
            done = True
            reward = self.current_node.outcome
            return self._get_observation(), reward, done, False, {}

        # Calculate reward based on the outcome
        reward = self.current_node.outcome if self.current_node.outcome is not None else 0
        done = len(self.current_node.children) == 0
        return self._get_observation(), reward, done, False, {}

    def _get_observation(self):
        # Extract relevant features from the current node
        if self.current_node.decision is not None:
            return np.array([
                self.current_node.decision["EV"],
                self.current_node.decision["my_chips_betting"],
                self.current_node.decision["their_chips_betting"]
            ], dtype=np.float32)
        else:
            return np.zeros(3, dtype=np.float32)

# Create the environment using the formatted decision trees
env = DummyVecEnv([lambda: PokerEnv(formatted_decision_trees)])

# Train a PPO model on the environment
ppo_model = PPO("MlpPolicy", env, verbose=1)
ppo_model.learn(total_timesteps=10000)



Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1423 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 913         |
|    iterations           | 2           |
|    time_elapsed         | 4           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.003622172 |
|    clip_fraction        | 0.0124      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.1        |
|    explained_variance   | 3.67e-05    |
|    learning_rate        | 0.0003      |
|    loss                 | 1.99        |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.000264   |
|    value_loss           | 410         |
-----------------------------------------
-----------------

<stable_baselines3.ppo.ppo.PPO at 0x19d270fa960>

In [42]:
# Initialize metrics for tracking total earnings and win rate
metrics = {
    "total_earnings": 0,
    "model_win_rate": 0.0
}

model_wins = 0  # Counter for model's wins
total_games = 0  # Counter for total games played

epochs = 1000  # Number of epochs for simulation
rounds_list = []  # List to store rounds

# Simulate games between the RL model and the smart bot
for epoch in trange(epochs):
    # Ensure a game and hand are running
    game = th.TexasHoldEm(buyin=500, big_blind=5, small_blind=2, max_players=2)
    game.start_hand()
    while not game.is_hand_running():
        game.start_hand()

    current_round = Round()
    done = False

    while not done:
        if game.current_player == 0:
            # RL model's turn
            obs = env.reset()
            action, _ = ppo_model.predict(obs, deterministic=True)
            if random.random() == 0:
                if game.validate_move(action=th.ActionType.ALL_IN):
                    game.take_action(th.ActionType.ALL_IN)
                else:
                    game.take_action(th.ActionType.FOLD)  # Fallback action
            elif action == 0:
                if game.validate_move(action=th.ActionType.FOLD):
                    game.take_action(th.ActionType.FOLD)
            elif action == 1:
                if game.validate_move(action=th.ActionType.CALL):
                    game.take_action(th.ActionType.CALL)
                elif game.validate_move(action=th.ActionType.CHECK):
                    game.take_action(th.ActionType.CHECK)
                else:
                    game.take_action(th.ActionType.FOLD)  # Fallback action
            elif action == 2:
                min_raise = game.get_available_moves().raise_range.start
                max_raise = int(np.min([game.players[game.current_player].chips,
                                        game.get_available_moves().raise_range.stop]))
                if min_raise > max_raise or min_raise <= 0:
                    # Fallback to a valid action if min_raise is invalid
                    if game.validate_move(action=th.ActionType.CALL):
                        game.take_action(th.ActionType.CALL)
                    elif game.validate_move(action=th.ActionType.CHECK):
                        game.take_action(th.ActionType.CHECK)
                    else:
                        game.take_action(th.ActionType.FOLD)
                else:
                    if game.validate_move(action=th.ActionType.RAISE, value=min_raise):
                        game.take_action(th.ActionType.RAISE, min_raise)
                    else:
                        game.take_action(th.ActionType.FOLD)  # Fallback action
            else:
                if game.validate_move(action=th.ActionType.ALL_IN):
                    game.take_action(th.ActionType.ALL_IN)
                else:
                    game.take_action(th.ActionType.FOLD)  # Fallback action
        else:
            # Smart bot's turn
            # Smart bot decision
            smort_decision(our_bot, game)
            who_won = 0 if game.current_player == 1 else 1  # Determine the winner based on the current player

        if not game.is_hand_running():
            done = True
    outcome = game._get_last_pot().amount * (-1 if who_won == 1 else 1)
    # Record the outcome
    outcome = game._get_last_pot().amount * ((-1) ** who_won)
    current_round.close_round(outcome)
    rounds_list.append(current_round.copy())

    # Update metrics
    total_games += 1
    if who_won == 0:
        model_wins += 1
    metrics["total_earnings"] += outcome
    metrics["model_win_rate"] = model_wins / total_games

# Print the final metrics
print(f"Final Metrics:")
print(f"Total games played: {total_games}")
print(f"Model wins: {model_wins}")
print(f"Total earnings: {metrics['total_earnings']}")
print(f"Model win rate: {metrics['model_win_rate']:.2f}")
print(f"Model earnings per game: {metrics['total_earnings'] / total_games:.2f}")


100%|██████████| 1000/1000 [03:59<00:00,  4.18it/s]

Final Metrics:
Total games played: 1000
Model wins: 0
Total earnings: -114842
Model win rate: 0.00
Model earnings per game: -114.84





In [35]:
from stable_baselines3 import A2C

# Create and train the A2C model
a2c_model = A2C("MlpPolicy", env, verbose=1)
a2c_model.learn(total_timesteps=10000)

# Save the new model
a2c_model.save("poker_a2c_model")

# Replace the existing model with the new A2C model for evaluation
model = a2c_model

Using cpu device
------------------------------------
| time/                 |          |
|    fps                | 603      |
|    iterations         | 100      |
|    time_elapsed       | 0        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -0.992   |
|    explained_variance | nan      |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0.36    |
|    value_loss         | 0.169    |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 583      |
|    iterations         | 200      |
|    time_elapsed       | 1        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -1.06    |
|    explained_variance | 0.0285   |
|    learning_rate      | 0.0007   |
|    n_updates          | 199      |
|    policy_loss        | 1.08     |
|    value_loss      

AttributeError: module 'stable_baselines3' has no attribute '__version__'

# AFTER THIS POINT, NOTHING IS IN USE AND ONLY REMAINS FOR REFERENCE.

In [None]:
print(game.hand_phase.name)

PREFLOP


In [None]:
print(game.current_player)

1


In [None]:
# get information regarding our hand, the game board, our win prob, their chips bet, and our EV
win_prob, my_hand_odds = hoc.estimate_win_and_hand_probs(game, 0, 2, 1000)
loss_prob = 1 - win_prob
my_chips_betting = game.player_bet_amount(0) + game.chips_to_call(0)
their_chips_betting = game.player_bet_amount(1)
EV = win_prob*their_chips_betting - loss_prob*my_chips_betting
print(game.get_hand(0), game.board, win_prob, their_chips_betting, EV)

[Card("3c"), Card("4d")] [] 0.402 5 -155.862


In [None]:
# get decision
my_decision = our_bot.make_decision(EV, my_hand_odds, game)
print(my_decision.type, my_decision.size)

FOLD 0


In [None]:
# actually make decision
if (my_decision.type == "RAISE"):
  game.take_action(th.ActionType.RAISE, my_decision.size)
elif (my_decision.type == "CALL/CHECK"):
  if (game.validate_move(action = th.ActionType.CALL)):
    game.take_action(th.ActionType.CALL)
  else:
    game.take_action(th.ActionType.CHECK)
elif (my_decision.type == "FOLD"):
  game.take_action(th.ActionType.FOLD)
else:
  game.take_action(th.ActionType.ALL_IN)

In [None]:
# opponent makes random decision (reused baby code)
babys_decision = np.random.choice(3)
if (babys_decision == 0):
  # baby will call/check if possible
  if (game.validate_move(action = th.ActionType.CALL) or
      game.validate_move(action = th.ActionType.CHECK)):
    decision = wb.Decision("CALL/CHECK")
  elif (game.validate_move(game.current_player, th.ActionType.ALL_IN)):
    decision = wb.Decision("ALLIN")
  else:
    decision = wb.Decision("FOLD")
elif (babys_decision == 1):
  # baby will fold
  decision = wb.Decision("FOLD")
else:
  # baby will raise if possible
  min_raise = game.get_available_moves().raise_range[0]
  max_raise = int(np.min([game.players[game.current_player].chips,
                          game.get_available_moves().raise_range[-1]]))
  if (min_raise <= max_raise and
      game.validate_move(action = th.ActionType.RAISE, value = min_raise) and
      game.validate_move(action = th.ActionType.RAISE, value = max_raise)):
    decision = \
      wb.Decision("RAISE", int(np.random.uniform(min_raise, max_raise)))
  elif (game.validate_move(action = th.ActionType.CALL) or
        game.validate_move(action = th.ActionType.CHECK)):
    decision = wb.Decision("CALL/CHECK")
  elif (game.validate_move(action = th.ActionType.ALL_IN)):
    decision = wb.Decision("ALLIN")
  else:
    decision = wb.Decision("FOLD")
print(decision.type, decision.size)

CALL/CHECK 0


In [None]:
# actually make decision
if (decision.type == "RAISE"):
  game.take_action(th.ActionType.RAISE, decision.size)
elif (decision.type == "CALL/CHECK"):
  if (game.validate_move(action = th.ActionType.CALL)):
    game.take_action(th.ActionType.CALL)
  else:
    game.take_action(th.ActionType.CHECK)
elif (decision.type == "FOLD"):
  game.take_action(th.ActionType.FOLD)
else:
  game.take_action(th.ActionType.ALL_IN)

ValueError: No hand is running