In [43]:
import gym
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

In [44]:
import gymnasium as gym
from gymnasium import spaces
import texasholdem as th
import numpy as np
import random
from types import SimpleNamespace
from TexasHoldEmBot import get_features
from texasholdem.evaluator import evaluate

In [45]:
class PokerEnv(gym.Env):
    metadata = {"render_modes": []}

    def __init__(self):
        super().__init__()
        self.action_space = spaces.Discrete(4) # 0 FOLD 1 CALL 2 CHECK 3 ALL IN!!!!!!
        self.observation_space = spaces.Box(
            low=0.0, high=np.inf, shape=(131,), dtype=np.float32
        )
        self.game = None # placeholder for instance created later

    def _patch_prehand(self):
        # ************************
        # this is for if the game is still in its initial prehand phase
        # overrides it so that it becomes preflop otherwise
        # get_features() keeps crashing on an "unknown phase"
        if self.game.hand_phase.name == "PREHAND":
            self.game.hand_phase = SimpleNamespace(name="PREFLOP")

    def reset(self, *, seed=None, options=None):
        super().reset(seed=seed)
        self.game = th.TexasHoldEm(
            buyin=500, big_blind=5, small_blind=2, max_players=2
        )
        self.game.start_hand()
        # ** this part is needed so get_features() stops crashing for now
        self._patch_prehand()
        obs = get_features(self.game).detach().numpy().squeeze()
        return obs, {}

    # for advancing one step (one full hand)
    def step(self, action):
        # possible action types
        # *********** IMPORTANT: RAISE NEEDS TO BE ADDED BACK IN
        if action == 4:  # RAISE action
            raise_amount = self.game.chips_to_call(self.game.current_player) + self.game.big_blind
            action_type = th.ActionType.RAISE
            self.game.take_action(action_type, raise_amount)
        else:
            action_type = [
            th.ActionType.FOLD,
            th.ActionType.CALL,
            th.ActionType.CHECK,
            th.ActionType.ALL_IN
            ][action]

        # translates integers into actions 
        if action_type is th.ActionType.CHECK and \
           self.game.chips_to_call(self.game.current_player) > 0:
            action_type = th.ActionType.CALL

        # check for illegal move: you cannot check when there is money to call -> change action to a call
        if action_type is th.ActionType.FOLD:
            self.game.take_action(th.ActionType.FOLD)
            self._patch_prehand() # if illegal action of a raise is attempted
            obs = get_features(self.game).detach().numpy().squeeze()
            return obs, -1.0, True, False, {}

        # agentâ€™s actual move (fallback to fold on error)
        try:
            self.game.take_action(action_type)
        except ValueError:
            self.game.take_action(th.ActionType.FOLD)
            self._patch_prehand() # if illegal action of a raise is attempted
            obs = get_features(self.game).detach().numpy().squeeze()
            return obs, -1.0, True, False, {}

        self._patch_prehand()

        # simulates an opponent until showdown phase
        while self.game.hand_phase.name != "SHOWDOWN":
            opp_action = random.choice([th.ActionType.FOLD, th.ActionType.CALL])
            try:
                self.game.take_action(opp_action)
            except ValueError:
                continue

            # if opponent folds, agent wins right away
            if opp_action is th.ActionType.FOLD:
                self._patch_prehand()
                obs = get_features(self.game).detach().numpy().squeeze()
                return obs, 1.0, True, False, {}

            # patch just to be sure
            self._patch_prehand()

        # juuuuust to be extra sure
        self._patch_prehand()
        obs = get_features(self.game).detach().numpy().squeeze()

        # compute showdown rewards
        ranks = [
            evaluate(self.game.get_hand(p), self.game.board)
            for p in range(self.game.max_players)
        ]
        you  = ranks[self.game.current_player]
        best = min(ranks)
        reward = 1.0 if you == best else -1.0

        # returns final tuple
        return obs, reward, True, False, {}

In [None]:
env = PokerEnv()

# verify spaces are correct 
print("Action space:", env.action_space)               
print("Observation space:", env.observation_space)      

# reset environment before beginning game
obs, info = env.reset()

# takes a random step that runs through a full hand simulation
# should apply the agent's move -> simulate opponent -> showdown -> return reward
action = env.action_space.sample()
new_obs, reward, terminated, truncated, info = env.step(action)
print("-> new_obs shape =", new_obs.shape)
print("-> reward =", reward) # +1 won the hand -1 lost the hand
print("-> terminated =", terminated) # should always be true but just to check
print("-> truncated =", truncated) 
print("-> info =", info)

# runs through one full episode
obs, info = env.reset()
terminated = False
total_reward = 0.0
while not terminated:
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
print("Episode finished, total_reward =", total_reward)


Action space: Discrete(4)
Observation space: Box(0.0, inf, (131,), float32)
-> new_obs shape = (131,)
-> reward = 1.0
-> terminated = True
-> truncated = False
-> info = {}
Episode finished, total_reward = 1.0


In [47]:
# randomly play 1000 hands
# set as a baseline, so once trained the agent should actually beat whatever win rate is gotten
# ******* NOTE: the win rate is currently a bit too high than it should be because RAISE is not properly implemented
# also opponent currently only does FOLD or CALL
N = 10000
wins = 0
for _ in range(N):
    obs, info = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()
        obs, reward, done, truncated, info = env.step(action)
    if reward > 0:
        wins += 1
print(f"win rate = {wins/N:.2%}")

# calculate total earnings after 1000 hands
total_earnings = 0
for _ in range(N):
    obs, info = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()
        obs, reward, done, truncated, info = env.step(action)
        total_earnings += reward
print(f"Total earnings after {N} hands = {total_earnings}")
print(f"Average earnings per hand = {total_earnings/N:.2f}")

win rate = 75.09%
Total earnings after 10000 hands = 4992.0
Average earnings per hand = 0.50
