In [3]:
import gym
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

In [4]:
import gymnasium as gym
from gymnasium import spaces
import texasholdem as th
import numpy as np
import random
from types import SimpleNamespace
from TexasHoldEmBot import get_features
from texasholdem.evaluator import evaluate

In [38]:
class PokerEnv(gym.Env):
    metadata = {"render_modes": []}

    def __init__(self):
        super().__init__()
        self.action_space = spaces.Discrete(5)  # 0 FOLD, 1 CALL, 2 CHECK, 3 RAISE SMALL, 4 RAISE LARGE 
        self.observation_space = spaces.Box(
            low=0.0, high=np.inf, shape=(131,), dtype=np.float32
        )
        self.game = None # will eventually have the texasholdem engine

    # this is needed for get_features() to work properly
    def _patch_prehand(self):
        if self.game.hand_phase.name == "PREHAND":
            self.game.hand_phase = SimpleNamespace(name="PREFLOP")

    # pot size
    # forced: the two mandatory blinds (small and big)
    # voluntary: how many chips each player put in (bets and calls)
    def pot_size(self):
        forced = self.game.small_blind + self.game.big_blind
        # sum chips each player has at stake this hand
        voluntary = sum(
            self.game.chips_at_stake(p)
            for p in range(self.game.max_players)    
        )
        return forced + voluntary

    def reset(self, *, seed=None, options=None):
        super().reset(seed=seed)
        self.game = th.TexasHoldEm(
            buyin=500, big_blind=5, small_blind=2, max_players=2
        )
        self.game.start_hand()
        self._patch_prehand()
        obs = get_features(self.game).detach().numpy().squeeze()
        return obs, {}

    def step(self, action):
            # small raise
            if action == 3:  # RAISE_SMALL
                to_call = self.game.chips_to_call(self.game.current_player)
                min_r = max(self.game.last_raise, self.game.big_blind)
                total = to_call + min_r
                return self._take_and_finalize(th.ActionType.RAISE, total)  

            # big raise
            if action == 4:  # RAISE_LARGE
                to_call = self.game.chips_to_call(self.game.current_player)
                pot     = self.pot_size()           # ✚ use the new helper
                total   = to_call + pot
                return self._take_and_finalize(th.ActionType.RAISE, total)

            # (other non-raise actions)
            base_actions = [
                th.ActionType.FOLD,
                th.ActionType.CALL,
                th.ActionType.CHECK
            ]
            action_type  = base_actions[action] 

            # CASE: try to CHECK when there's a bet to CALL -> convert to CALL
            if action_type is th.ActionType.CHECK and self.game.chips_to_call(self.game.current_player) > 0:
                action_type = th.ActionType.CALL

            # CASE: if they folded, immediately finalize with -1 reward
            if action_type is th.ActionType.FOLD:
                return self._take_and_finalize(th.ActionType.FOLD, None, reward=-1.0)

            # ELSE: proceed to the helper
            return self._take_and_finalize(action_type, None)


    # executes action types or + total for RAISE
    # simulates opponent
    # extracts features
    def _take_and_finalize(self, action_type, total=None, reward=None):
        player_idx = self.game.current_player
        # 1) perform the action
        try:
            if total is None:
                self.game.take_action(action_type)
            else:
                self.game.take_action(action_type, total)
        except ValueError: # illegal move -> FOLD and penalize
            self.game.take_action(th.ActionType.FOLD)
            reward = -1.0

        # 2) patch PREHAND -> PREFLOP
        self._patch_prehand()

        # 3) if we already have a terminal reward (fold branch), return now
        if reward is not None:
            # compute chips you put in
            contrib     = self.game.chips_at_stake(player_idx)
            chips_delta = -contrib
            obs = get_features(self.game).detach().numpy().squeeze()
            return obs, reward, True, False, {"chips_delta": chips_delta}
        
        # 4) simulate the opponent including raises
        while self.game.hand_phase.name != "SHOWDOWN":
            # RANDOMLY pick an action
            opp_idx = random.choice([0,1,2,3,4])
            to_call = self.game.chips_to_call(self.game.current_player ^ 1)
            try:
                if opp_idx == 0:    # FOLD
                    opp_act = th.ActionType.FOLD
                    self.game.take_action(opp_act)
                elif opp_idx == 1:  # CALL
                    opp_act = th.ActionType.CALL
                    self.game.take_action(opp_act)
                elif opp_idx == 2:  # CHECK
                    opp_act = th.ActionType.CHECK
                    self.game.take_action(opp_act)
                elif opp_idx == 3:  # RAISE_SMALL
                    opp_act = th.ActionType.RAISE
                    min_r   = max(self.game.last_raise, self.game.big_blind)
                    total   = to_call + min_r
                    self.game.take_action(opp_act, total)
                else:               # RAISE_LARGE
                    opp_act = th.ActionType.RAISE
                    pot     = self.pot_size()
                    total   = to_call + pot
                    self.game.take_action(opp_act, total)
            except ValueError:
                continue # for an illegal opponent move -> resample

            # agent responds
            self._patch_prehand()
            call_amt = self.game.chips_to_call(self.game.current_player)
            if call_amt > 0:
                self.game.take_action(th.ActionType.CALL)
            else:
                self.game.take_action(th.ActionType.CHECK)

            # if opponent folds, we instantly win (yipee)
            if opp_act is th.ActionType.FOLD:
                pot         = self.pot_size()
                contrib     = self.game.chips_at_stake(player_idx)
                chips_delta = pot - contrib
                self._patch_prehand()
                obs = get_features(self.game).detach().numpy().squeeze()
                return obs, 1.0, True, False, {"chips_delta": chips_delta}

            # patch again before next opp iteration
            self._patch_prehand()

        # 5) showdown
        self._patch_prehand()
        obs = get_features(self.game).detach().numpy().squeeze()
        ranks = [evaluate(self.game.get_hand(p), self.game.board)
                 for p in range(self.game.max_players)]
        you  = ranks[self.game.current_player]
        best = min(ranks)
        final_reward = 1.0 if you == best else -1.0
        
        # compute chips delta at showdown
        pot         = self.pot_size()
        contrib     = self.game.chips_at_stake(player_idx)
        chips_delta = (pot - contrib) if you == best else -contrib
        return obs, final_reward, True, False, {"chips_delta": chips_delta}

In [34]:
env = PokerEnv()

# verify spaces are correct 
print("Action space:", env.action_space)               
print("Observation space:", env.observation_space)      

# reset environment before beginning game
obs, info = env.reset()

# takes a random step that runs through a full hand simulation
# should apply the agent's move -> simulate opponent -> showdown -> return reward
action = env.action_space.sample()
new_obs, reward, terminated, truncated, info = env.step(action)
print("-> new_obs shape =", new_obs.shape)
print("-> reward =", reward) # +1 won the hand -1 lost the hand
print("-> terminated =", terminated) # should always be true but just to check
print("-> truncated =", truncated) 
print("-> info =", info)

# runs through one full episode
obs, info = env.reset()
terminated = False
total_reward = 0.0
while not terminated:
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
print("Episode finished, total_reward =", total_reward)


Action space: Discrete(5)
Observation space: Box(0.0, inf, (131,), float32)
-> new_obs shape = (131,)
-> reward = 1.0
-> terminated = True
-> truncated = False
-> info = {'chips_delta': 391}
Episode finished, total_reward = 1.0


In [37]:
# randomly play 1000 hands
# set as a baseline, so once trained the agent should actually beat whatever win rate is gotten
# ******* NOTE: the win rate is currently a bit too high than it should be because RAISE is not properly implemented
# also opponent currently only does FOLD or CALL
N = 10000
wins = 0
total_chips = 0

for _ in range(N):
    obs, info = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()
        obs, reward, done, truncated, info = env.step(action)
    # reward is ±1
    if reward > 0:
        wins += 1
    # info['chips_delta'] is the raw chip swing for this hand
    total_chips += info.get('chips_delta', 0)

print(f"Win rate          = {wins/N:.2%}")
print(f"Total chips won   = {total_chips}")
print(f"Avg chips per hand= {total_chips/N:.2f}")

Win rate          = 60.78%
Total chips won   = 466518
Avg chips per hand= 46.65
