In [1]:
import rlcard
from rlcard.agents.random_agent import RandomAgent
from rlcard.games.nolimitholdem.round import Action
from determism_util import is_deterministic

测试状态空间

In [15]:
# Build the no-limit hold'em environment and start a fresh game.
env = rlcard.make('no-limit-holdem')
# reset() hands back the first state and the id of the player who acts on it.
state, player_id = env.reset()
# The state dict holds the observation array under 'obs' and the legal action ids.
print('size of state:',state['obs'].size)
print('player id:',player_id)
print('state:',state)

size of state: 54
player id: 0
state: {'legal_actions': [0, 2, 3, 4, 5], 'obs': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
       0., 1., 2.])}


测试是否为deterministic

In [11]:
# is_deterministic comes from the local determism_util module; print its verdict for this environment.
print(is_deterministic('no-limit-holdem'))

True


测试获取合法动作  
每一步都可以输出当前的合法动作

In [12]:
# Show the legal actions of the player to act, as Action enum members (private env helper).
env._get_legal_actions()

[<Action.FOLD: 0>,
 <Action.CALL: 2>,
 <Action.RAISE_HALF_POT: 3>,
 <Action.RAISE_POT: 4>,
 <Action.ALL_IN: 5>]

测试decode action  
把动作id转换为指令

In [14]:
# Turn an action id into the corresponding Action enum member (private env helper).
env._decode_action(0)

<Action.FOLD: 0>

测试env.step

In [18]:
# Advance the game by one move, using `state` from the earlier reset cell.
print('player_id:',env.get_player_id())
# Take the first legal action id offered in the current state.
action = state['legal_actions'][0]
print('action:',action)
# step() returns the next state and the id of the player who must act on it.
next_state, player_id = env.step(action)
print('next_state:',next_state)
print('player_id:',player_id)

player_id: 0
action: 0
next_state: {'legal_actions': [0, 1, 3, 4, 5], 'obs': array([0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 2., 2.])}
player_id: 1


测试run

In [19]:
# Play one full game between random agents, then add up the payoffs of all players.
env = rlcard.make('no-limit-holdem')
agents = [RandomAgent(env.action_num) for _ in range(env.player_num)]
env.set_agents(agents)
trajectories, payoffs = env.run(is_training=False)
# Builtin sum starts from 0 and adds the payoffs in order.
total = sum(payoffs)
print('total:',total)

total: 0


测试chips  
这个的测试结果不对：配置的 `chips_for_each` 是 `[100, 200, 400, 600, 900]`，但下面输出的每位玩家筹码都是 100，说明自定义筹码没有生效

In [20]:
# The config sets the number of players and the starting chips of each player.
env = rlcard.make('no-limit-holdem', config={'game_player_num':5, 'chips_for_each':[100, 200, 400, 600, 900]})
env.game.init_game()
players = env.game.players
# Total chips per player = chips still held + chips already put in.
# Iterate over the players themselves so the cell follows the configured
# player count instead of repeating it as a hard-coded range.
chips = [player.remained_chips + player.in_chips for player in players]
print('chips:',chips)

chips: [100, 100, 100, 100, 100]


#### rlcard/rlcard/games/nolimitholdem/game.py

In [11]:
from enum import Enum

import numpy as np
from copy import deepcopy
from rlcard.games.limitholdem import Game
from rlcard.games.limitholdem import PlayerStatus

from rlcard.games.nolimitholdem import Dealer
from rlcard.games.nolimitholdem import Player
from rlcard.games.nolimitholdem import Judger
from rlcard.games.nolimitholdem import Round, Action

In [12]:
class Stage(Enum):
    ''' Stages of a no-limit hold'em game
    '''
    # Definition of the game stages
    PREFLOP = 0
    FLOP = 1
    TURN = 2
    RIVER = 3
    END_HIDDEN = 4
    SHOWDOWN = 5

In [13]:
class NolimitholdemGame(Game):
    ''' No-limit Texas Hold'em game.

    Extends the limit hold'em ``Game`` with per-player initial chips, a
    configurable dealer seat and the no-limit betting ``Round``.
    '''

    def __init__(self, allow_step_back=False, num_players=2):
        ''' Initialize the game with its default settings
        Args:
            allow_step_back (bool): When True, ``step`` snapshots the game first so ``step_back`` can undo it
            num_players (int): The number of players
        '''
        self.allow_step_back = allow_step_back
        self.np_random = np.random.RandomState()

        # Blinds: the big blind is twice the small blind
        self.small_blind = 1
        self.big_blind = 2 * self.small_blind

        # Number of players and the initial chips of each of them
        self.num_players = num_players
        self.init_chips = [100] * num_players

        # Dealer seat; chosen at random in init_game while it is still None
        self.dealer_id = None

    def configure(self, game_config):
        ''' Specify some game specific parameters, such as player number, initial chips, and dealer id.
        If dealer_id is None, he will be randomly chosen
        Args:
            game_config (dict): Must contain the keys 'game_player_num', 'chips_for_each' and 'dealer_id'
        '''
        self.num_players = game_config['game_player_num']
        self.init_chips = game_config['chips_for_each']
        self.dealer_id = game_config['dealer_id']

    def init_game(self):
        ''' Initialize the game of No Limit Texas Hold'em
        Returns:
            (tuple): Tuple containing:
                (dict): The first state of the game
                (int): Current player's id
        '''
        # Pick the dealer seat at random when none was configured.
        # Once chosen it stays set, so later init_game calls reuse the same seat.
        if self.dealer_id is None:
            self.dealer_id = self.np_random.randint(0, self.num_players)

        # Initialize a dealer that can deal cards
        self.dealer = Dealer(self.np_random)

        # Initialize players to play the game, each with his own initial chips
        self.players = [Player(i, self.init_chips[i], self.np_random) for i in range(self.num_players)]

        # Initialize a judger class which will decide who wins in the end
        self.judger = Judger(self.np_random)

        # Deal two cards to each player to prepare for the first round
        for i in range(2 * self.num_players):
            self.players[i % self.num_players].hand.append(self.dealer.deal_card())

        # Initialize public cards; there are none yet
        self.public_cards = []
        self.stage = Stage.PREFLOP

        # Post the small and the big blind on the two seats after the dealer.
        # With two players, (dealer_id + 2) wraps around to the dealer seat itself.
        s = (self.dealer_id + 1) % self.num_players
        b = (self.dealer_id + 2) % self.num_players
        self.players[b].bet(chips=self.big_blind)
        self.players[s].bet(chips=self.small_blind)

        # The player after the big blind acts first;
        # with two players that wraps around to the small blind.
        self.game_pointer = (b + 1) % self.num_players

        # Initialize a bidding round. The big blind is passed as init_raise_amount.
        self.round = Round(self.num_players, self.big_blind, dealer=self.dealer, np_random=self.np_random)

        # The first round starts from the chips already posted (the blinds)
        self.round.start_new_round(game_pointer=self.game_pointer, raised=[p.in_chips for p in self.players])

        # Count the round. There are 4 rounds in each game.
        self.round_counter = 0

        # Save the history for stepping back to the last state.
        self.history = []

        state = self.get_state(self.game_pointer)

        return state, self.game_pointer

    def get_legal_actions(self):
        ''' Return the legal actions for current player
        Returns:
            (list): A list of legal actions
        '''
        return self.round.get_nolimit_legal_actions(players=self.players)

    def step(self, action):
        ''' Get the next state
        Args:
            action (Action): a specific action; must be one of the current legal actions
        Returns:
            (tuple): Tuple containing:
                (dict): next player's state
                (int): next player's id
        Raises:
            Exception: If the action is not among the legal actions
        '''
        # TODO (from the notebook author): map illegal actions to legal ones instead of raising
        if action not in self.get_legal_actions():
            print(action, self.get_legal_actions())
            print(self.get_state(self.game_pointer))
            raise Exception('Action not allowed')

        if self.allow_step_back:
            # First snapshot the current state
            r = deepcopy(self.round)
            b = self.game_pointer
            r_c = self.round_counter
            d = deepcopy(self.dealer)
            p = deepcopy(self.public_cards)
            ps = deepcopy(self.players)
            self.history.append((r, b, r_c, d, p, ps))

        # Then we proceed to the next round
        self.game_pointer = self.round.proceed_round(self.players, action)

        # One flag per player: 1 when the player has folded or is all-in, 0 otherwise
        players_in_bypass = [1 if player.status in (PlayerStatus.FOLDED, PlayerStatus.ALLIN) else 0 for player in self.players]
        if self.num_players - sum(players_in_bypass) == 1:
            # Exactly one player is neither folded nor all-in
            last_player = players_in_bypass.index(0)
            if self.round.raised[last_player] >= max(self.round.raised):
                # If the last player has put enough chips, he is also bypassed
                players_in_bypass[last_player] = 1

        # If a round is over, we deal more public cards
        if self.round.is_over():
            print('当前轮结束')
            # Game pointer goes to the first player not in bypass after the dealer, if there is one
            # NOTE(review): the notebook author considers this wrong for two players, where
            # (dealer_id + 1) is the small blind seat while the big blind is expected to act
            # first after the flop -- confirm the intended rule.
            self.game_pointer = (self.dealer_id + 1) % self.num_players
            if sum(players_in_bypass) < self.num_players:
                # At least one player can still act, so this search terminates
                while players_in_bypass[self.game_pointer]:
                    self.game_pointer = (self.game_pointer + 1) % self.num_players

            # The three checks below are deliberately plain `if`s: when every player is in
            # bypass the counter is advanced inside a branch, so the next branch fires too
            # and the remaining public cards are dealt within this same step.

            # For the first round, we deal 3 cards
            if self.round_counter == 0:
                self.stage = Stage.FLOP
                self.public_cards.append(self.dealer.deal_card())
                self.public_cards.append(self.dealer.deal_card())
                self.public_cards.append(self.dealer.deal_card())
                if len(self.players) == np.sum(players_in_bypass):
                    self.round_counter += 1
            # For the following rounds, we deal only 1 card
            if self.round_counter == 1:
                self.stage = Stage.TURN
                self.public_cards.append(self.dealer.deal_card())
                if len(self.players) == np.sum(players_in_bypass):
                    self.round_counter += 1
            if self.round_counter == 2:
                self.stage = Stage.RIVER
                self.public_cards.append(self.dealer.deal_card())
                if len(self.players) == np.sum(players_in_bypass):
                    self.round_counter += 1

            self.round_counter += 1
            # Start the next bidding round; no `raised` is passed, so it starts from zeros
            self.round.start_new_round(self.game_pointer)

        state = self.get_state(self.game_pointer)

        return state, self.game_pointer

    def get_state(self, player_id):
        ''' Return player's state
        Args:
            player_id (int): player id
        Returns:
            (dict): The state of the player
        '''
        # Refresh the pot first: the legal actions computed below read dealer.pot
        self.dealer.pot = np.sum([player.in_chips for player in self.players])

        chips = [self.players[i].in_chips for i in range(self.num_players)]
        legal_actions = self.get_legal_actions()
        state = self.players[player_id].get_state(self.public_cards, chips, legal_actions)
        # Chips each player still holds
        state['stakes'] = [self.players[i].remained_chips for i in range(self.num_players)]
        state['current_player'] = self.game_pointer
        state['pot'] = self.dealer.pot
        state['stage'] = self.stage
        return state

    def step_back(self):
        ''' Return to the previous state of the game
        Returns:
            (bool): True if the game steps back successfully
        '''
        if len(self.history) > 0:
            self.round, self.game_pointer, self.round_counter, self.dealer, self.public_cards, self.players = self.history.pop()
            # The snapshot does not store the stage. Snapshots are taken before a step,
            # where the stage value equals the round counter (0 preflop ... 3 river),
            # so rebuild it from the restored counter instead of keeping the stale one.
            self.stage = Stage(self.round_counter)
            return True
        return False

    def get_player_num(self):
        ''' Return the number of players in No Limit Texas Hold'em
        Returns:
            (int): The number of players in the game
        '''
        return self.num_players

    def get_payoffs(self):
        ''' Return the payoffs of the game
        Returns:
            (list): Each entry corresponds to the payoff of one player
        '''
        # Only players who are alive or all-in show a hand; the others are passed as None
        hands = [p.hand + self.public_cards if p.status in (PlayerStatus.ALIVE, PlayerStatus.ALLIN) else None for p in self.players]
        chips_payoffs = self.judger.judge_game(self.players, hands)
        return chips_payoffs

    @staticmethod
    def get_action_num():
        ''' Return the number of applicable actions
        Returns:
            (int): The number of actions. There are 6 actions (call, raise_half_pot, raise_pot, all_in, check and fold)
        '''
        return len(Action)

测试一下

In [21]:
class testAction(Enum):
    ''' Two-member scratch enum; not referenced by the other cells shown in this notebook
    '''
    CALL = 0
    RAISE_HALF_POT = 1

In [24]:
# Play one game with uniformly random legal actions and trace every step.
game = NolimitholdemGame()
# The constructor applies the defaults:
#   blinds are 1 (small) and 2 (big)
#   each of the two players starts with 100 chips
for _ in range(1):
    print('开始游戏')
    state, game_pointer = game.init_game()
    #print(game_pointer, state)
    print('dealer id (大盲注位置):',game.dealer_id)
    print('game_pointer,小盲注id:',game_pointer)
    print('小盲注的当前状态:',state)
    print('round当前轮次:',game.round_counter)
    print('----开始进入preflop-------')
    # The initial state looks correct
    while not game.is_over():
        # Pick one of the currently legal actions at random
        legal_actions = game.get_legal_actions()
        action = np.random.choice(legal_actions)
        #print('game_pointer:',game_pointer)
        print('legal_actions当前玩家的合法动作:',legal_actions)
        print('action:',action)
        #print(game_pointer, action, legal_actions)
        state, game_pointer = game.step(action)
        #print(game_pointer, state)
        print('game_pointer当前玩家:',game_pointer)
        print('state当前玩家状态:',state)
        print('round:',game.round_counter)
    # Final payoffs once the game is over
    print(game.get_payoffs())
    print('-------------------------------------------------------------------')

开始游戏
dealer id (大盲注位置): 0
game_pointer,小盲注id: 1
小盲注的当前状态: {'hand': ['H9', 'H6'], 'public_cards': [], 'all_chips': [2, 1], 'my_chips': 1, 'legal_actions': [<Action.FOLD: 0>, <Action.CALL: 2>, <Action.RAISE_HALF_POT: 3>, <Action.RAISE_POT: 4>, <Action.ALL_IN: 5>], 'stakes': [98, 99], 'current_player': 1, 'pot': 3, 'stage': <Stage.PREFLOP: 0>}
round当前轮次: 0
----开始进入preflop-------
legal_actions当前玩家的合法动作: [<Action.FOLD: 0>, <Action.CALL: 2>, <Action.RAISE_HALF_POT: 3>, <Action.RAISE_POT: 4>, <Action.ALL_IN: 5>]
action: Action.RAISE_HALF_POT
game_pointer当前玩家: 0
state当前玩家状态: {'hand': ['D3', 'C5'], 'public_cards': [], 'all_chips': [2, 2], 'my_chips': 2, 'legal_actions': [<Action.FOLD: 0>, <Action.CHECK: 1>, <Action.RAISE_HALF_POT: 3>, <Action.RAISE_POT: 4>, <Action.ALL_IN: 5>], 'stakes': [98, 98], 'current_player': 0, 'pot': 4, 'stage': <Stage.PREFLOP: 0>}
round: 0
legal_actions当前玩家的合法动作: [<Action.FOLD: 0>, <Action.CHECK: 1>, <Action.RAISE_HALF_POT: 3>, <Action.RAISE_POT: 4>, <Action.ALL_IN: 5>

代码的逻辑有很多错误  
dealer这些东西 nolimit基本都是继承limit那边的

dealer完成的任务就是把牌打乱然后进行发牌 不用改动

In [None]:
class LimitholdemDealer(object):
    ''' Dealer that owns a shuffled deck and hands out its cards one by one
    '''

    def __init__(self, np_random):
        ''' Set up an empty pot and a freshly shuffled standard deck
        Args:
            np_random: random generator used for shuffling; it must offer ``shuffle``
        '''
        self.pot = 0
        self.np_random = np_random
        self.deck = init_standard_deck()
        self.shuffle()

    def shuffle(self):
        ''' Reorder the deck in place using the dealer's random generator
        '''
        deck = self.deck
        self.np_random.shuffle(deck)

    def deal_card(self):
        ''' Take the last card off the deck
        Returns:
            (Card): The card removed from the deck
        '''
        card = self.deck.pop()
        return card

player  
指定玩家的状态 私牌、公牌、筹码、合法动作  
暂时也不需要修改

In [None]:
from enum import Enum


class PlayerStatus(Enum):
    ''' Status of a player during a game
    '''
    # A player is in one of three states: alive, folded or all-in
    ALIVE = 0
    FOLDED = 1
    ALLIN = 2


class LimitholdemPlayer(object):
    ''' A player at the table: private cards, status and the chips put in so far
    '''

    def __init__(self, player_id, np_random):
        ''' Create a player who is alive, holds no cards and has put in no chips
        Args:
            player_id (int): The id of the player
            np_random: random generator stored on the player
        '''
        self.player_id = player_id
        self.np_random = np_random
        self.status = PlayerStatus.ALIVE
        self.hand = []

        # Chips this player has put in so far
        self.in_chips = 0

    def get_state(self, public_cards, all_chips, legal_actions):
        ''' Build the state dictionary seen by this player
        Args:
            public_cards (list): Public cards visible to all players
            all_chips: The chips that all players have put in; stored unchanged
            legal_actions (list): The legal actions; stored unchanged
        Returns:
            (dict): State with the keys 'hand', 'public_cards', 'all_chips',
                'my_chips' and 'legal_actions'
        '''
        hand_indices = [card.get_index() for card in self.hand]
        public_indices = [card.get_index() for card in public_cards]
        return {
            'hand': hand_indices,
            'public_cards': public_indices,
            'all_chips': all_chips,
            'my_chips': self.in_chips,
            'legal_actions': legal_actions,
        }

    def get_player_id(self):
        ''' Give back the id this player was created with
        '''
        return self.player_id

nolimit 的 round 

In [None]:
from enum import Enum

from rlcard.games.limitholdem import PlayerStatus


class Action(Enum):
    ''' Actions a player can choose from in no-limit hold'em
    '''
    # The available kinds of action; commented-out members are not active
    FOLD = 0
    CHECK = 1
    CALL = 2
    # RAISE_3BB = 3
    RAISE_HALF_POT = 3
    RAISE_POT = 4
    # RAISE_2POT = 5
    ALL_IN = 5
    # SMALL_BLIND = 7
    # BIG_BLIND = 8
    

In [None]:
class NolimitholdemRound():
    ''' Round can call other Classes' functions to keep the game running
    '''

    def __init__(self, num_players, init_raise_amount, dealer, np_random):
        ''' Initialize the round class
        Args:
            num_players (int): The number of players
            init_raise_amount (int): The min raise amount when every round starts
            dealer: The dealer of the game; its ``pot`` attribute is read to size raises
            np_random: random generator stored on the round
        '''
        self.np_random = np_random
        self.game_pointer = None
        self.num_players = num_players
        self.init_raise_amount = init_raise_amount

        self.dealer = dealer

        # Count the number without raise
        # If every player agree to not raise, the round is over
        self.not_raise_num = 0

        # Count players that are not playing anymore (folded or all-in)
        self.not_playing_num = 0

        # Raised amount for each player in the current round
        self.raised = [0 for _ in range(self.num_players)]

    def start_new_round(self, game_pointer, raised=None):
        ''' Start a new bidding round
        Args:
            game_pointer (int): The id of the player who acts first in this round
            raised (list): Initialize the chips for each player; zeros when omitted or empty
        Note: For the first round of the game, we need to setup the big/small blind
        '''
        self.game_pointer = game_pointer
        self.not_raise_num = 0
        if raised:
            self.raised = raised
        else:
            self.raised = [0 for _ in range(self.num_players)]

    def proceed_round(self, players, action):
        ''' Call other Classes's functions to keep one round running
        Args:
            players (list): The list of players that play the game
            action (Action): A legal action taken by the player
        Returns:
            (int): The game_pointer that indicates the next player
        Raises:
            Exception: If the action leaves the player with a negative stake
        '''
        player = players[self.game_pointer]

        # Call: put in the difference to the highest raise of this round
        if action == Action.CALL:
            diff = max(self.raised) - self.raised[self.game_pointer]
            self.raised[self.game_pointer] = max(self.raised)
            player.bet(chips=diff)
            self.not_raise_num += 1

        # All-in: put in every remaining chip; the counter restarts at 1
        elif action == Action.ALL_IN:
            all_in_quantity = player.remained_chips
            self.raised[self.game_pointer] = all_in_quantity + self.raised[self.game_pointer]
            player.bet(chips=all_in_quantity)

            self.not_raise_num = 1

        # NOTE(review): both pot-sized raises add their amount on top of what the player
        # already put in this round, without first matching the outstanding bet. The
        # notebook author flags this as wrong -- confirm against the intended rules.
        elif action == Action.RAISE_POT:
            self.raised[self.game_pointer] += self.dealer.pot
            player.bet(chips=self.dealer.pot)
            self.not_raise_num = 1

        elif action == Action.RAISE_HALF_POT:
            quantity = int(self.dealer.pot / 2)
            self.raised[self.game_pointer] += quantity
            player.bet(chips=quantity)
            self.not_raise_num = 1

        elif action == Action.FOLD:
            player.status = PlayerStatus.FOLDED

        elif action == Action.CHECK:
            self.not_raise_num += 1

        if player.remained_chips < 0:
            raise Exception("Player in negative stake")

        # A player who is left without chips and did not fold is all-in
        if player.remained_chips == 0 and player.status != PlayerStatus.FOLDED:
            player.status = PlayerStatus.ALLIN

        # Hand the turn to the next seat; this is only the rotation inside a round
        self.game_pointer = (self.game_pointer + 1) % self.num_players

        if player.status == PlayerStatus.ALLIN:
            self.not_playing_num += 1
            self.not_raise_num -= 1  # Because already counted in not_playing_num
        if player.status == PlayerStatus.FOLDED:
            self.not_playing_num += 1

        # Skip the folded players
        while players[self.game_pointer].status == PlayerStatus.FOLDED:
            self.game_pointer = (self.game_pointer + 1) % self.num_players

        return self.game_pointer

    def get_nolimit_legal_actions(self, players):
        ''' Obtain the legal actions for the current player
        Args:
            players (list): The players in the game
        Returns:
           (list):  A list of legal actions
        '''
        full_actions = list(Action)
        current_raised = self.raised[self.game_pointer]
        highest_raised = max(self.raised)

        # If the current chips are less than that of the highest one in the round, we can not check
        if current_raised < highest_raised:
            full_actions.remove(Action.CHECK)

        # If the current player has already matched the highest raise, there is nothing to call
        if current_raised == highest_raised:
            full_actions.remove(Action.CALL)

        player = players[self.game_pointer]

        # A pot-sized raise needs at least the pot in remaining chips
        if self.dealer.pot > player.remained_chips:
            full_actions.remove(Action.RAISE_POT)

        # A half-pot raise needs at least half the pot in remaining chips
        if int(self.dealer.pot / 2) > player.remained_chips:
            full_actions.remove(Action.RAISE_HALF_POT)

        # Can't raise by half the pot if that does not lift the player above the highest
        # raise of this round. Compare against what the player put in during THIS round:
        # ``self.raised`` is per round, whereas ``player.in_chips`` also contains chips
        # from earlier rounds and would let a mere call pass as a raise.
        if Action.RAISE_HALF_POT in full_actions and \
                int(self.dealer.pot / 2) + current_raised <= highest_raised:
            full_actions.remove(Action.RAISE_HALF_POT)

        # If the current player has no more chips after call, we cannot raise.
        # Only the amount still owed matters here; chips already put in do not reduce
        # what the player can afford, so they must not be added to ``diff``.
        diff = highest_raised - current_raised
        if diff > 0 and diff >= player.remained_chips:
            return [Action.FOLD, Action.CALL]

        return full_actions

    def is_over(self):
        ''' Check whether the round is over
        Returns:
            (boolean): True if the current round is over
        '''
        # Over once the players who did not raise plus those no longer playing cover every seat
        return self.not_raise_num + self.not_playing_num >= self.num_players