<a href="https://colab.research.google.com/github/NGeorggin/COMPSCI175FinalProject/blob/main/CS175CFR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install rlcard
import os
import argparse
import rlcard
from rlcard.agents import (
    CFRAgent,
    RandomAgent,
)
from rlcard.utils import (
    set_seed,
    tournament,
    Logger,
    plot_curve,
)

Collecting rlcard
  Downloading rlcard-1.2.0.tar.gz (269 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/269.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m266.2/269.0 kB[0m [31m9.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m269.0/269.0 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rlcard
  Building wheel for rlcard (setup.py) ... [?25l[?25hdone
  Created wheel for rlcard: filename=rlcard-1.2.0-py3-none-any.whl size=325794 sha256=b7d9d57b94933547df9fca3d9968b28a3a3106933a7ad5b5ea769e992aaf754f
  Stored in directory: /root/.cache/pip/wheels/a5/0a/39/26d73b035027276e526bec94b0217ed799109d7890c34a7d9b
Successfully built rlcard
Installing collected packages: rlcard
Successfully installed rlcard-1.2.0


In [None]:
import json
import os
import numpy as np
from collections import OrderedDict

import rlcard
from rlcard.envs import Env
from rlcard.games.nolimitholdem import Game
from rlcard.games.nolimitholdem.round import Action

# Default game settings. The environment stores this dict as its
# `default_game_config`; the three keys are the ones the game's `configure`
# method reads.
DEFAULT_GAME_CONFIG = dict(
    game_num_players=2,
    chips_for_each=100,
    dealer_id=None,
)

class shortDeckholdemEnv(Env):
    ''' Short-deck (36-card) no-limit hold'em environment
    '''

    # Suits and ranks of the 36-card short deck, in the same suit-major order
    # that init_short_deck() uses when it builds the deck.
    _SUITS = ('S', 'H', 'D', 'C')
    _RANKS = ('A', '6', '7', '8', '9', 'T', 'J', 'Q', 'K')

    def __init__(self, config):
        ''' Initialize the short-deck hold'em environment

        Args:
            config (dict): Environment configuration passed on to the base Env
        '''
        # NOTE(review): the name is deliberately left as 'no-limit-holdem';
        # presumably the base Env uses it to decide whether to apply
        # `default_game_config` - confirm before renaming.
        self.name = 'no-limit-holdem'
        self.default_game_config = DEFAULT_GAME_CONFIG
        # `newNolimitholdemGame` is not defined anywhere in this notebook; the
        # game class that is defined here is `shortDeckholdemGame`.
        self.game = shortDeckholdemGame()
        super().__init__(config)
        self.actions = Action
        # 36 one-hot card slots followed by 2 chip counts
        self.state_shape = [[38] for _ in range(self.num_players)]
        self.action_shape = [None for _ in range(self.num_players)]

        # One distinct observation slot per short-deck card. The previous
        # encoding took the 52-card limit-holdem index modulo 36, which mapped
        # different cards (e.g. 'SA' and 'DJ') onto the same slot.
        # NOTE(review): policies saved with the old encoding were presumably
        # keyed on the old observations and will not match the new ones.
        self.card2index = {
            suit + rank: index
            for index, (suit, rank) in enumerate(
                (s, r) for s in self._SUITS for r in self._RANKS
            )
        }

    def _get_legal_actions(self):
        ''' Get all legal actions

        Returns:
            (list): the legal actions reported by the game for the current player
        '''
        return self.game.get_legal_actions()

    def _extract_state(self, state):
        ''' Extract the state representation from state dictionary for agent

        Args:
            state (dict): Original state from the game

        Returns:
            (dict): 'legal_actions' (OrderedDict keyed by action value),
                'obs' (length-38 array: one slot per short-deck card that is set
                to 1 for every hand/public card, then the player's chips and the
                largest chip count), 'raw_obs', 'raw_legal_actions' and
                'action_record'
        '''
        extracted_state = {}

        legal_actions = OrderedDict({action.value: None for action in state['legal_actions']})
        extracted_state['legal_actions'] = legal_actions

        public_cards = state['public_cards']
        hand = state['hand']
        my_chips = state['my_chips']
        all_chips = state['all_chips']
        cards = public_cards + hand
        idx = [self.card2index[card] for card in cards]
        obs = np.zeros(38)
        obs[idx] = 1
        obs[36] = float(my_chips)
        obs[37] = float(max(all_chips))
        extracted_state['obs'] = obs

        extracted_state['raw_obs'] = state
        extracted_state['raw_legal_actions'] = [a for a in state['legal_actions']]
        extracted_state['action_record'] = self.action_recorder

        return extracted_state

    def get_payoffs(self):
        ''' Get the payoff of a game

        Returns:
           payoffs (numpy.ndarray): payoffs of all players
        '''
        return np.array(self.game.get_payoffs())

    def _decode_action(self, action_id):
        ''' Decode the action for applying to the game

        An action that is not currently legal is replaced by CHECK_CALL when
        that is legal, otherwise by FOLD.

        Args:
            action_id (int): action id

        Returns:
            (Action): action for the game
        '''
        legal_actions = self.game.get_legal_actions()
        action = self.actions(action_id)
        if action not in legal_actions:
            # The enum has no CHECK member (only CHECK_CALL); the old
            # `Action.CHECK` lookup raised AttributeError on this path.
            if self.actions.CHECK_CALL in legal_actions:
                return self.actions.CHECK_CALL
            print("Tried non legal action", action_id, action, legal_actions)
            return self.actions.FOLD
        return action

    def get_perfect_information(self):
        ''' Get the perfect information of the current state

        Returns:
            (dict): A dictionary of all the perfect information of the current state
        '''
        state = {}
        state['chips'] = [self.game.players[i].in_chips for i in range(self.num_players)]
        state['public_card'] = [c.get_index() for c in self.game.public_cards] if self.game.public_cards else None
        state['hand_cards'] = [[c.get_index() for c in self.game.players[i].hand] for i in range(self.num_players)]
        state['current_player'] = self.game.game_pointer
        state['legal_actions'] = self.game.get_legal_actions()
        return state

In [None]:
from enum import Enum

import numpy as np
from copy import deepcopy
from rlcard.games.limitholdem import Game
from rlcard.games.limitholdem import PlayerStatus

from rlcard.games.nolimitholdem import Dealer
from rlcard.games.nolimitholdem import Player
from rlcard.games.nolimitholdem import Judger
from rlcard.games.nolimitholdem import Round, Action


class Stage(Enum):
    """Stages of a single hand.

    The game's ``step`` sets FLOP, TURN and RIVER as public cards are dealt,
    and ``step_back`` converts a round counter back into a stage with
    ``Stage(round_counter)``, so the integer values matter.
    """
    PREFLOP = 0
    FLOP = 1
    TURN = 2
    RIVER = 3
    # The two members below are not assigned explicitly anywhere in this file.
    END_HIDDEN = 4
    SHOWDOWN = 5


class shortDeckholdemGame(Game):
    """No-limit hold'em game played with the 36-card short deck."""

    def __init__(self, allow_step_back=False, num_players=2):
        """
        Initialize the short-deck no-limit hold'em game

        Args:
            allow_step_back (bool): Whether ``step`` records snapshots for ``step_back``
            num_players (int): Number of players at the table
        """
        super().__init__(allow_step_back, num_players)

        self.np_random = np.random.RandomState()

        # small blind and big blind
        self.small_blind = 1
        self.big_blind = 2 * self.small_blind

        # config players
        self.init_chips = [100] * num_players

        # If None, the dealer will be randomly chosen
        self.dealer_id = None

    def configure(self, game_config):
        """
        Specify some game specific parameters, such as number of players, initial chips, and dealer id.
        If dealer_id is None, it will be randomly chosen in ``init_game``

        Args:
            game_config (dict): Must contain 'game_num_players', 'chips_for_each' and 'dealer_id'
        """
        self.num_players = game_config['game_num_players']
        # must have num_players length
        self.init_chips = [game_config['chips_for_each']] * game_config["game_num_players"]
        self.dealer_id = game_config['dealer_id']

    def init_game(self):
        """
        Initialize a new hand: shuffle a short deck, deal two cards to each player and post the blinds

        Returns:
            (tuple): Tuple containing:

                (dict): The first state of the game
                (int): Current player's id
        """
        if self.dealer_id is None:
            self.dealer_id = self.np_random.randint(0, self.num_players)

        # Initialize a dealer that can deal cards from the 36-card short deck.
        # (`newLimitHoldemDealer` is not defined in this notebook; the dealer
        # class defined here is `shortDeckholdemDealer`. The debug dump of the
        # whole deck that used to follow was removed because it printed on
        # every new hand.)
        self.dealer = shortDeckholdemDealer(self.np_random)

        # Initialize players to play the game
        self.players = [Player(i, self.init_chips[i], self.np_random) for i in range(self.num_players)]

        # Initialize a judger class which will decide who wins in the end
        self.judger = Judger(self.np_random)

        # Deal cards to each  player to prepare for the first round
        for i in range(2 * self.num_players):
            self.players[i % self.num_players].hand.append(self.dealer.deal_card())

        # Initialize public cards
        self.public_cards = []
        self.stage = Stage.PREFLOP

        # Big blind and small blind
        s = (self.dealer_id + 1) % self.num_players
        b = (self.dealer_id + 2) % self.num_players
        self.players[b].bet(chips=self.big_blind)
        self.players[s].bet(chips=self.small_blind)

        # The player next to the big blind plays the first
        self.game_pointer = (b + 1) % self.num_players

        # Initialize a bidding round, in the first round, the big blind and the small blind needs to
        # be passed to the round for processing.
        self.round = Round(self.num_players, self.big_blind, dealer=self.dealer, np_random=self.np_random)

        self.round.start_new_round(game_pointer=self.game_pointer, raised=[p.in_chips for p in self.players])

        # Count the round. There are 4 rounds in each game.
        self.round_counter = 0

        # Save the history for stepping back to the last state.
        self.history = []

        state = self.get_state(self.game_pointer)

        return state, self.game_pointer

    def get_legal_actions(self):
        """
        Return the legal actions for current player

        Returns:
            (list): A list of legal actions
        """
        return self.round.get_nolimit_legal_actions(players=self.players)

    def step(self, action):
        """
        Get the next state

        Args:
            action (Action): one of the currently legal actions

        Returns:
            (tuple): Tuple containing:

                (dict): next player's state
                (int): next player id

        Raises:
            Exception: If ``action`` is not among the legal actions
        """

        if action not in self.get_legal_actions():
            print(action, self.get_legal_actions())
            print(self.get_state(self.game_pointer))
            raise Exception('Action not allowed')

        if self.allow_step_back:
            # First snapshot the current state
            r = deepcopy(self.round)
            b = self.game_pointer
            r_c = self.round_counter
            d = deepcopy(self.dealer)
            p = deepcopy(self.public_cards)
            ps = deepcopy(self.players)
            self.history.append((r, b, r_c, d, p, ps))

        # Then we proceed to the next round
        self.game_pointer = self.round.proceed_round(self.players, action)

        # Players who can no longer act: folded or all-in
        players_in_bypass = [1 if player.status in (PlayerStatus.FOLDED, PlayerStatus.ALLIN) else 0 for player in self.players]
        if self.num_players - sum(players_in_bypass) == 1:
            last_player = players_in_bypass.index(0)
            if self.round.raised[last_player] >= max(self.round.raised):
                # If the last player has put enough chips, he is also bypassed
                players_in_bypass[last_player] = 1

        # If a round is over, we deal more public cards
        if self.round.is_over():
            # Game pointer goes to the first player not in bypass after the dealer, if there is one
            self.game_pointer = (self.dealer_id + 1) % self.num_players
            if sum(players_in_bypass) < self.num_players:
                while players_in_bypass[self.game_pointer]:
                    self.game_pointer = (self.game_pointer + 1) % self.num_players

            # The three checks below are plain `if`s on purpose: when every
            # player is bypassed the counter is bumped inside a branch, so the
            # next branch also fires and the remaining streets are dealt at once.
            # For the first round, we deal 3 cards
            if self.round_counter == 0:
                self.stage = Stage.FLOP
                self.public_cards.append(self.dealer.deal_card())
                self.public_cards.append(self.dealer.deal_card())
                self.public_cards.append(self.dealer.deal_card())
                if len(self.players) == np.sum(players_in_bypass):
                    self.round_counter += 1
            # For the following rounds, we deal only 1 card
            if self.round_counter == 1:
                self.stage = Stage.TURN
                self.public_cards.append(self.dealer.deal_card())
                if len(self.players) == np.sum(players_in_bypass):
                    self.round_counter += 1
            if self.round_counter == 2:
                self.stage = Stage.RIVER
                self.public_cards.append(self.dealer.deal_card())
                if len(self.players) == np.sum(players_in_bypass):
                    self.round_counter += 1

            self.round_counter += 1
            self.round.start_new_round(self.game_pointer)

        state = self.get_state(self.game_pointer)

        return state, self.game_pointer

    def get_state(self, player_id):
        """
        Return player's state

        Also refreshes ``self.dealer.pot`` with the total chips put in by all players.

        Args:
            player_id (int): player id

        Returns:
            (dict): The state of the player, extended with 'stakes',
                'current_player', 'pot' and 'stage'
        """
        self.dealer.pot = np.sum([player.in_chips for player in self.players])

        chips = [self.players[i].in_chips for i in range(self.num_players)]
        legal_actions = self.get_legal_actions()
        state = self.players[player_id].get_state(self.public_cards, chips, legal_actions)
        state['stakes'] = [self.players[i].remained_chips for i in range(self.num_players)]
        state['current_player'] = self.game_pointer
        state['pot'] = self.dealer.pot
        state['stage'] = self.stage
        return state

    def step_back(self):
        """
        Return to the previous state of the game

        Returns:
            (bool): True if the game steps back successfully
        """
        if len(self.history) > 0:
            self.round, self.game_pointer, self.round_counter, self.dealer, self.public_cards, self.players = self.history.pop()
            # The stage is not part of the snapshot; rebuild it from the round counter
            self.stage = Stage(self.round_counter)
            return True
        return False

    def get_num_players(self):
        """
        Return the number of players in the game

        Returns:
            (int): The number of players in the game
        """
        return self.num_players

    def get_payoffs(self):
        """
        Return the payoffs of the game

        Returns:
            (list): Each entry corresponds to the payoff of one player
        """
        # Folded players contribute no hand (None) to the judger
        hands = [p.hand + self.public_cards if p.status in (PlayerStatus.ALIVE, PlayerStatus.ALLIN) else None for p in self.players]
        chips_payoffs = self.judger.judge_game(self.players, hands)
        return chips_payoffs

    @staticmethod
    def get_num_actions():
        """
        Return the number of applicable actions

        Returns:
            (int): The number of members of the Action enum
        """
        return len(Action)

In [None]:
from rlcard.games.base import Card

def init_short_deck():
    ''' Build the 36-card short deck (ranks A and 6 through K in four suits)

    Cards are produced suit by suit, in the order spades, hearts, diamonds,
    clubs; the deck is not shuffled here.

    Returns:
        (list): A list of Card object
    '''
    deck = []
    for suit in ('S', 'H', 'D', 'C'):
        for rank in ('A', '6', '7', '8', '9', 'T', 'J', 'Q', 'K'):
            deck.append(Card(suit, rank))
    return deck

In [None]:
class shortDeckholdemDealer:
    """Holds a shuffled short deck and the current pot size."""

    def __init__(self, np_random):
        """
        Create the dealer with a freshly shuffled short deck and an empty pot

        Args:
            np_random: Random generator whose ``shuffle`` is used on the deck
        """
        self.np_random = np_random
        self.pot = 0
        self.deck = init_short_deck()
        self.shuffle()

    def shuffle(self):
        """Shuffle the deck in place using the dealer's random generator"""
        self.np_random.shuffle(self.deck)

    def deal_card(self):
        """
        Remove the last card of the deck and hand it out

        Returns:
            (Card): The drawn card from the deck
        """
        top_card = self.deck.pop()
        return top_card

In [None]:
from rlcard.games.limitholdem.utils import compare_hands
import numpy as np


class LimitHoldemJudger:
    """Works out each player's chip payoff at the end of a hand, side pots included"""

    def __init__(self, np_random):
        # Random generator, used only to hand out an indivisible remainder
        self.np_random = np_random

    def judge_game(self, players, hands):
        """
        Judge the winner of the game.

        Hands are compared repeatedly: each pass pays the current winners,
        removes them from contention and carries any chips still owed to the
        other players into the next pass.

        Args:
            players (list): The list of players who play the game
            hands (list): One list of cards per player, or None for a player without a hand

        Returns:
            (list): Net chips won (positive) or lost (negative) by each player
        """
        # Convert the hands into card indexes
        card_ids = [
            None if hand is None else [card.get_index() for card in hand]
            for hand in hands
        ]

        stakes = [player.in_chips for player in players]
        undistributed = sum(stakes)
        payoffs = [0] * len(card_ids)
        while undistributed > 0:
            winners = compare_hands(card_ids)
            winnings = self.split_pots_among_players(stakes, winners)

            for seat in range(len(players)):
                net = winnings[seat] - stakes[seat]
                if winners[seat]:
                    undistributed -= winnings[seat]
                    payoffs[seat] += net
                    # A paid winner takes no part in later passes
                    card_ids[seat] = None
                    stakes[seat] = 0
                elif stakes[seat] > 0:
                    payoffs[seat] += net
                    # Chips handed back stay at stake for the next pass
                    stakes[seat] = winnings[seat]

        assert sum(payoffs) == 0
        return payoffs

    def split_pot_among_players(self, in_chips, winners):
        """
        Splits the next (side) pot among players.
        Called in a loop by split_pots_among_players until all chips are allocated.

        Args:
            in_chips (list): List with number of chips bet not yet distributed for each player
            winners (list): List with 1 if the player is among winners else 0

        Returns:
            (tuple): chips each player receives from this pot, and the chips
                of each player that are still left to distribute
        """
        seats = range(len(in_chips))
        funded = [seat for seat in seats if in_chips[seat] > 0]
        funded_winners = [seat for seat in funded if winners[seat]]

        if not funded_winners or len(funded_winners) == len(funded):
            # No winner has chips in this pot, or everybody in it is a winner:
            # each player simply gets their own chips back.
            allocated = list(in_chips)
            in_chips_after = [0] * len(in_chips)
        else:
            # This pot is one layer deep: the smallest positive stake, taken
            # from every player who still has chips in.
            layer = min(in_chips[seat] for seat in funded)
            share, leftover = divmod(layer * len(funded), len(funded_winners))
            allocated = [0] * len(in_chips)
            in_chips_after = list(in_chips)
            for seat in seats:
                if in_chips[seat] == 0:  # player not in pot
                    continue
                if winners[seat]:
                    allocated[seat] += share
                in_chips_after[seat] -= layer
            if leftover > 0:
                # Chips that cannot be divided evenly go to one winner picked at random
                lucky_seat = self.np_random.choice(funded_winners)
                allocated[lucky_seat] += leftover
        assert sum(in_chips[seat] - in_chips_after[seat] for seat in seats) == sum(allocated)
        return allocated, in_chips_after

    def split_pots_among_players(self, in_chips_initial, winners):
        """
        Splits main pot and side pots among players (to handle special case of all-in players).

        Args:
            in_chips_initial (list): List with number of chips bet for each player
            winners (list): List with 1 if the player is among winners else 0

        Returns:
            (list): List of how much chips each player get back after all pots have been split
        """
        pending = list(in_chips_initial)
        assert len(pending) == len(winners)
        assert all(flag == 0 or flag == 1 for flag in winners)
        assert sum(winners) >= 1  # there must be at least one winner
        totals = np.zeros(len(pending), dtype=int)
        # Peel off one pot per iteration until nobody has chips left in
        while any(chips > 0 for chips in pending):
            pot_share, pending = self.split_pot_among_players(pending, winners)
            totals += pot_share  # element-wise addition
        assert all(chips >= 0 for chips in totals)  # nobody may end up with a negative amount
        assert sum(in_chips_initial) == sum(totals)  # every chip bet has been handed out
        return list(totals)

In [None]:
from rlcard.games.nolimitholdem.dealer import NolimitholdemDealer



class shortDeckholdemPlayer(NolimitholdemDealer):
    """Placeholder for a short-deck player; nothing is implemented yet.

    NOTE(review): despite its name this class derives from the dealer class
    (NolimitholdemDealer), and nothing else in this file references it -
    confirm the intended base class before using it.
    """

    def __init__(self, player_id, init_chips, np_random):
        # Stub: the arguments are ignored and the base initializer is not called.
        pass

In [None]:
# -*- coding: utf-8 -*-
"""Implement no limit texas holdem Round class"""
from enum import Enum

from rlcard.games.limitholdem import PlayerStatus


class Action(Enum):
    """Actions a player can take; the integer values are the action ids.

    NOTE(review): earlier cells import a class of the same name from rlcard, so
    once this cell runs the global name `Action` refers to this enum instead.
    Members of two different Enum classes never compare equal - check which one
    each consumer actually uses.
    """
    FOLD = 0
    CHECK_CALL = 1
    #CALL = 2
    # RAISE_3BB = 3
    RAISE_HALF_POT = 2
    RAISE_POT = 3
    # RAISE_2POT = 5
    ALL_IN = 4
    # SMALL_BLIND = 7
    # BIG_BLIND = 8


class NolimitholdemRound:
    """A betting round: applies player actions, tracks contributions and detects when betting ends"""

    def __init__(self, num_players, init_raise_amount, dealer, np_random):
        """
        Initialize the round class

        Args:
            num_players (int): The number of players
            init_raise_amount (int): The min raise amount when every round starts
            dealer: Object whose ``pot`` attribute gives the current pot size
            np_random: Random generator, stored on the instance
        """
        self.np_random = np_random
        self.dealer = dealer
        self.num_players = num_players
        self.init_raise_amount = init_raise_amount

        # Seat of the player to act; set by start_new_round
        self.game_pointer = None

        # Number of consecutive players who did not raise; the round ends once
        # this plus not_playing_num covers every player
        self.not_raise_num = 0

        # Number of players who are out of the betting (folded or all-in)
        self.not_playing_num = 0

        # Chips put in by each player during this round
        self.raised = [0] * self.num_players

    def start_new_round(self, game_pointer, raised=None):
        """
        Start a new bidding round

        Args:
            game_pointer (int): The game_pointer that indicates the next player
            raised (list): Initial chips of each player for this round; when
                omitted or empty everybody starts at zero

        Note: For the first round of the game the blinds are passed in through ``raised``
        """
        self.game_pointer = game_pointer
        self.not_raise_num = 0
        # The caller's list is stored as-is (not copied)
        self.raised = raised if raised else [0] * self.num_players

    def proceed_round(self, players, action):
        """
        Apply the action of the current player and move on to the next player

        Args:
            players (list): The list of players that play the game
            action (Action): A legal action taken by the player

        Returns:
            (int): The game_pointer that indicates the next player

        Raises:
            Exception: If the player ends up with a negative number of remaining chips
        """
        seat = self.game_pointer
        player = players[seat]

        if action == Action.FOLD:
            player.status = PlayerStatus.FOLDED

        elif action == Action.CHECK_CALL:
            # Match the highest contribution so far
            highest = max(self.raised)
            owed = highest - self.raised[seat]
            self.raised[seat] = highest
            player.bet(chips=owed)
            self.not_raise_num += 1

        elif action == Action.ALL_IN:
            stack = player.remained_chips
            self.raised[seat] = stack + self.raised[seat]
            player.bet(chips=stack)
            # A raise restarts the count with the raiser alone
            self.not_raise_num = 1

        elif action == Action.RAISE_POT:
            self.raised[seat] += self.dealer.pot
            player.bet(chips=self.dealer.pot)
            self.not_raise_num = 1

        elif action == Action.RAISE_HALF_POT:
            half_pot = int(self.dealer.pot / 2)
            self.raised[seat] += half_pot
            player.bet(chips=half_pot)
            self.not_raise_num = 1

        if player.remained_chips < 0:
            raise Exception("Player in negative stake")

        # A player who still holds a hand but has no chips left is all-in
        if player.remained_chips == 0 and player.status != PlayerStatus.FOLDED:
            player.status = PlayerStatus.ALLIN

        self.game_pointer = (seat + 1) % self.num_players

        if player.status == PlayerStatus.ALLIN:
            self.not_playing_num += 1
            self.not_raise_num -= 1  # Because already counted in not_playing_num
        elif player.status == PlayerStatus.FOLDED:
            self.not_playing_num += 1

        # Skip the folded players
        while players[self.game_pointer].status == PlayerStatus.FOLDED:
            self.game_pointer = (self.game_pointer + 1) % self.num_players

        return self.game_pointer

    def get_nolimit_legal_actions(self, players):
        """
        Obtain the legal actions for the current player

        Args:
            players (list): The players in the game

        Returns:
           (list):  A list of legal actions, in the order the Action enum defines them
        """
        seat = self.game_pointer
        player = players[seat]
        stack = player.remained_chips
        highest = max(self.raised)
        to_call = highest - self.raised[seat]

        if to_call > 0 and to_call >= stack:
            # Calling already takes every remaining chip, so no raise of any kind is possible
            blocked = {Action.RAISE_HALF_POT, Action.RAISE_POT, Action.ALL_IN}
        else:
            pot = self.dealer.pot
            half_pot = int(pot / 2)
            blocked = set()
            # A raise larger than the remaining chips is not affordable
            if pot > stack:
                blocked.add(Action.RAISE_POT)
            # A half-pot raise must also lift the player above the current
            # highest contribution of this round
            if half_pot > stack or half_pot + self.raised[seat] <= highest:
                blocked.add(Action.RAISE_HALF_POT)

        return [candidate for candidate in Action if candidate not in blocked]

    def is_over(self):
        """
        Check whether the round is over

        Returns:
            (boolean): True if the current round is over
        """
        return self.not_raise_num + self.not_playing_num >= self.num_players

In [None]:
def train(args):
    ''' Train a CFR agent on the short-deck environment and evaluate it against a random agent

    Args:
        args: Parsed options providing `seed`, `log_dir`, `num_episodes`,
            `evaluate_every` and `num_eval_games`
    '''
    # `newNolimitholdemEnv` is not defined anywhere in this notebook; the
    # environment class defined here is `shortDeckholdemEnv`.
    # The training environment is created with step-back enabled
    # (presumably needed by CFRAgent's tree traversal - confirm).
    env = shortDeckholdemEnv(
        config={
            'seed': 0,
            'allow_step_back': True,
        })
    eval_env = shortDeckholdemEnv(
        config={
            'seed': 0,
            'allow_step_back': False,
        })
    print(eval_env.state_shape)
    # Seed numpy, torch, random
    set_seed(args.seed)

    # Initialize the CFR agent; its model is stored under <log_dir>/cfr_model
    agent = CFRAgent(
        env,
        os.path.join(
            args.log_dir,
            'cfr_model',
        ),
    )
    agent.load()  # If we have saved model, we first load the model

    # Evaluate CFR against random
    eval_env.set_agents([
        agent,
        RandomAgent(num_actions=env.num_actions),
    ])

    # Start training
    with Logger(args.log_dir) as logger:
        for episode in range(args.num_episodes):
            agent.train()
            print('\rIteration {}'.format(episode), end='')
            # Evaluate the performance. Play with Random agents.
            if episode % args.evaluate_every == 0:
                agent.save() # Save model
                # Entry 0 of the tournament result belongs to the CFR agent,
                # which sits in seat 0 of eval_env
                logger.log_performance(
                    episode,
                    tournament(
                        eval_env,
                        args.num_eval_games
                    )[0]
                )

        # Get the paths
        csv_path, fig_path = logger.csv_path, logger.fig_path
    # Plot the learning curve
    plot_curve(csv_path, fig_path, 'cfr')

In [None]:
if __name__ == '__main__':
    parser = argparse.ArgumentParser("CFR example in RLCard")

    # (flag, type, default) for every command-line option
    cli_options = (
        ('--seed', int, 42),
        ('--num_episodes', int, 5000),
        ('--num_eval_games', int, 2000),
        ('--evaluate_every', int, 100),
        ('--log_dir', str, 'experiments/no_limit_holdem_cfr_result/'),
    )
    for flag, value_type, fallback in cli_options:
        parser.add_argument(flag, type=value_type, default=fallback)

    # parse_known_args leaves unrecognised arguments in `unknown` instead of
    # failing (presumably to tolerate the extra arguments a notebook kernel
    # is launched with - confirm).
    args, unknown = parser.parse_known_args()
    print(vars(args))

    train(args)

{'seed': 42, 'num_episodes': 5000, 'num_eval_games': 2000, 'evaluate_every': 100, 'log_dir': 'experiments/no_limit_holdem_cfr_result/'}
[[38], [38]]
[<rlcard.games.base.Card object at 0x7eb945f3fa60>, <rlcard.games.base.Card object at 0x7eb945f3fcd0>, <rlcard.games.base.Card object at 0x7eb945f3e710>, <rlcard.games.base.Card object at 0x7eb945f3f400>, <rlcard.games.base.Card object at 0x7eb945f3f2e0>, <rlcard.games.base.Card object at 0x7eb945f3f460>, <rlcard.games.base.Card object at 0x7eb945f3ccd0>, <rlcard.games.base.Card object at 0x7eb945f3e380>, <rlcard.games.base.Card object at 0x7eb945f3e2c0>, <rlcard.games.base.Card object at 0x7eb945f3e470>, <rlcard.games.base.Card object at 0x7eb945f3cd00>, <rlcard.games.base.Card object at 0x7eb945f3ec50>, <rlcard.games.base.Card object at 0x7eb945f3e8f0>, <rlcard.games.base.Card object at 0x7eb945f3e3b0>, <rlcard.games.base.Card object at 0x7eb945f3d150>, <rlcard.games.base.Card object at 0x7eb945f3c190>, <rlcard.games.base.Card object at 

KeyboardInterrupt: 