## Emulator basic code

In [3]:
from pypokerengine.players import BasePokerPlayer
from pypokerengine.api.emulator import Emulator
from pypokerengine.utils.card_utils import gen_cards
from pypokerengine.utils.game_state_utils import restore_game_state, attach_hole_card, attach_hole_card_from_deck

NB_SIMULATION = 1000
DEBUG_MODE = True
def log(msg):
    if DEBUG_MODE: print("[debug_info] --> %s" % msg)

class EmulatorPlayer(BasePokerPlayer):
    def __init__(self):
        self.opponents_model = FishPlayer()

    def set_opponents_model(self, model_player):
        self.opponents_model = model_player

    # setup Emulator with passed game information
    def receive_game_start_message(self, game_info):
        self.my_model = MyModel()
        nb_player = game_info['player_num']
        max_round = game_info['rule']['max_round']
        sb_amount = game_info['rule']['small_blind_amount']
        ante_amount = game_info['rule']['ante']

        self.emulator = Emulator()
        self.emulator.set_game_rule(nb_player, max_round, sb_amount, ante_amount)
        for player_info in game_info['seats']:
            uuid = player_info['uuid']
            player_model = self.my_model if uuid == self.uuid else self.opponents_model
            self.emulator.register_player(uuid, player_model)

    def declare_action(self, valid_actions, hole_card, round_state):
        try_actions = [MyModel.FOLD, MyModel.CALL, MyModel.MIN_RAISE, MyModel.MAX_RAISE]
        action_results = [0 for i in range(len(try_actions))]

        log("hole_card of emulator player is %s" % hole_card)
        for action in try_actions:
            self.my_model.set_action(action)
            simulation_results = []
            for i in range(NB_SIMULATION):
                game_state = self._setup_game_state(round_state, hole_card)
                round_finished_state, _events = self.emulator.run_until_round_finish(game_state)
                my_stack = [player for player in round_finished_state['table'].seats.players if player.uuid == self.uuid][0].stack
                simulation_results.append(my_stack)
            action_results[action] = 1.0 * sum(simulation_results) / NB_SIMULATION
            log("average stack after simulation when declares %s : %s" % (
                {0:'FOLD', 1:'CALL', 2:'MIN_RAISE', 3:'MAX_RAISE'}[action], action_results[action])
                )

        best_action = max(zip(action_results, try_actions))[1]
        self.my_model.set_action(best_action)
        return self.my_model.declare_action(valid_actions, hole_card, round_state)

    def _setup_game_state(self, round_state, my_hole_card):
        game_state = restore_game_state(round_state)
        game_state['table'].deck.shuffle()
        player_uuids = [player_info['uuid'] for player_info in round_state['seats']]
        for uuid in player_uuids:
            if uuid == self.uuid:
                game_state = attach_hole_card(game_state, uuid, gen_cards(my_hole_card))  # attach my holecard
            else:
                game_state = attach_hole_card_from_deck(game_state, uuid)  # attach opponents holecard at random
        return game_state

    def receive_round_start_message(self, round_count, hole_card, seats):
        pass

    def receive_street_start_message(self, street, round_state):
        pass

    def receive_game_update_message(self, new_action, round_state):
        pass

    def receive_round_result_message(self, winners, hand_info, round_state):
        pass

class MyModel(BasePokerPlayer):

    FOLD = 0
    CALL = 1
    MIN_RAISE = 2
    MAX_RAISE = 3

    def set_action(self, action):
        self.action = action

    def declare_action(self, valid_actions, hole_card, round_state):
        if self.FOLD == self.action:
            return valid_actions[0]['action'], valid_actions[0]['amount']
        elif self.CALL == self.action:
            return valid_actions[1]['action'], valid_actions[1]['amount']
        elif self.MIN_RAISE == self.action:
            return valid_actions[2]['action'], valid_actions[2]['amount']['min']
        elif self.MAX_RAISE == self.action:
            return valid_actions[2]['action'], valid_actions[2]['amount']['max']
        else:
            raise Exception("Invalid action [ %s ] is set" % self.action)

class FishPlayer(BasePokerPlayer):  # Do not forget to make parent class as "BasePokerPlayer"

    #  we define the logic to make an action through this method. (so this method would be the core of your AI)
    def declare_action(self, valid_actions, hole_card, round_state):
        # valid_actions format => [raise_action_info, call_action_info, fold_action_info]
        call_action_info = valid_actions[1]
        action, amount = call_action_info["action"], call_action_info["amount"]
        return action, amount   # action returned here is sent to the poker engine

    def receive_game_start_message(self, game_info):
        pass

    def receive_round_start_message(self, round_count, hole_card, seats):
        pass

    def receive_street_start_message(self, street, round_state):
        pass

    def receive_game_update_message(self, action, round_state):
        pass

    def receive_round_result_message(self, winners, hand_info, round_state):
        pass


from pypokerengine.api.game import setup_config, start_poker
config = setup_config(max_round=10, initial_stack=100, small_blind_amount=5)
config.register_player(name="fish_player", algorithm=FishPlayer())
config.register_player(name="emulator_player", algorithm=EmulatorPlayer())
game_result = start_poker(config, verbose=1)

Started the round 1
Street "preflop" started. (community card = [])
[debug_info] --> hole_card of emulator player is ['H8', 'CA']
[debug_info] --> average stack after simulation when declares FOLD : 95.0
[debug_info] --> average stack after simulation when declares CALL : 103.34
[debug_info] --> average stack after simulation when declares MIN_RAISE : 115.435
[debug_info] --> average stack after simulation when declares MAX_RAISE : 129.9
"emulator_player" declared "raise:100"
"fish_player" declared "call:100"
Street "flop" started. (community card = ['H3', 'HJ', 'C2'])
Street "turn" started. (community card = ['H3', 'HJ', 'C2', 'HA'])
Street "river" started. (community card = ['H3', 'HJ', 'C2', 'HA', 'H9'])
"['emulator_player']" won the round 1 (stack = {'fish_player': 0, 'emulator_player': 200})


## Q-learning

In [13]:
import random
import numpy as np
from pypokerengine.players import BasePokerPlayer
from pypokerengine.api.game import setup_config, start_poker

class QLearningPlayer(BasePokerPlayer):
    def __init__(self):
        self.q_table = {}  # Initialize Q-table
        self.learning_rate = 0.1
        self.discount_factor = 0.9
        self.epsilon = 0.1  # Exploration rate
        self.total_reward = 0
        self.last_state = None
        self.last_action = None

    def declare_action(self, valid_actions, hole_card, round_state):
        state = self._get_state(hole_card, round_state)
        available_actions = [action['action'] for action in valid_actions]

        # Epsilon-greedy action selection
        if random.random() < self.epsilon:
            action = random.choice(available_actions)
        else:
            q_values = [self.q_table.get((state, a), 0) for a in available_actions]
            max_q = max(q_values)
            max_actions = [a for a, q in zip(available_actions, q_values) if q == max_q]
            action = random.choice(max_actions)

        self.last_state = state
        self.last_action = action
        return action, self._get_action_amount(action, valid_actions)
    
    def receive_game_start_message(self, game_info):
        pass

    def receive_round_start_message(self, round_count, hole_card, seats):
        pass

    def receive_street_start_message(self, street, round_state):
        pass

    def receive_game_update_message(self, new_action, round_state):
        pass  # Not needed for this simple example

    def receive_round_result_message(self, winners, hand_info, round_state):
        reward = self._get_reward(winners)
        self.total_reward += reward
        next_state = None  # Terminal state
        self._learn(self.last_state, self.last_action, reward, next_state)

    def _get_state(self, hole_card, round_state):
        # Simplify the state representation to the rank of the hole cards
        ranks = [card[1] for card in hole_card]
        state = ''.join(sorted(ranks))
        return state

    def _get_action_amount(self, action, valid_actions):
        for act in valid_actions:
            if act['action'] == action:
                if action == 'raise':
                    amount = act['amount']['min']
                else:
                    amount = act['amount']
                return amount
        return 0  # Default amount

    def _get_reward(self, winners):
        my_uuid = self.uuid
        for winner in winners:
            if winner['uuid'] == my_uuid:
                return 1  # Win
        return -1  # Lose or tie

    def _learn(self, state, action, reward, next_state):
        old_value = self.q_table.get((state, action), 0)
        future_rewards = 0
        new_value = old_value + self.learning_rate * (reward + self.discount_factor * future_rewards - old_value)
        self.q_table[(state, action)] = new_value

class RandomPlayer(BasePokerPlayer):
    def declare_action(self, valid_actions, hole_card, round_state):
        action = random.choice(valid_actions)['action']
        return action, 0

    def receive_game_start_message(self, game_info):
        pass

    def receive_round_start_message(self, round_count, hole_card, seats):
        pass

    def receive_street_start_message(self, street, round_state):
        pass

    def receive_game_update_message(self, action, round_state):
        pass

    def receive_round_result_message(self, winners, hand_info, round_state):
        pass

# Training loop
def train_bot(num_iterations):
    q_player = QLearningPlayer()
    total_wins = 0
    total_losses = 0

    for i in range(1, num_iterations + 1):
        config = setup_config(max_round=10, initial_stack=100, small_blind_amount=5)
        config.register_player(name="q_player", algorithm=q_player)
        config.register_player(name="fish_player", algorithm=FishPlayer())
        game_result = start_poker(config, verbose=0)

        # Update win/loss counts
        q_player_stack = game_result['players'][0]['stack']
        random_player_stack = game_result['players'][1]['stack']

        if q_player_stack > random_player_stack:
            total_wins += 1
        else:
            total_losses += 1

        # Print training progress every 100 iterations
        if i % 100 == 0 or i == 1:
            win_rate = total_wins / i * 100
            print(f"Iteration {i}: Total Wins: {total_wins}, Total Losses: {total_losses}, Win Rate: {win_rate:.2f}%")

    return q_player

trained_player = train_bot(1000)

# After training, you can test the trained player
config = setup_config(max_round=10, initial_stack=100, small_blind_amount=5)
config.register_player(name="trained_player", algorithm=trained_player)
config.register_player(name="fish_player", algorithm=FishPlayer())
game_result = start_poker(config, verbose=1)


Iteration 1: Total Wins: 0, Total Losses: 1, Win Rate: 0.00%
Iteration 100: Total Wins: 37, Total Losses: 63, Win Rate: 37.00%
Iteration 200: Total Wins: 83, Total Losses: 117, Win Rate: 41.50%
Iteration 300: Total Wins: 119, Total Losses: 181, Win Rate: 39.67%
Iteration 400: Total Wins: 152, Total Losses: 248, Win Rate: 38.00%
Iteration 500: Total Wins: 198, Total Losses: 302, Win Rate: 39.60%
Iteration 600: Total Wins: 233, Total Losses: 367, Win Rate: 38.83%
Iteration 700: Total Wins: 277, Total Losses: 423, Win Rate: 39.57%
Iteration 800: Total Wins: 322, Total Losses: 478, Win Rate: 40.25%
Iteration 900: Total Wins: 363, Total Losses: 537, Win Rate: 40.33%
Iteration 1000: Total Wins: 404, Total Losses: 596, Win Rate: 40.40%
Started the round 1
Street "preflop" started. (community card = [])
"fish_player" declared "call:10"
"trained_player" declared "call:10"
Street "flop" started. (community card = ['C4', 'ST', 'CQ'])
"fish_player" declared "call:0"
"trained_player" declared "call