In [1]:
pip install treys

Note: you may need to restart the kernel to use updated packages.


In [2]:
import random
from enum import Enum, auto

class Action(Enum):
    FOLD = 0
    CHECK = 1
    CALL = 2
    RAISE = 3
    BET = 4

class Player:
    def __init__(self, id, stack):
        self.id = id
        self.stack = stack
        self.hand = []
        self.current_bet = 0
        self.folded = False

class TexasHoldEm:
    def __init__(self, players, small_blind=10, big_blind=20):
        self.players = players  # List of Player objects
        self.small_blind = small_blind
        self.big_blind = big_blind
        self.deck = self.initialize_deck()
        self.board = []  # Community cards
        self.pot = 0
        self.current_bet = 0
        self.game_over = False
        self.dealer_index = 0  # Index of the dealer in self.players
        self.current_player_index = (self.dealer_index + 1) % len(self.players)  # Player to act
        self.betting_round = 'pre-flop'  # Can be 'pre-flop', 'flop', 'turn', 'river'
        self.round_bets = {}  # Tracks bets per player in the current round
        self.last_raiser = None  # Tracks the last player who raised

    def initialize_deck(self):
        suits = ['H', 'D', 'C', 'S']
        ranks = range(2, 15)  # 2-14 where 11-14 are J, Q, K, A
        deck = [(rank, suit) for rank in ranks for suit in suits]
        random.shuffle(deck)
        return deck

    def deal_hole_cards(self):
        for player in self.players:
            player.hand = [self.deck.pop(), self.deck.pop()]
            player.current_bet = 0
            player.folded = False
        self.round_bets = {player.id: 0 for player in self.players}

    def post_blinds(self):
        small_blind_player = self.players[(self.dealer_index + 1) % len(self.players)]
        big_blind_player = self.players[(self.dealer_index + 2) % len(self.players)]
        
        self._post_blind(small_blind_player, self.small_blind)
        self._post_blind(big_blind_player, self.big_blind)
        
        self.current_bet = self.big_blind
        self.last_raiser = big_blind_player.id

        # Set current player to the one after the big blind
        self.current_player_index = (self.dealer_index + 3) % len(self.players)

    def _post_blind(self, player, amount):
        player.stack -= amount
        player.current_bet = amount
        self.pot += amount
        self.round_bets[player.id] = amount

    def deal_flop(self):
        self.deck.pop()  # Burn card
        self.board.extend([self.deck.pop() for _ in range(3)])
        self.betting_round = 'flop'
        self.reset_bets()

    def deal_turn(self):
        self.deck.pop()  # Burn card
        self.board.append(self.deck.pop())
        self.betting_round = 'turn'
        self.reset_bets()

    def deal_river(self):
        self.deck.pop()  # Burn card
        self.board.append(self.deck.pop())
        self.betting_round = 'river'
        self.reset_bets()

    def reset_bets(self):
        self.current_bet = 0
        for player in self.players:
            player.current_bet = 0
        self.round_bets = {player.id: 0 for player in self.players}
        self.current_player_index = self.dealer_index  # Start with the player after the dealer
        self.last_raiser = None

    def get_current_player(self):
        while True:
            player = self.players[self.current_player_index]
            if not player.folded:
                return player
            self.current_player_index = (self.current_player_index + 1) % len(self.players)

    def get_available_actions(self, player):
        if player.current_bet < self.current_bet:
            # Player needs to call or fold
            actions = [Action.FOLD, Action.CALL, Action.RAISE]
        else:
            # Player can check or bet/raise
            if self.current_bet == 0:
                actions = [Action.CHECK, Action.BET]
            else:
                actions = [Action.CHECK, Action.RAISE]
        return actions

    def execute_action(self, player, action, raise_amount=0):
        if action == Action.FOLD:
            self.handle_fold(player)
        elif action == Action.CHECK:
            self.handle_check(player)
        elif action == Action.CALL:
            self.handle_call(player)
        elif action == Action.BET:
            self.handle_bet(player, raise_amount)
        elif action == Action.RAISE:
            self.handle_raise(player, raise_amount)
        else:
            raise ValueError("Invalid action")

        # Move to the next player
        self.current_player_index = (self.current_player_index + 1) % len(self.players)

    def handle_fold(self, player):
        player.folded = True
        print(f"Player {player.id} folds.")

    def handle_check(self, player):
        print(f"Player {player.id} checks.")

    def handle_call(self, player):
        call_amount = self.current_bet - player.current_bet
        player.stack -= call_amount
        player.current_bet += call_amount
        self.pot += call_amount
        self.round_bets[player.id] += call_amount
        print(f"Player {player.id} calls {call_amount}.")

    def handle_bet(self, player, amount):
        if amount <= 0 or amount > player.stack:
            raise ValueError("Invalid bet amount")
        player.stack -= amount
        player.current_bet += amount
        self.current_bet = player.current_bet
        self.pot += amount
        self.round_bets[player.id] += amount
        self.last_raiser = player.id
        print(f"Player {player.id} bets {amount}.")

    def handle_raise(self, player, amount):
        if amount <= 0 or amount > player.stack:
            raise ValueError("Invalid raise amount")
        call_amount = self.current_bet - player.current_bet
        total_amount = call_amount + amount
        player.stack -= total_amount
        player.current_bet += total_amount
        self.current_bet = player.current_bet
        self.pot += total_amount
        self.round_bets[player.id] += total_amount
        self.last_raiser = player.id
        print(f"Player {player.id} raises by {amount} to {player.current_bet}.")

    def is_round_over(self):
        # The betting round is over when all players have either called the current bet or folded
        active_players = [p for p in self.players if not p.folded]
        if len(active_players) == 1:
            return True  # Only one player remains
        for player in active_players:
            if player.id == self.last_raiser:
                continue  # Skip the last raiser
            if player.current_bet != self.current_bet:
                return False
        return True

    def progress_round(self):
        if self.betting_round == 'pre-flop':
            self.deal_flop()
        elif self.betting_round == 'flop':
            self.deal_turn()
        elif self.betting_round == 'turn':
            self.deal_river()
        elif self.betting_round == 'river':
            self.game_over = True  # Proceed to showdown
        else:
            raise ValueError("Invalid betting round")

    def is_game_over(self):
        # The game is over if only one player remains or all betting rounds are complete
        active_players = [p for p in self.players if not p.folded]
        if len(active_players) == 1:
            return True
        return self.game_over

    def determine_winner(self):
        # If only one player remains
        active_players = [p for p in self.players if not p.folded]
        if len(active_players) == 1:
            winner = active_players[0]
            winner.stack += self.pot
            print(f"Player {winner.id} wins the pot of {self.pot} by default.")
            self.pot = 0
            return

        # Showdown: compare hands
        from treys import Evaluator, Card
        evaluator = Evaluator()
        best_rank = None
        winners = []
        for player in active_players:
            hand = [Card.new(f"{self.rank_to_str(rank)}{suit}") for rank, suit in player.hand]
            board = [Card.new(f"{self.rank_to_str(rank)}{suit}") for rank, suit in self.board]
            rank = evaluator.evaluate(board, hand)
            if best_rank is None or rank < best_rank:
                best_rank = rank
                winners = [player]
            elif rank == best_rank:
                winners.append(player)
        # Split the pot among winners
        split_pot = self.pot / len(winners)
        for winner in winners:
            winner.stack += split_pot
            print(f"Player {winner.id} wins {split_pot} from the pot.")
        self.pot = 0

    def rank_to_str(self, rank):
        if rank == 14:
            return 'A'
        elif rank == 13:
            return 'K'
        elif rank == 12:
            return 'Q'
        elif rank == 11:
            return 'J'
        elif rank == 10:
            return 'T'
        else:
            return str(rank)

    def get_reward(self, player):
        # Define reward as the change in the player's stack
        return player.stack - 1000  # Assuming initial stack is 1000

    # Additional helper methods can be added as needed

In [3]:
class BeliefState:
    def __init__(self, observed_actions, public_cards, pot_size=0):
        self.observed_actions = observed_actions  # Sequence of actions taken
        self.public_cards = public_cards          # Community cards revealed
        self.private_cards = None                 # The AI's own hand
        self.pot_size = pot_size                  # Pot size

    def update(self, action, new_public_cards=None, pot_size=None):
        self.observed_actions.append(action)
        if new_public_cards is not None:
            self.public_cards = new_public_cards
        if pot_size is not None:
            self.pot_size = pot_size

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim

class ValueNetwork(nn.Module):
    def __init__(self, input_size):
        super(ValueNetwork, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, 1)  # Output a scalar value
        )

    def forward(self, x):
        return self.fc(x)

class PolicyNetwork(nn.Module):
    def __init__(self, input_size, action_space):
        super(PolicyNetwork, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, action_space)
        )

    def forward(self, x):
        return nn.Softmax(dim=-1)(self.fc(x))



MAX_FEATURE_LENGTH = 25  # Adjust this value based on your game dynamics

def extract_features(belief_state):
    # Convert actions to numerical representation
    action_features = [action.value for action in belief_state.observed_actions]
    
    # Flatten public cards
    public_card_features = [rank for (rank, suit) in belief_state.public_cards]
    
    # Combine features
    features = action_features + public_card_features
    
    # Pad or truncate to fixed size
    features = features[:MAX_FEATURE_LENGTH] + [0] * max(0, MAX_FEATURE_LENGTH - len(features))
    
    return torch.tensor(features, dtype=torch.float32)


In [5]:
# Global dictionary to store CFRNodes
cfr_nodes = {}

class CFRNode:
    def __init__(self, info_set, actions):
        self.info_set = info_set
        self.actions = actions
        self.regret_sum = {action: 0.0 for action in actions}
        self.strategy = {action: 1.0 / len(actions) for action in actions}
        self.strategy_sum = {action: 0.0 for action in actions}

    def get_strategy(self):
        normalizing_sum = 0.0
        for action in self.actions:
            self.strategy[action] = max(self.regret_sum[action], 0.0)
            normalizing_sum += self.strategy[action]
        if normalizing_sum > 0:
            for action in self.actions:
                self.strategy[action] /= normalizing_sum
        else:
            for action in self.actions:
                self.strategy[action] = 1.0 / len(self.actions)
        # Update strategy sum for averaging
        for action in self.actions:
            self.strategy_sum[action] += self.strategy[action]
        return self.strategy

def get_or_create_cfr_node(info_set, actions):
    if info_set not in cfr_nodes:
        cfr_nodes[info_set] = CFRNode(info_set, actions)
    return cfr_nodes[info_set]

def cfr(node, p0, p1):
    if node.is_terminal():
        return node.utility()
    info_set = node.get_info_set()
    actions = node.get_available_actions()
    cfr_node = get_or_create_cfr_node(info_set, actions)
    strategy = cfr_node.get_strategy()
    util = {}
    node_util = 0
    for action in actions:
        next_node = node.take_action(action)
        if node.current_player == 0:
            util[action] = -cfr(next_node, p0 * strategy[action], p1)
        else:
            util[action] = -cfr(next_node, p0, p1 * strategy[action])
        node_util += strategy[action] * util[action]
    for action in actions:
        regret = util[action] - node_util
        if node.current_player == 0:
            cfr_node.regret_sum[action] += p1 * regret
        else:
            cfr_node.regret_sum[action] += p0 * regret
    return node_util

In [6]:
num_players = 2  # Since we're working with a two-player game

In [7]:
# class GameNode:
#     def __init__(self, current_player, hands, board, history, num_players=2, 
#                  pot=0, player_stacks=None, player_bets=None, current_bet=0, 
#                  folded_players=None, betting_round='pre-flop', last_raiser=None):
#         self.current_player = current_player  # ID of the current player
#         self.hands = hands  # Dictionary mapping player IDs to their hands
#         self.board = board  # Community cards
#         self.history = history  # List of actions taken
#         self.num_players = num_players  # Total number of players
#         self.pot = pot  # Current size of the pot
#         self.player_stacks = player_stacks if player_stacks is not None else {player_id: 1000 for player_id in hands}
#         self.player_bets = player_bets if player_bets is not None else {player_id: 0 for player_id in hands}
#         self.current_bet = current_bet  # Highest bet in the current betting round
#         self.folded_players = folded_players if folded_players is not None else set()  # Players who have folded
#         self.betting_round = betting_round  # Current betting round ('pre-flop', 'flop', 'turn', 'river')
#         self.last_raiser = last_raiser  # ID of the last player who raised

    
#     def is_terminal(self):
#         # Implement logic to determine if the game has ended
#         pass
    
#     def utility(self):
#         # Calculate the utility (payoff) for the current player
#         pass
    
#     def get_info_set(self):
#         # As defined earlier
#         player_hand = self.hands[self.current_player]
#         public_cards = self.board
#         observed_actions = self.history
#         info_set = (
#             tuple(sorted(player_hand)),
#             tuple(sorted(public_cards)),
#             tuple(observed_actions)
#         )
#         return info_set
    
#     def get_available_actions(self):
#         # Return a list of possible actions at this node
#         return [Action.FOLD, Action.CALL, Action.RAISE]  # Example actions


#     def take_action(self, action, amount=0):
#         new_history = self.history + [action]
#         new_hands = self.hands.copy()
#         new_board = self.board.copy()
#         new_pot = self.pot
#         new_player_stacks = self.player_stacks.copy()
#         new_player_bets = self.player_bets.copy()
#         new_folded_players = self.folded_players.copy()
#         new_current_bet = self.current_bet
#         new_last_raiser = self.last_raiser
#         new_betting_round = self.betting_round

#         current_player_id = self.current_player

#         if action == Action.FOLD:
#             new_folded_players.add(current_player_id)
#         elif action == Action.CALL:
#             call_amount = new_current_bet - new_player_bets[current_player_id]
#             if call_amount > new_player_stacks[current_player_id]:
#                 call_amount = new_player_stacks[current_player_id]  # All-in
#             new_player_stacks[current_player_id] -= call_amount
#             new_player_bets[current_player_id] += call_amount
#             new_pot += call_amount
#         elif action == Action.RAISE:
#             raise_amount = amount
#             total_bet = new_current_bet + raise_amount
#             bet_amount = total_bet - new_player_bets[current_player_id]
#             if bet_amount > new_player_stacks[current_player_id]:
#                 bet_amount = new_player_stacks[current_player_id]  # All-in
#                 total_bet = new_player_bets[current_player_id] + bet_amount
#             new_player_stacks[current_player_id] -= bet_amount
#             new_player_bets[current_player_id] += bet_amount
#             new_pot += bet_amount
#             new_current_bet = total_bet
#             new_last_raiser = current_player_id
#         elif action == Action.CHECK:
#             pass  # No changes needed for check
#         elif action == Action.BET:
#             bet_amount = amount
#             if bet_amount > new_player_stacks[current_player_id]:
#                 bet_amount = new_player_stacks[current_player_id]  # All-in
#             new_player_stacks[current_player_id] -= bet_amount
#             new_player_bets[current_player_id] += bet_amount
#             new_pot += bet_amount
#             new_current_bet = new_player_bets[current_player_id]
#             new_last_raiser = current_player_id
#         else:
#             raise ValueError("Invalid action")

#         # Determine the next player
#         next_player = (self.current_player + 1) % self.num_players
#         while next_player in new_folded_players:
#             next_player = (next_player + 1) % self.num_players
#             if next_player == self.current_player:
#                 break  # All other players have folded

#         # Create a new GameNode with updated state
#         new_node = GameNode(
#             current_player=next_player,
#             hands=new_hands,
#             board=new_board,
#             history=new_history,
#             num_players=self.num_players,
#             pot=new_pot,
#             player_stacks=new_player_stacks,
#             player_bets=new_player_bets,
#             current_bet=new_current_bet,
#             folded_players=new_folded_players,
#             betting_round=new_betting_round,
#             last_raiser=new_last_raiser
#         )

#         # Update betting round if needed
#         if self.should_progress_round(new_node):
#             new_node.progress_round()

#         return new_node
    
#     def should_progress_round(self, node):
#         active_players = [p for p in range(self.num_players) if p not in node.folded_players]
#         # If only one player remains, the game ends
#         if len(active_players) <= 1:
#             return False
#         # If all active players have matched the current bet or are all-in
#         for player_id in active_players:
#             player_bet = node.player_bets[player_id]
#             if player_bet != node.current_bet and node.player_stacks[player_id] > 0:
#                 return False
#         return True
    
#     def progress_round(self):
#         self.player_bets = {player_id: 0 for player_id in self.player_bets}
#         self.current_bet = 0
#         self.last_raiser = None

#         if self.betting_round == 'pre-flop':
#             self.deal_flop()
#             self.betting_round = 'flop'
#         elif self.betting_round == 'flop':
#             self.deal_turn()
#             self.betting_round = 'turn'
#         elif self.betting_round == 'turn':
#             self.deal_river()
#             self.betting_round = 'river'
#         elif self.betting_round == 'river':
#             self.betting_round = 'showdown'
#         else:
#             pass  # Game is over

#     def deal_flop(self):
#         # Assuming self.deck is managed elsewhere
#         self.board.extend([self.deck.pop() for _ in range(3)])

#     def deal_turn(self):
#         self.board.append(self.deck.pop())

#     def deal_river(self):
#         self.board.append(self.deck.pop())

In [8]:
class GameNode:
    def __init__(self, current_player, hands, board, history, num_players=2, 
                 pot=0, player_stacks=None, player_bets=None, current_bet=0, 
                 folded_players=None, betting_round='pre-flop', last_raiser=None, deck=None):
        self.current_player = current_player  # ID of the current player
        self.hands = hands  # Dictionary mapping player IDs to their hands
        self.board = board  # Community cards
        self.history = history  # List of actions taken
        self.num_players = num_players  # Total number of players
        self.pot = pot  # Current size of the pot
        self.player_stacks = player_stacks if player_stacks is not None else {player_id: 1000 for player_id in hands}
        self.player_bets = player_bets if player_bets is not None else {player_id: 0 for player_id in hands}
        self.current_bet = current_bet  # Highest bet in the current betting round
        self.folded_players = folded_players if folded_players is not None else set()  # Players who have folded
        self.betting_round = betting_round  # Current betting round ('pre-flop', 'flop', 'turn', 'river', 'showdown')
        self.last_raiser = last_raiser  # ID of the last player who raised
        self.deck = deck if deck is not None else self.initialize_deck()  # Remaining cards in the deck

    def initialize_deck(self):
        suits = ['h', 'd', 'c', 's']
        ranks = range(2, 15)  # 2-14 where 11-14 are J, Q, K, A
        deck = [(rank, suit) for rank in ranks for suit in suits]
        random.shuffle(deck)
        return deck

    def is_terminal(self):
        active_players = [p for p in range(self.num_players) if p not in self.folded_players]
        # Game ends if only one player remains or after showdown
        if len(active_players) <= 1 or self.betting_round == 'showdown':
            return True
        return False

    def utility(self):
        if not self.is_terminal():
            return 0  # Utility is zero if the game is not over

        active_players = [p for p in range(self.num_players) if p not in self.folded_players]
        if len(active_players) == 1:
            # Only one player left; they win the pot
            winner = active_players[0]
            if self.current_player == winner:
                return self.pot
            else:
                return -self.pot
        else:
            # Showdown: compare hands
            hand_strengths = {p: self.evaluate_hand(p) for p in active_players}
            best_strength = min(hand_strengths.values())
            winners = [p for p, strength in hand_strengths.items() if strength == best_strength]
            if self.current_player in winners:
                # Split the pot among winners
                return self.pot / len(winners)
            else:
                return -self.pot / len(winners)

    def evaluate_hand(self, player_id):
        from treys import Evaluator, Card
        evaluator = Evaluator()
        player_hand = self.hands[player_id]
        board = self.board
        # Convert to treys Card objects
        player_cards = [Card.new(f"{self.rank_to_str(rank)}{suit}") for rank, suit in player_hand]
        board_cards = [Card.new(f"{self.rank_to_str(rank)}{suit}") for rank, suit in board]
        rank = evaluator.evaluate(board_cards, player_cards)
        return rank  # Lower rank means a better hand

    def rank_to_str(self, rank):
        rank_dict = {14: 'A', 13: 'K', 12: 'Q', 11: 'J', 10: 'T'}
        return rank_dict.get(rank, str(rank))

    def get_info_set(self):
        player_hand = self.hands[self.current_player]
        public_cards = self.board
        observed_actions = self.history
        info_set = (
            tuple(sorted(player_hand)),
            tuple(sorted(public_cards)),
            tuple(observed_actions)
        )
        return info_set

    def get_available_actions(self):
        if self.current_bet > self.player_bets[self.current_player]:
            # Player needs to call, raise, or fold
            return [Action.FOLD, Action.CALL, Action.RAISE]
        else:
            # Player can check or bet/raise
            if self.current_bet == 0:
                return [Action.CHECK, Action.BET]
            else:
                return [Action.CHECK, Action.RAISE]

    def take_action(self, action, amount=0):
        new_history = self.history + [action]
        new_hands = self.hands.copy()
        new_board = self.board.copy()
        new_pot = self.pot
        new_player_stacks = self.player_stacks.copy()
        new_player_bets = self.player_bets.copy()
        new_folded_players = self.folded_players.copy()
        new_current_bet = self.current_bet
        new_last_raiser = self.last_raiser
        new_betting_round = self.betting_round
        new_deck = self.deck.copy()

        current_player_id = self.current_player

        if action == Action.FOLD:
            new_folded_players.add(current_player_id)
        elif action == Action.CALL:
            call_amount = new_current_bet - new_player_bets[current_player_id]
            available_stack = new_player_stacks[current_player_id]
            actual_call = min(call_amount, available_stack)
            new_player_stacks[current_player_id] -= actual_call
            new_player_bets[current_player_id] += actual_call
            new_pot += actual_call
        elif action == Action.RAISE:
            raise_amount = amount
            total_bet = new_current_bet + raise_amount
            bet_amount = total_bet - new_player_bets[current_player_id]
            available_stack = new_player_stacks[current_player_id]
            actual_bet = min(bet_amount, available_stack)
            new_player_stacks[current_player_id] -= actual_bet
            new_player_bets[current_player_id] += actual_bet
            new_pot += actual_bet
            new_current_bet = new_player_bets[current_player_id]
            new_last_raiser = current_player_id
        elif action == Action.CHECK:
            pass  # No action needed
        elif action == Action.BET:
            bet_amount = amount
            available_stack = new_player_stacks[current_player_id]
            actual_bet = min(bet_amount, available_stack)
            new_player_stacks[current_player_id] -= actual_bet
            new_player_bets[current_player_id] += actual_bet
            new_pot += actual_bet
            new_current_bet = new_player_bets[current_player_id]
            new_last_raiser = current_player_id
        else:
            raise ValueError("Invalid action")

        # Determine the next player
        next_player = (self.current_player + 1) % self.num_players
        while next_player in new_folded_players:
            next_player = (next_player + 1) % self.num_players
            if next_player == self.current_player:
                break  # All other players have folded

        # Create a new GameNode with updated state
        new_node = GameNode(
            current_player=next_player,
            hands=new_hands,
            board=new_board,
            history=new_history,
            num_players=self.num_players,
            pot=new_pot,
            player_stacks=new_player_stacks,
            player_bets=new_player_bets,
            current_bet=new_current_bet,
            folded_players=new_folded_players,
            betting_round=new_betting_round,
            last_raiser=new_last_raiser,
            deck=new_deck
        )

        # Update betting round if needed
        if self.should_progress_round(new_node):
            new_node.progress_round()

        return new_node

    def should_progress_round(self, node):
        active_players = [p for p in range(self.num_players) if p not in node.folded_players]
        if len(active_players) <= 1:
            return True  # Only one player remains
        # Check if all active players have matched the current bet or are all-in
        for player_id in active_players:
            if node.player_stacks[player_id] > 0 and node.player_bets[player_id] != node.current_bet:
                return False
        if node.last_raiser == node.current_player:
            return True
        return False

    def progress_round(self):
        # Reset player bets for the new round
        self.player_bets = {player_id: 0 for player_id in self.player_bets}
        self.current_bet = 0
        self.last_raiser = None

        if self.betting_round == 'pre-flop':
            self.deal_flop()
            self.betting_round = 'flop'
        elif self.betting_round == 'flop':
            self.deal_turn()
            self.betting_round = 'turn'
        elif self.betting_round == 'turn':
            self.deal_river()
            self.betting_round = 'river'
        elif self.betting_round == 'river':
            self.betting_round = 'showdown'
        else:
            pass  # Game is over

    def deal_flop(self):
        # Burn a card
        self.deck.pop()
        # Deal three community cards
        self.board.extend([self.deck.pop() for _ in range(3)])

    def deal_turn(self):
        # Burn a card
        self.deck.pop()
        # Deal one community card
        self.board.append(self.deck.pop())

    def deal_river(self):
        # Burn a card
        self.deck.pop()
        # Deal one community card
        self.board.append(self.deck.pop())



hands = {
    0: [(14, 'h'), (13, 'd')],  # Player 0's hand
    1: [(12, 's'), (11, 'c')]   # Player 1's hand
}

# Create the root game node
root_node = GameNode(
    current_player=0,
    hands=hands,
    board=[],
    history=[],
    num_players=2
)

# Player 0 takes an action
next_node = root_node.take_action(Action.CALL)

# Player 1 takes an action
next_node = next_node.take_action(Action.CHECK)

In [9]:
# value_net = ValueNetwork(input_size=MAX_FEATURE_LENGTH)
# policy_net = PolicyNetwork(input_size=MAX_FEATURE_LENGTH, action_space=len(Action))
# value_optimizer = optim.Adam(value_net.parameters(), lr=1e-4)
# policy_optimizer = optim.Adam(policy_net.parameters(), lr=1e-4)

# NUM_EPISODES = 500

# import torch
# import torch.nn as nn
# import torch.optim as optim
# from enum import Enum, auto

# # Define sample_action function
# def sample_action(action_probs):
#     """
#     Samples an action from the given action probabilities.

#     Args:
#         action_probs (torch.Tensor): A tensor containing the probabilities for each action.

#     Returns:
#         Action: The selected action.
#     """
#     # Ensure action_probs is a 1D tensor
#     if action_probs.dim() > 1:
#         action_probs = action_probs.squeeze()

#     # Sample an action index based on the probabilities
#     action_index = torch.multinomial(action_probs, num_samples=1).item()

#     # Map the index to an action
#     action_list = list(Action)
#     selected_action = action_list[action_index]

#     return selected_action

# # Initialize neural networks and optimizers
# value_net = ValueNetwork(input_size=MAX_FEATURE_LENGTH)
# policy_net = PolicyNetwork(input_size=MAX_FEATURE_LENGTH, action_space=len(Action))
# value_optimizer = optim.Adam(value_net.parameters(), lr=1e-4)
# policy_optimizer = optim.Adam(policy_net.parameters(), lr=1e-4)

# # Training loop
# for episode in range(NUM_EPISODES):
#     # Initialize players
#     players = [Player(id=0, stack=1000), Player(id=1, stack=1000)]
    
#     # Initialize the game
#     game = TexasHoldEm(players)
#     game.deal_hole_cards()
    
#     # Initialize belief states for each player
#     belief_states = {player.id: BeliefState(observed_actions=[], public_cards=game.board) for player in players}
    
#     done = False
#     experiences = []  # To store experiences for training
    
#     while not done:
#         current_player = game.get_current_player()
#         belief_state = belief_states[current_player.id]
        
#         # Extract features
#         features = extract_features(belief_state)
        
#         # Get action probabilities from the policy network
#         action_probs = policy_net(features)
        
#         # Choose an action
#         action = sample_action(action_probs)
        
#         # Execute the action
#         game.execute_action(current_player, action)
        
#         # Get reward (to be defined based on your reward structure)
#         reward = game.get_reward(current_player)
        
#         # Record experience
#         experiences.append((features, action, reward))
        
#         # Update belief states for all players
#         for player in players:
#             belief_states[player.id].update(action, new_public_cards=game.board)
        
#         # Check for end of round/game
#         if game.is_round_over():
#             game.progress_round()
#         if game.is_game_over():
#             game.determine_winner()
#             done = True  # End the game loop
    
#     # After the game, update the networks using collected experiences

#     # Step 1: Prepare the training data
#     states = torch.stack([exp[0] for exp in experiences])  # Features are tensors
#     actions = [exp[1] for exp in experiences]              # Actions are enums
#     rewards = [exp[2] for exp in experiences]              # Rewards are scalars

#     # Step 2: Convert actions to indices
#     action_to_index = {action: idx for idx, action in enumerate(Action)}
#     action_indices = torch.tensor([action_to_index[action] for action in actions], dtype=torch.long)

#     # Step 3: Compute value targets (discounted cumulative rewards)
#     rewards = torch.tensor(rewards, dtype=torch.float32)
#     gamma = 1.0  # No discounting
#     returns = []
#     R = 0
#     for r in reversed(rewards):
#         R = r + gamma * R
#         returns.insert(0, R)
#     value_targets = torch.tensor(returns, dtype=torch.float32)

#     # Step 4: Compute value loss
#     value_predictions = value_net(states).squeeze()
#     value_loss_fn = nn.MSELoss()
#     value_loss = value_loss_fn(value_predictions, value_targets)

#     # Step 5: Compute policy loss
#     policy_outputs = policy_net(states)  # Shape: (batch_size, num_actions)
#     action_probs = policy_outputs.gather(1, action_indices.unsqueeze(1)).squeeze()
#     log_probs = torch.log(action_probs + 1e-10)
#     with torch.no_grad():
#         advantages = value_targets - value_predictions.detach()
#     policy_loss = - (log_probs * advantages).mean()

#     # Step 6: Update the networks
#     # Update value network
#     value_optimizer.zero_grad()
#     value_loss.backward()
#     value_optimizer.step()

#     # Update policy network
#     policy_optimizer.zero_grad()
#     policy_loss.backward()
#     policy_optimizer.step()
    
#     # Compute entropy of the policy
#     entropy = - (policy_outputs * torch.log(policy_outputs + 1e-10)).sum(dim=1).mean()

#     # Add entropy regularization to the policy loss
#     entropy_coef = 0.01  # Adjust as needed
#     policy_loss = policy_loss - entropy_coef * entropy
    
#     # Extract CFR strategies and values
#     cfr_strategies = torch.stack([exp[2] for exp in experiences])  # CFR strategies
#     cfr_values = torch.tensor([exp[3] for exp in experiences], dtype=torch.float32)  # CFR values

#     # Value Loss
#     value_predictions = value_net(states).squeeze()
#     value_loss_fn = nn.MSELoss()
#     value_loss = value_loss_fn(value_predictions, cfr_values)

#     # Policy Loss
#     policy_outputs = policy_net(states)
#     policy_loss_fn = nn.CrossEntropyLoss()
#     policy_loss = policy_loss_fn(policy_outputs, cfr_strategies.argmax(dim=1))



#     # Optionally, print progress
#     if (episode + 1) % 100 == 0:
#         print(f"Completed episode {episode + 1}/{NUM_EPISODES}")

In [10]:
import torch.nn as nn

MAX_FEATURE_LENGTH = 25

# Define ValueNetwork
class ValueNetwork(nn.Module):
    def __init__(self, input_size):
        super(ValueNetwork, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)  # Output a single value
        )
    
    def forward(self, x):
        return self.fc(x)

# Define PolicyNetwork
class PolicyNetwork(nn.Module):
    def __init__(self, input_size, action_space):
        super(PolicyNetwork, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, action_space)
        )
        self.softmax = nn.Softmax(dim=-1)
    
    def forward(self, x):
        x = self.fc(x)
        return self.softmax(x)
    
# Initialize neural networks and optimizers
value_net = ValueNetwork(input_size=MAX_FEATURE_LENGTH)
policy_net = PolicyNetwork(input_size=MAX_FEATURE_LENGTH, action_space=len(Action))

In [11]:
class Player:
    def __init__(self, id, stack):
        self.id = id
        self.stack = stack  # Total chips the player has
        self.hand = []      # The player's hole cards
        self.current_bet = 0
        self.folded = False

    def reset_for_new_hand(self):
        self.hand = []
        self.current_bet = 0
        self.folded = False

import random
from enum import Enum

class TexasHoldEm:
    def __init__(self, players, small_blind=10, big_blind=20):
        self.players = players
        self.small_blind = small_blind
        self.big_blind = big_blind
        self.deck = self.initialize_deck()
        self.board = []  # Community cards
        self.pot = 0
        self.current_bet = 0
        self.game_over = False
        self.dealer_index = 0
        self.current_player_index = (self.dealer_index + 1) % len(self.players)
        self.betting_round = 'pre-flop'
        self.round_bets = {player.id: 0 for player in self.players}
        self.last_raiser = None

    def initialize_deck(self):
        suits = ['h', 'd', 'c', 's']
        ranks = list(range(2, 15))  # 2-14 where 11-14 are J, Q, K, A
        deck = [(rank, suit) for rank in ranks for suit in suits]
        random.shuffle(deck)
        return deck

    def deal_hole_cards(self):
        for player in self.players:
            player.hand = [self.deck.pop(), self.deck.pop()]
            player.current_bet = 0
            player.folded = False
        self.round_bets = {player.id: 0 for player in self.players}

    def post_blinds(self):
        small_blind_player = self.players[(self.dealer_index + 1) % len(self.players)]
        big_blind_player = self.players[(self.dealer_index + 2) % len(self.players)]
        self._post_blind(small_blind_player, self.small_blind)
        self._post_blind(big_blind_player, self.big_blind)
        self.current_bet = self.big_blind
        self.last_raiser = big_blind_player.id
        self.current_player_index = (self.dealer_index + 3) % len(self.players)

    def _post_blind(self, player, amount):
        player.stack -= amount
        player.current_bet = amount
        self.pot += amount
        self.round_bets[player.id] += amount

    def get_current_player(self):
        while True:
            player = self.players[self.current_player_index]
            if not player.folded and player.stack > 0:
                return player
            self.current_player_index = (self.current_player_index + 1) % len(self.players)

    def get_available_actions(self, player):
        actions = []
        if player.current_bet < self.current_bet:
            actions.extend([Action.FOLD, Action.CALL])
            if player.stack > (self.current_bet - player.current_bet):
                actions.append(Action.RAISE)
        else:
            actions.append(Action.CHECK)
            if player.stack > 0:
                actions.append(Action.BET)
        return actions

    def execute_action(self, player, action, raise_amount=0):
        if action == Action.FOLD:
            self.handle_fold(player)
        elif action == Action.CHECK:
            self.handle_check(player)
        elif action == Action.CALL:
            self.handle_call(player)
        elif action == Action.BET:
            self.handle_bet(player, raise_amount)
        elif action == Action.RAISE:
            self.handle_raise(player, raise_amount)
        else:
            raise ValueError("Invalid action")

        # Move to the next player
        self.current_player_index = (self.current_player_index + 1) % len(self.players)

    def handle_fold(self, player):
        player.folded = True

    def handle_check(self, player):
        pass  # No action needed

    def handle_call(self, player):
        call_amount = self.current_bet - player.current_bet
        actual_call = min(call_amount, player.stack)
        player.stack -= actual_call
        player.current_bet += actual_call
        self.pot += actual_call
        self.round_bets[player.id] += actual_call

    def handle_bet(self, player, amount):
        if amount <= 0 or amount > player.stack:
            raise ValueError("Invalid bet amount")
        player.stack -= amount
        player.current_bet += amount
        self.current_bet = player.current_bet
        self.pot += amount
        self.round_bets[player.id] += amount
        self.last_raiser = player.id

    def handle_raise(self, player, amount):
        if amount <= 0 or amount > player.stack:
            raise ValueError("Invalid raise amount")
        raise_amount = amount
        call_amount = self.current_bet - player.current_bet
        total_bet = call_amount + raise_amount
        if total_bet > player.stack:
            total_bet = player.stack
        player.stack -= total_bet
        player.current_bet += total_bet
        self.current_bet = player.current_bet
        self.pot += total_bet
        self.round_bets[player.id] += total_bet
        self.last_raiser = player.id

    def is_round_over(self):
        active_players = [p for p in self.players if not p.folded and p.stack > 0]
        if len(active_players) <= 1:
            return True
        for player in active_players:
            if player.current_bet != self.current_bet:
                return False
        return True

    def progress_round(self):
        for player in self.players:
            player.current_bet = 0
        self.round_bets = {player.id: 0 for player in self.players}
        self.current_bet = 0
        self.last_raiser = None

        if self.betting_round == 'pre-flop':
            self.deal_flop()
            self.betting_round = 'flop'
        elif self.betting_round == 'flop':
            self.deal_turn()
            self.betting_round = 'turn'
        elif self.betting_round == 'turn':
            self.deal_river()
            self.betting_round = 'river'
        elif self.betting_round == 'river':
            self.game_over = True

    def deal_flop(self):
        self.deck.pop()  # Burn card
        self.board.extend([self.deck.pop() for _ in range(3)])

    def deal_turn(self):
        self.deck.pop()  # Burn card
        self.board.append(self.deck.pop())

    def deal_river(self):
        self.deck.pop()  # Burn card
        self.board.append(self.deck.pop())

    def is_game_over(self):
        active_players = [p for p in self.players if not p.folded and p.stack > 0]
        if len(active_players) <= 1:
            return True
        return self.game_over

    def determine_winner(self):
        active_players = [p for p in self.players if not p.folded]
        if len(active_players) == 1:
            winner = active_players[0]
            winner.stack += self.pot
            self.pot = 0
        else:
            # Showdown: evaluate hands
            from treys import Evaluator, Card
            evaluator = Evaluator()
            best_rank = None
            winners = []
            for player in active_players:
                player_cards = [Card.new(f"{self.rank_to_str(rank)}{suit}") for rank, suit in player.hand]
                board_cards = [Card.new(f"{self.rank_to_str(rank)}{suit}") for rank, suit in self.board]
                hand_rank = evaluator.evaluate(board_cards, player_cards)
                if best_rank is None or hand_rank < best_rank:
                    best_rank = hand_rank
                    winners = [player]
                elif hand_rank == best_rank:
                    winners.append(player)
            split_pot = self.pot / len(winners)
            for winner in winners:
                winner.stack += split_pot
            self.pot = 0

    def rank_to_str(self, rank):
        rank_dict = {14: 'A', 13: 'K', 12: 'Q', 11: 'J', 10: 'T'}
        return rank_dict.get(rank, str(rank))

    def get_reward(self, player):
        initial_stack = 1000
        return player.stack - initial_stack

class BeliefState:
    def __init__(self, observed_actions, public_cards, pot_size=0):
        self.observed_actions = observed_actions
        self.public_cards = public_cards
        self.pot_size = pot_size  # Add pot size

    def update(self, action, new_public_cards, pot_size):
        self.observed_actions.append(action)
        self.public_cards = new_public_cards
        self.pot_size = pot_size

def extract_features(belief_state):
    features = []

    # Encode observed actions
    action_encoding = {
        Action.FOLD: 0,
        Action.CHECK: 1,
        Action.CALL: 2,
        Action.RAISE: 3,
        Action.BET: 4
    }
    max_history_length = 12  # Increased from 10 to 12
    action_features = [action_encoding[action] for action in belief_state.observed_actions[-max_history_length:]]
    action_features += [0] * (max_history_length - len(action_features))
    features.extend(action_features)

    # Encode public cards
    rank_encoding = {r: i for i, r in enumerate(range(2, 15), start=1)}
    suit_encoding = {'h': 0, 'd': 1, 'c': 2, 's': 3}
    max_board_cards = 6  # Increased from 5 to 6
    board_features = []
    for rank, suit in belief_state.public_cards:
        rank_feature = rank_encoding.get(rank, 0)
        suit_feature = suit_encoding.get(suit, 0)
        board_features.extend([rank_feature, suit_feature])
    while len(board_features) < max_board_cards * 2:
        board_features.extend([0, 0])
    features.extend(board_features)

    # Ensure total feature length is 25
    # Current length: max_history_length (12) + max_board_cards * 2 (12) = 24
    # Add one more feature, e.g., pot size
    pot_size = belief_state.pot_size if hasattr(belief_state, 'pot_size') else 0
    features.append(pot_size / 1000)  # Normalize pot size

    # Convert to tensor
    features = torch.tensor(features, dtype=torch.float32)
    return features

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
from enum import Enum, auto

class Action(Enum):
    FOLD = 0
    CHECK = 1
    CALL = 2
    RAISE = 3
    BET = 4

# Define MAX_FEATURE_LENGTH
MAX_FEATURE_LENGTH = 25  # Adjust as needed

# Define sample_action function
def sample_action(action_probs, valid_actions):
    """
    Samples an action from the given action probabilities, considering only valid actions.

    Args:
        action_probs (torch.Tensor): A tensor containing the probabilities for each action.
        valid_actions (list of Action): List of valid actions in the current state.

    Returns:
        Action: The selected action.
    """
    action_list = list(Action)
    action_to_index = {action: idx for idx, action in enumerate(action_list)}
    valid_action_indices = [action_to_index[action] for action in valid_actions]

    # Get probabilities of valid actions
    valid_action_probs = action_probs[valid_action_indices]
    valid_action_probs /= valid_action_probs.sum()  # Normalize

    # Sample from valid actions
    chosen_index = torch.multinomial(valid_action_probs, num_samples=1).item()
    selected_action = valid_actions[chosen_index]

    return selected_action

def determine_raise_amount(player, game):
    """
    Determines the amount to raise or bet.

    Args:
        player (Player): The player who is raising.
        game (TexasHoldEm): The current game state.

    Returns:
        float: The raise amount.
    """
    # Simple strategy: raise by a fixed amount or percentage of the pot
    # For this example, we'll raise by half the pot or the player's remaining stack, whichever is smaller
    raise_amount = min(player.stack, max(game.pot * 0.5, game.big_blind))
    return raise_amount

# Initialize neural networks and optimizers
value_net = ValueNetwork(input_size=MAX_FEATURE_LENGTH)
policy_net = PolicyNetwork(input_size=MAX_FEATURE_LENGTH, action_space=len(Action))
value_optimizer = optim.Adam(value_net.parameters(), lr=1e-4)
policy_optimizer = optim.Adam(policy_net.parameters(), lr=1e-4)

NUM_EPISODES = 500

# Training loop
for episode in range(NUM_EPISODES):
    # Initialize players
    players = [Player(id=0, stack=1000), Player(id=1, stack=1000)]
    
    # Initialize the game
    game = TexasHoldEm(players)
    game.deal_hole_cards()
    game.post_blinds()
    
    # Initialize belief states for each player
    belief_states = {
        player.id: BeliefState(
            observed_actions=[],
            public_cards=game.board,
            pot_size=game.pot  # Optional
        ) for player in players
    }
    
    done = False
    experiences = {player.id: [] for player in players}  # Collect experiences for each player separately
    
    while not done:
        current_player = game.get_current_player()
        belief_state = belief_states[current_player.id]
        
        # Extract features
        features = extract_features(belief_state)

        # Ensure features have the correct shape
        if features.dim() == 1:
            features = features.unsqueeze(0)  # Add batch dimension if necessary
        
        # Get action probabilities from the policy network
        action_probs = policy_net(features)
        action_probs = action_probs.squeeze(0)  # Remove batch dimension
        
        # Get available actions for the current player
        valid_actions = game.get_available_actions(current_player)
        
        # Choose an action
        action = sample_action(action_probs, valid_actions)
        
        # Determine raise amount if necessary
        if action in [Action.BET, Action.RAISE]:
            raise_amount = determine_raise_amount(current_player, game)
            game.execute_action(current_player, action, raise_amount=raise_amount)
        else:
            game.execute_action(current_player, action)
        
        # Record experience for the current player
        experiences[current_player.id].append((features.squeeze(0), action, 0))  # Squeeze features before storing, and reward is zero for now
        
        # Update belief states for all players
        for player in players:
            belief_states[player.id].update(
                action,
                new_public_cards=game.board,
                pot_size=game.pot  # Add this argument
            )
        
        # Check for end of round/game
        if game.is_round_over():
            game.progress_round()
        if game.is_game_over():
            game.determine_winner()
            done = True  # End the game loop
    
    # After the game ends, assign rewards and update networks
    
    # Assign final rewards to each player's experiences
    # Update belief states for all players
    for player in players:
        belief_states[player.id].update(
            action,
            new_public_cards=game.board,
            pot_size=game.pot
        )
    
    # Combine experiences from both players
    all_experiences = []
    for exp_list in experiences.values():
        all_experiences.extend(exp_list)
    
    # Proceed to prepare data and update networks

    # Step 1: Prepare the training data
    states = torch.stack([exp[0] for exp in all_experiences])  # Features are tensors
    actions = [exp[1] for exp in all_experiences]              # Actions are enums
    rewards = [exp[2] for exp in all_experiences]              # Rewards are scalars

    # Step 2: Convert actions to indices
    action_to_index = {action: idx for idx, action in enumerate(Action)}
    action_indices = torch.tensor([action_to_index[action] for action in actions], dtype=torch.long)

    # Step 3: Compute value targets
    value_targets = torch.tensor(rewards, dtype=torch.float32)
    
    # Step 4: Compute value loss
    value_predictions = value_net(states).squeeze()
    value_loss_fn = nn.MSELoss()
    value_loss = value_loss_fn(value_predictions, value_targets)

    # Step 5: Compute policy loss
    policy_outputs = policy_net(states)  # Shape: (batch_size, num_actions)
    
    action_probs_taken = policy_outputs.gather(1, action_indices.unsqueeze(1)).squeeze()
    log_probs = torch.log(action_probs_taken + 1e-10)
    with torch.no_grad():
        advantages = value_targets - value_predictions.detach()
    policy_loss = - (log_probs * advantages).mean()
    
    # Optional: Add entropy regularization to encourage exploration
    entropy = - (policy_outputs * torch.log(policy_outputs + 1e-10)).sum(dim=1).mean()
    entropy_coef = 0.01  # Adjust as needed
    policy_loss = policy_loss - entropy_coef * entropy

    # Step 6: Update the networks
    # Update value network
    value_optimizer.zero_grad()
    value_loss.backward()
    value_optimizer.step()

    # Update policy network
    policy_optimizer.zero_grad()
    policy_loss.backward()
    policy_optimizer.step()
    
    # Optionally, print progress
    if (episode + 1) % 100 == 0:
        print(f"Completed episode {episode + 1}/{NUM_EPISODES}")

  return F.mse_loss(input, target, reduction=self.reduction)


Completed episode 100/500
Completed episode 200/500
Completed episode 300/500
Completed episode 400/500
Completed episode 500/500


In [13]:
# After training is complete
torch.save(policy_net.state_dict(), 'policy_net.pth')
torch.save(value_net.state_dict(), 'value_net.pth')

In [14]:
Congratulations on getting your training loop running! Now, let's focus on how to use your trained AI model to play a game of Texas Hold'em, either against another AI or a human player.

---

### *Overview*

To implement your AI and play against it, you'll need to:

1. *Save the Trained Models*: Save your policy_net and value_net after training.

2. *Load the Models*: In a new script or the same script, load the saved models.

3. *Set Up the Game Loop*: Create a game loop where the AI can interact with other players.

4. *Provide Necessary Inputs*: Ensure that the AI receives the required inputs at each turn.

5. *Integrate the AI's Decisions*: Use the policy_net to make decisions based on the current game state.

6. *Handle User Input*: If playing against a human, capture the user's actions.

7. *Maintain Game State*: Keep track of blinds, bets, community cards, and player stacks.

---

### *Step 1: Save the Trained Models*

After your training loop completes, save your models using PyTorch's torch.save function.

python
# After training is complete
torch.save(policy_net.state_dict(), 'policy_net.pth')
torch.save(value_net.state_dict(), 'value_net.pth')


You can add this code at the end of your training script.

---

### *Step 2: Load the Models*

In your new script (or a new cell if you're using a notebook), load the saved models.

python
import torch

# Define MAX_FEATURE_LENGTH and Action enum as before
MAX_FEATURE_LENGTH = 25

class Action(Enum):
    FOLD = 0
    CHECK = 1
    CALL = 2
    RAISE = 3
    BET = 4

# Initialize neural networks
policy_net = PolicyNetwork(input_size=MAX_FEATURE_LENGTH, action_space=len(Action))
value_net = ValueNetwork(input_size=MAX_FEATURE_LENGTH)

# Load the saved models
policy_net.load_state_dict(torch.load('policy_net.pth'))
value_net.load_state_dict(torch.load('value_net.pth'))

# Set networks to evaluation mode
policy_net.eval()
value_net.eval()


---

### *Step 3: Set Up the Game Loop*

Create a game loop where the AI can play against another player (AI or human). We'll assume a two-player game: the AI and a human player.

python
# Initialize players
ai_player = Player(id=0, stack=1000)
human_player = Player(id=1, stack=1000)
players = [ai_player, human_player]

# Initialize the game
game = TexasHoldEm(players)
game.deal_hole_cards()
game.post_blinds()


---

### *Step 4: Provide Necessary Inputs to the AI*

At each turn, you need to construct the BeliefState for the AI and extract features for the policy_net.

#### *Constructing the BeliefState*

python
ai_belief_state = BeliefState(
    observed_actions=[],   # Actions observed so far
    public_cards=game.board,
    pot_size=game.pot
)


Update the ai_belief_state as the game progresses by appending actions and updating the pot size.

---

### *Step 5: Integrate the AI's Decisions*

Create a function for the AI to decide on an action based on the current game state.

python
def ai_decide_action(ai_player, game, ai_belief_state):
    # Extract features
    features = extract_features(ai_belief_state)
    if features.dim() == 1:
        features = features.unsqueeze(0)
    
    # Get action probabilities
    with torch.no_grad():
        action_probs = policy_net(features)
    action_probs = action_probs.squeeze(0)
    
    # Get available actions
    valid_actions = game.get_available_actions(ai_player)
    
    # Choose an action
    action = sample_action(action_probs, valid_actions)
    
    # Determine raise amount if necessary
    if action in [Action.BET, Action.RAISE]:
        raise_amount = determine_raise_amount(ai_player, game)
        game.execute_action(ai_player, action, raise_amount=raise_amount)
    else:
        game.execute_action(ai_player, action)
    
    return action


---

### *Step 6: Handle User Input*

For the human player, prompt them to enter their action.

python
def human_decide_action(human_player, game):
    valid_actions = game.get_available_actions(human_player)
    print(f"Your hand: {human_player.hand}")
    print(f"Community cards: {game.board}")
    print(f"Pot size: {game.pot}")
    print(f"Your stack: {human_player.stack}")
    print(f"Current bet: {game.current_bet}")
    print(f"Valid actions: {[action.name for action in valid_actions]}")
    
    action = None
    while action not in valid_actions:
        action_input = input("Enter your action: ").upper()
        try:
            action = Action[action_input]
            if action not in valid_actions:
                print("Invalid action. Please choose from the valid actions.")
        except KeyError:
            print("Invalid action name. Please try again.")
    
    # Determine raise amount if necessary
    if action in [Action.BET, Action.RAISE]:
        raise_amount = float(input("Enter raise amount: "))
        game.execute_action(human_player, action, raise_amount=raise_amount)
    else:
        game.execute_action(human_player, action)
    
    return action


---

### *Step 7: Update Belief States and Game State*

After each action, update the belief states for both players.

python
def update_belief_states(action, game, ai_belief_state, human_belief_state):
    # Update AI's belief state
    ai_belief_state.update(
        action,
        new_public_cards=game.board,
        pot_size=game.pot
    )
    
    # Update human player's belief state if needed
    # For simplicity, we can skip this if not used


---

### *Step 8: Complete the Game Loop*

Now, integrate everything into a game loop.

python
# Initialize belief states
ai_belief_state = BeliefState(
    observed_actions=[],
    public_cards=game.board,
    pot_size=game.pot
)

# For the human player, you can have a similar belief state if needed
human_belief_state = None  # Not required unless you need it

done = False

while not done:
    current_player = game.get_current_player()
    
    if current_player == ai_player:
        action = ai_decide_action(ai_player, game, ai_belief_state)
        print(f"AI chooses to {action.name}")
    else:
        action = human_decide_action(human_player, game)
    
    # Update belief states
    update_belief_states(action, game, ai_belief_state, human_belief_state)
    
    # Check for end of round/game
    if game.is_round_over():
        game.progress_round()
    if game.is_game_over():
        game.determine_winner()
        done = True  # End the game loop


---

### *Step 9: Handling Blinds and Initial Variables*

At the start of the game, you need to handle the blinds.

python
# Decide who is small blind and big blind
small_blind_player = players[0]  # Could be randomized
big_blind_player = players[1]

game.post_blinds()

# Update AI belief state with initial blinds if necessary
ai_belief_state.observed_actions.append(Action.BET)  # Assuming blinds are considered bets


---

### *Step 10: Implementing Required Inputs*

From your requirements, the AI needs to know:

- *If it's the small blind or big blind*: You can set a flag in the Player class or pass this information to the AI.

- *Current blind to be called*: The AI can get this from game.current_bet.

- *What other players have bet*: The game.round_bets or player.current_bet can provide this information.

- *Current bid value on the table*: Available via game.current_bet.

- *Community cards*: Already included in the BeliefState.

- *Your current money/points*: Available via player.stack.

- *Your hand*: For the AI, you may need to include private cards in the features if necessary.

---

### **Enhancing the extract_features Function**

To include additional inputs, modify the extract_features function.

python
def extract_features(belief_state, player):
    features = []
    
    # Encode observed actions (same as before)
    # ...
    
    # Encode public cards (same as before)
    # ...
    
    # Encode player's stack
    features.append(player.stack / 1000)  # Normalize if needed
    
    # Encode current bet
    features.append(player.current_bet / 1000)
    
    # Encode player's hand (if you choose to include private cards)
    # For example, encode ranks and suits of hole cards
    hole_card_features = []
    rank_encoding = {r: i for i, r in enumerate(range(2, 15), start=1)}
    suit_encoding = {'h': 0, 'd': 1, 'c': 2, 's': 3}
    for rank, suit in player.hand:
        rank_feature = rank_encoding.get(rank, 0)
        suit_feature = suit_encoding.get(suit, 0)
        hole_card_features.extend([rank_feature, suit_feature])
    features.extend(hole_card_features)
    
    # Ensure total feature length matches MAX_FEATURE_LENGTH
    # Pad with zeros if necessary
    features = features[:MAX_FEATURE_LENGTH] + [0] * max(0, MAX_FEATURE_LENGTH - len(features))
    
    # Convert to tensor
    features = torch.tensor(features, dtype=torch.float32)
    return features


---

### *Adjusting MAX_FEATURE_LENGTH*

After adding new features, ensure that MAX_FEATURE_LENGTH is updated to match the total number of features.

For example, if you've added:

- 2 features for the player's stack and current bet
- 4 features for the hole cards (2 cards * 2 features each)

Update MAX_FEATURE_LENGTH accordingly.

---

### *Saving and Loading Models*

As you mentioned, you might want to have a separate Python file to run the game. Ensure you have the necessary code to load the models in that file.

---

### *Putting It All Together*

Here's a simplified version of the game loop:

python
def play_game():
    # Initialize players
    ai_player = Player(id=0, stack=1000)
    human_player = Player(id=1, stack=1000)
    players = [ai_player, human_player]
    
    # Initialize the game
    game = TexasHoldEm(players)
    game.deal_hole_cards()
    game.post_blinds()
    
    # Initialize belief states
    ai_belief_state = BeliefState(
        observed_actions=[],
        public_cards=game.board,
        pot_size=game.pot
    )
    
    done = False
    while not done:
        current_player = game.get_current_player()
        
        if current_player == ai_player:
            action = ai_decide_action(ai_player, game, ai_belief_state)
            print(f"AI chooses to {action.name}")
        else:
            action = human_decide_action(human_player, game)
        
        # Update belief states
        update_belief_states(action, game, ai_belief_state, None)
        
        # Check for end of round/game
        if game.is_round_over():
            game.progress_round()
        if game.is_game_over():
            game.determine_winner()
            done = True

if __name__ == "__main__":
    play_game()


---

### *Handling Multiple Players*

If you plan to extend this to more than two players, you'll need to adjust the code accordingly.

---

### *Final Notes*

- *Model Evaluation Mode*: Ensure that you set your models to evaluation mode using model.eval() to disable dropout and batch normalization during inference.

- *Error Handling*: Add error handling for invalid inputs and edge cases.

- *User Interface*: For a better user experience, consider creating a simple GUI or using a library like curses for a terminal-based interface.

- *Testing*: Thoroughly test your game loop to ensure that the game logic works as expected.

---

### *Example: Saving and Loading Models*

*Saving Models:*

python
# After training
torch.save({
    'policy_net_state_dict': policy_net.state_dict(),
    'value_net_state_dict': value_net.state_dict(),
}, 'poker_ai_model.pth')


*Loading Models:*

python
# In your game script
checkpoint = torch.load('poker_ai_model.pth')
policy_net.load_state_dict(checkpoint['policy_net_state_dict'])
value_net.load_state_dict(checkpoint['value_net_state_dict'])
policy_net.eval()
value_net.eval()


---

### *Conclusion*

By following these steps, you can set up a game where you play against your trained AI. Adjust the code to fit your specific needs and consider adding more features to enhance the gameplay experience.

If you have any questions or need further assistance with specific parts of the implementation, feel free to ask!

SyntaxError: unterminated string literal (detected at line 7) (3913444657.py, line 7)

In [None]:
It seems you're encountering two main issues:

1. *Input Not Recognized*: When you input actions like "FOLD", "fold", or "0", the code outputs "Invalid action name. Please try again."

2. *Desired Functionality*: You want to provide specific inputs to the AI—such as your hand, community cards, and opponent's actions—and have the AI suggest plays for you, rather than playing a full game where the AI assumes random cards.

Let's address each issue step by step.

---

### *Issue 1: Input Not Recognized*

#### *Problem Analysis*

The error occurs because there are multiple definitions of the Action enum in your code, leading to inconsistencies. Specifically:

- You define Action in both modules.py and Cell 2 of your 5.ipynb.
- When you compare actions or try to create an Action from user input, the code may be using different Action definitions, causing comparisons to fail and exceptions to be raised.

#### *Solution*

1. *Remove Redundant Definitions*: Remove the Action enum definition from Cell 2 in your 5.ipynb.

2. **Import Action from modules**: Ensure that you import Action from modules.py wherever it's needed.

3. **Update Imports in 5.ipynb**

   In Cell 2, modify your code as follows:

   python
   import torch

   # Remove this definition
   # class Action(Enum):
   #     FOLD = 0
   #     CHECK = 1
   #     CALL = 2
   #     RAISE = 3
   #     BET = 4

   # Import Action and MAX_FEATURE_LENGTH from modules
   from modules import Action, MAX_FEATURE_LENGTH

   # Initialize neural networks
   from modules import PolicyNetwork, ValueNetwork
   policy_net = PolicyNetwork(input_size=MAX_FEATURE_LENGTH, action_space=len(Action))
   value_net = ValueNetwork(input_size=MAX_FEATURE_LENGTH)

   # Load the saved models
   policy_net.load_state_dict(torch.load('policy_net.pth'))
   value_net.load_state_dict(torch.load('value_net.pth'))

   # Set networks to evaluation mode
   policy_net.eval()
   value_net.eval()
   

4. *Ensure Consistency in All Cells*

   - In any cell where you use Action, import it from modules:

     python
     from modules import Action
     

5. **Modify human_decide_action Function**

   To improve user input handling and allow the user to input either the action name or index, modify Cell 6 as follows:

   python
   def human_decide_action(human_player, game):
       valid_actions = game.get_available_actions(human_player)
       print(f"Your hand: {human_player.hand}")
       print(f"Community cards: {game.board}")
       print(f"Pot size: {game.pot}")
       print(f"Your stack: {human_player.stack}")
       print(f"Current bet: {game.current_bet}")
       # Map valid actions to indices
       valid_actions_list = list(enumerate([action.name for action in valid_actions]))
       print("Valid actions:")
       for idx, action_name in valid_actions_list:
           print(f"{idx}: {action_name}")
       
       action = None
       while action not in valid_actions:
           action_input = input("Enter your action (number or name): ").upper()
           try:
               # Try to parse as integer index
               if action_input.isdigit():
                   idx = int(action_input)
                   if 0 <= idx < len(valid_actions):
                       action = valid_actions[idx]
                   else:
                       print("Invalid action index. Please choose from the valid actions.")
               else:
                   # Try to parse as action name
                   action = Action[action_input]
                   if action not in valid_actions:
                       print("Invalid action. Please choose from the valid actions.")
           except (KeyError, ValueError):
               print("Invalid action name or index. Please try again.")
       
       # Determine raise amount if necessary
       if action in [Action.BET, Action.RAISE]:
           raise_amount = float(input("Enter raise amount: "))
           game.execute_action(human_player, action, raise_amount=raise_amount)
       else:
           game.execute_action(human_player, action)
       
       return action
   

   *Explanation*:

   - This function now accepts either the action name (e.g., "FOLD") or the action index (e.g., "0").
   - It prints the valid actions with their indices for clarity.

---

### *Issue 2: Desired Functionality*

You want to provide specific inputs to the AI—such as your hand, community cards, and opponent's actions—and have the AI suggest plays for you. Essentially, you want to test the AI in specific scenarios rather than playing a full game.

#### *Solution Overview*

We'll create a function that allows you to input:

- *Your Hand*: The cards you're holding.
- *Community Cards*: The cards on the table.
- *Observed Actions*: Actions taken by the opponent so far.
- *Pot Size*: Current pot size.
- *Your Stack and Current Bet*: Your remaining chips and current bet.
- *Valid Actions*: Actions you can take in the current state.

Based on these inputs, the AI will suggest an action.

#### *Implementing the Solution*

1. **Modify extract_features to Include Hand**

   Update the extract_features function in modules.py to include the player's hand:

   python
   def extract_features(belief_state):
       features = []
       
       # Encode observed actions
       action_encoding = {
           Action.FOLD: 0,
           Action.CHECK: 1,
           Action.CALL: 2,
           Action.RAISE: 3,
           Action.BET: 4
       }
       max_history_length = 12
       action_features = [action_encoding[action] for action in belief_state.observed_actions[-max_history_length:]]
       action_features += [0] * (max_history_length - len(action_features))
       features.extend(action_features)
       
       # Encode public cards
       rank_encoding = {r: i for i, r in enumerate(range(2, 15), start=1)}
       suit_encoding = {'H': 0, 'D': 1, 'C': 2, 'S': 3}
       max_board_cards = 5
       board_features = []
       for rank, suit in belief_state.public_cards:
           rank_feature = rank_encoding.get(rank, 0)
           suit_feature = suit_encoding.get(suit, 0)
           board_features.extend([rank_feature, suit_feature])
       while len(board_features) < max_board_cards * 2:
           board_features.extend([0, 0])
       features.extend(board_features)
       
       # Encode private cards (player's hand)
       hand_features = []
       if belief_state.private_cards is not None:
           for rank, suit in belief_state.private_cards:
               rank_feature = rank_encoding.get(rank, 0)
               suit_feature = suit_encoding.get(suit, 0)
               hand_features.extend([rank_feature, suit_feature])
       else:
           hand_features.extend([0, 0, 0, 0])  # Assuming 2 hole cards
       features.extend(hand_features)
       
       # Encode pot size
       features.append(belief_state.pot_size / 1000)  # Normalize pot size
       
       # Ensure total feature length matches MAX_FEATURE_LENGTH
       features = features[:MAX_FEATURE_LENGTH] + [0] * max(0, MAX_FEATURE_LENGTH - len(features))
       
       # Convert to tensor
       features = torch.tensor(features, dtype=torch.float32)
       return features
   

2. *Create a Function to Suggest Actions*

   In 5.ipynb, create a new function ai_suggest_action:

   python
   def ai_suggest_action(
       player_hand,
       community_cards,
       observed_actions,
       pot_size,
       player_stack,
       player_current_bet,
       current_bet,
       valid_actions
   ):
       # Create a belief state
       belief_state = BeliefState(
           observed_actions=observed_actions,
           public_cards=community_cards,
           pot_size=pot_size
       )
       belief_state.private_cards = player_hand
       
       # Create a dummy player object
       player = Player(id=0, stack=player_stack)
       player.hand = player_hand
       player.current_bet = player_current_bet
       player.folded = False
       
       # Extract features
       features = extract_features(belief_state)
       if features.dim() == 1:
           features = features.unsqueeze(0)
       
       # Get action probabilities
       with torch.no_grad():
           action_probs = policy_net(features)
       action_probs = action_probs.squeeze(0)
       
       # Choose an action
       action = sample_action(action_probs, valid_actions)
       return action
   

3. *Use the Function with Custom Inputs*

   In a new cell, input your desired parameters:

   python
   # Sample inputs
   from modules import sample_action, extract_features, determine_raise_amount, Action

   # Input your hand
   player_hand = [(14, 'H'), (14, 'D')]  # For example, Ace of Hearts and Ace of Diamonds

   # Input community cards
   community_cards = [(10, 'C'), (11, 'D'), (12, 'S'), (13, 'H'), (9, 'C')]  # For example

   # Input observed actions (opponent's actions)
   observed_actions = [Action.CALL, Action.RAISE]

   # Input pot size, your stack, and current bets
   pot_size = 500
   player_stack = 1000
   player_current_bet = 0
   current_bet = 100  # The amount you need to call

   # Define valid actions in the current state
   valid_actions = [Action.FOLD, Action.CALL, Action.RAISE]

   # Get the AI's suggested action
   suggested_action = ai_suggest_action(
       player_hand=player_hand,
       community_cards=community_cards,
       observed_actions=observed_actions,
       pot_size=pot_size,
       player_stack=player_stack,
       player_current_bet=player_current_bet,
       current_bet=current_bet,
       valid_actions=valid_actions
   )

   print(f"AI suggests action: {suggested_action.name}")
   

   *Explanation*:

   - *Player Hand*: Your cards.
   - *Community Cards*: Cards on the table.
   - *Observed Actions*: Actions taken by the opponent.
   - *Pot Size*: Current total pot.
   - *Player Stack and Current Bet*: Your remaining chips and the amount you've currently bet.
   - *Current Bet*: The amount you need to call to stay in the hand.
   - *Valid Actions*: Actions you can take in this situation.

4. **Adjust MAX_FEATURE_LENGTH If Necessary**

   Ensure that MAX_FEATURE_LENGTH in both modules.py and 5.ipynb matches the length of your feature vector after including the player's hand.

5. **Update BeliefState in modules.py**

   Ensure that the BeliefState class in modules.py includes private_cards:

   python
   class BeliefState:
       def __init__(self, observed_actions, public_cards, pot_size=0):
           self.observed_actions = observed_actions
           self.public_cards = public_cards
           self.private_cards = None
           self.pot_size = pot_size

       def update(self, action, new_public_cards=None, pot_size=None):
           self.observed_actions.append(action)
           if new_public_cards is not None:
               self.public_cards = new_public_cards
           if pot_size is not None:
               self.pot_size = pot_size
   

---

### *Summary*

- *Fix Input Recognition*: Ensure that you use the same Action enum throughout your code by importing it from modules.py and removing duplicate definitions.

- *Implement Desired Functionality*: Create a function that allows you to input specific game parameters and get the AI's suggested action based on those inputs.

- *Adjust Feature Extraction*: Modify extract_features to include your hand and any additional information you want the AI to consider.

---

### *Additional Tips*

- *Test with Different Scenarios*: Try different hand combinations, community cards, and observed actions to see how the AI responds.

- *Validate Inputs*: Ensure that the inputs you provide are valid within the game's rules (e.g., valid card ranks and suits).

- *Expand Valid Actions*: Adjust the valid_actions list based on the game's state (e.g., you can't CHECK if there's a bet you need to call).

---

### *Conclusion*

By making these adjustments, you'll be able to provide specific inputs to the AI and have it suggest actions based on the game state you define. This setup allows you to test the AI's decision-making in various scenarios and better understand its strategy.

If you have any further questions or need assistance with specific parts of the implementation, feel free to ask!

In [None]:
I understand that you're experiencing a few issues:

1. *Understanding the output of Cell 9*.
2. *Wanting to input your own hand rather than having it randomly generated*.
3. **An error occurring before the game finishes: KeyError: 'D'**.

Let's address each of these concerns step by step.

---

### *1. Understanding the Output of Cell 9*

*Cell 9 Output:*

AI suggests action: FOLD


*Explanation:*

- *Purpose of Cell 9*: This cell is designed to test the ai_suggest_action function by providing it with specific inputs. It simulates a scenario where you provide the AI with certain game parameters, and it outputs the suggested action based on those inputs.

- *No Interaction Needed*: You don't need to interact with Cell 9. It's not expecting any input from you during execution. It simply runs the function with the hardcoded sample inputs and prints the AI's suggested action.

- *Result Interpretation: The AI, given the specific inputs (e.g., your hand, community cards, observed actions, pot size, etc.), has determined that the best action is to **FOLD*. This is based on the model's current state and training.

*Why Did the AI Suggest "FOLD"?*

- The AI's suggestion is based on its learned policy. It's possible that, given the inputs, it evaluated the situation as unfavorable and recommended folding.

- Since the AI is trained based on your code and training data, its decisions are influenced by that training. If you believe the AI should have suggested a different action, you may need to revisit the training data or model parameters.

---

### *2. Inputting Your Own Hand Instead of Randomly Generating It*

*Current Behavior:*

- In *Cell 10*, the game is set up to deal random hands to players using the game.deal_hole_cards() method.

*Your Requirement:*

- You want to specify your own hand rather than having it randomly assigned.

*Solution:*

To allow you to input your own hand, we need to modify the code to set the player's hand manually.

#### *Steps to Modify the Code:*

1. **Remove or Comment Out the deal_hole_cards() Method Call**

   In *Cell 3*, comment out or remove the line:

   python
   game.deal_hole_cards()
   

   We will assign the hands manually instead.

2. *Assign Hands Manually*

   After initializing the game, set the hand attribute of each player to your desired cards.

   python
   # Input your desired hands
   ai_player.hand = [(14, 'H'), (14, 'D')]  # AI player's hand (e.g., Ace of Hearts and Ace of Diamonds)
   human_player.hand = [(12, 'D'), (8, 'S')]  # Your hand (e.g., Queen of Diamonds and 8 of Spades)
   

   *Note:* Ensure that the cards you assign are not duplicated and are removed from the deck if the game logic depends on the deck.

3. *Update the Deck to Remove Dealt Cards*

   Since we're manually assigning cards, we need to make sure these cards are removed from the deck to avoid duplicates.

   python
   # Remove assigned cards from the deck
   assigned_cards = ai_player.hand + human_player.hand
   for card in assigned_cards:
       if card in game.deck:
           game.deck.remove(card)
   

4. *Initialize the Belief States with Private Cards*

   When creating the BeliefState for the AI, include the AI's hand:

   python
   ai_belief_state = BeliefState(
       observed_actions=[],
       public_cards=game.board,
       pot_size=game.pot
   )
   ai_belief_state.private_cards = ai_player.hand
   

5. *Adjust the Feature Extraction if Necessary*

   Ensure that extract_features includes the AI's private cards, which you've already updated.

6. *Proceed with the Game Loop*

   Now, when you run *Cell 10*, the game will use the hands you've assigned rather than random ones.

#### *Updated Code Example for Cell 3:*

python
# Initialize players
from modules import Player
ai_player = Player(id=0, stack=1000)
human_player = Player(id=1, stack=1000)
players = [ai_player, human_player]

# Initialize the game
from modules import TexasHoldEm
game = TexasHoldEm(players)
# game.deal_hole_cards()  # Commented out since we're assigning hands manually
game.post_blinds()

# Assign hands manually
ai_player.hand = [(14, 'H'), (14, 'D')]  # AI player's hand
human_player.hand = [(12, 'D'), (8, 'S')]  # Human player's hand

# Remove assigned cards from the deck
assigned_cards = ai_player.hand + human_player.hand
for card in assigned_cards:
    if card in game.deck:
        game.deck.remove(card)


#### *Update Cell 4:*

Include the AI's private cards in the belief state:

python
from modules import BeliefState
ai_belief_state = BeliefState(
    observed_actions=[],
    public_cards=game.board,
    pot_size=game.pot
)
ai_belief_state.private_cards = ai_player.hand


---

### **3. Error Occurring Before the Game Finishes: KeyError: 'D'**

*Error Message:*


KeyError: 'D'


*Traceback:*


File c:\...treys\card.py:70, in Card.new(string)
     suit_int = Card.CHAR_SUIT_TO_INT_SUIT[suit_char]


*Explanation:*

- The error occurs when the game tries to determine the winner and uses the treys library to evaluate the hands.

- The treys library expects suit characters to be one of 's', 'h', 'd', 'c' (lowercase letters).

- In your code, suits are represented with uppercase letters ('H', 'D', 'C', 'S'), which causes a KeyError when the treys library attempts to look up the suit.

*Solution:*

We need to ensure that the suit characters are in lowercase and match the expected values in the treys library.

#### *Steps to Fix the Error:*

1. **Modify the rank_to_str Function to Return Lowercase Suit Characters**

   Update the rank_to_str function in modules.py to return lowercase suits.

   **Current rank_to_str Function:**

   python
   def rank_to_str(self, rank):
       if rank == 14:
           return 'A'
       elif rank == 13:
           return 'K'
       elif rank == 12:
           return 'Q'
       elif rank == 11:
           return 'J'
       elif rank == 10:
           return 'T'
       else:
           return str(rank)
   

   *Modification:*

   Since the issue is with the suits, we need a function to convert suits to lowercase and to the expected characters.

2. **Create a suit_to_str Function**

   Add a function to convert the suit to the appropriate character.

   python
   def suit_to_str(self, suit):
       suit_mapping = {
           'H': 'h',  # Hearts
           'D': 'd',  # Diamonds
           'C': 'c',  # Clubs
           'S': 's'   # Spades
       }
       return suit_mapping.get(suit.upper(), 'Invalid')
   

3. **Update the determine_winner Method**

   In the determine_winner method, update the code where you create the cards for the treys library.

   *Original Code:*

   python
   hand = [Card.new(f"{self.rank_to_str(rank)}{suit}") for rank, suit in player.hand]
   board = [Card.new(f"{self.rank_to_str(rank)}{suit}") for rank, suit in self.board]
   

   *Modified Code:*

   python
   hand = [Card.new(f"{self.rank_to_str(rank)}{self.suit_to_str(suit)}") for rank, suit in player.hand]
   board = [Card.new(f"{self.rank_to_str(rank)}{self.suit_to_str(suit)}") for rank, suit in self.board]
   

4. *Ensure Ranks Are Correctly Converted*

   Double-check that rank_to_str correctly converts numerical ranks to the expected characters for the treys library.

5. *Handle Invalid Suits*

   In the suit_to_str function, if the suit is invalid, you can raise an error or handle it appropriately.

#### **Updated Code Snippet in modules.py:**

python
def rank_to_str(self, rank):
    if rank == 14:
        return 'A'
    elif rank == 13:
        return 'K'
    elif rank == 12:
        return 'Q'
    elif rank == 11:
        return 'J'
    elif rank == 10:
        return 'T'
    else:
        return str(rank)

def suit_to_str(self, suit):
    suit_mapping = {
        'H': 'h',  # Hearts
        'D': 'd',  # Diamonds
        'C': 'c',  # Clubs
        'S': 's'   # Spades
    }
    return suit_mapping.get(suit.upper(), 'Invalid')

def determine_winner(self):
    # If only one player remains
    active_players = [p for p in self.players if not p.folded]
    if len(active_players) == 1:
        winner = active_players[0]
        winner.stack += self.pot
        print(f"Player {winner.id} wins the pot of {self.pot} by default.")
        self.pot = 0
        return

    # Showdown: compare hands
    from treys import Evaluator, Card
    evaluator = Evaluator()
    best_rank = None
    winners = []
    for player in active_players:
        hand = [Card.new(f"{self.rank_to_str(rank)}{self.suit_to_str(suit)}") for rank, suit in player.hand]
        board = [Card.new(f"{self.rank_to_str(rank)}{self.suit_to_str(suit)}") for rank, suit in self.board]
        rank = evaluator.evaluate(board, hand)
        if best_rank is None or rank < best_rank:
            best_rank = rank
            winners = [player]
        elif rank == best_rank:
            winners.append(player)
    # Split the pot among winners
    split_pot = self.pot / len(winners)
    for winner in winners:
        winner.stack += split_pot
        print(f"Player {winner.id} wins {split_pot} from the pot.")
    self.pot = 0


---

### *Additional Notes*

#### *Regarding Cell 10*

- *Purpose of Cell 10*: It runs the game loop, allowing you to play a game against the AI.

- *Interactions*: You are expected to input your actions when prompted.

- *Valid Actions Input*: With the updated human_decide_action function, you can enter either the action name (e.g., "FOLD") or the index number (e.g., "0").

#### *Testing the Updated Code*

1. **Save Changes to modules.py**: Make sure you save the updated modules.py file after making the changes.

2. *Restart Your Notebook Kernel*: Restart the kernel in your Jupyter notebook to ensure that the updated modules.py is loaded.

3. *Reload the Module*:

   In *Cell 1* (or a new cell), add:

   python
   import importlib
   import modules
   importlib.reload(modules)
   

4. *Re-import Necessary Classes and Functions*:

   Update your imports to ensure you're using the latest versions:

   python
   from modules import (
       Action,
       MAX_FEATURE_LENGTH,
       PolicyNetwork,
       ValueNetwork,
       Player,
       TexasHoldEm,
       BeliefState,
       extract_features,
       sample_action,
       determine_raise_amount
   )
   

5. *Run All Cells in Order*: Execute each cell sequentially after making the necessary changes.

---

### *Summary*

- *Cell 9 Output*: The AI suggested "FOLD" based on the inputs provided. No interaction is required in this cell.

- *Inputting Your Own Hand*: Modify the code to assign your desired hand to the player and remove those cards from the deck to prevent duplicates.

- *Error Before Game Finishes*: The KeyError: 'D' occurs because the treys library expects lowercase suit characters. Update the code to convert suit characters to the expected format.

---

### *Final Thoughts*

By making these changes, you should be able to:

- Provide your own hand and play against the AI with known cards.

- Avoid errors related to suit characters when determining the winner.

- Proceed with the game until it reaches a conclusion.

---

If you encounter any further issues or have additional questions, please feel free to ask, and I'll be happy to assist you further!

In [None]:
Certainly! To modify your existing Jupyter Notebook to continuously accept inputs at each turn and have the AI compute and suggest actions using your neural network model, follow the steps below. This modification will enable the AI to operate in an infinite loop, processing your inputs and providing actions until you decide to exit.

### Overview of Changes

1. **Create an Infinite Loop for AI Decision-Making:**
   - This loop will continuously prompt you for the necessary inputs at each turn.
   - It will utilize the `ai_suggest_action` function to compute the preferred action based on your inputs.
   - The loop will run indefinitely until you choose to exit.

2. **Handle Input Validation:**
   - Ensure that all inputs are correctly formatted to prevent runtime errors.
   - Provide clear prompts and feedback for incorrect inputs.

3. **Address the Existing Error:**
   - The `KeyError: 'D'` suggests an issue with card suit encoding. Ensure that all suits are correctly mapped in the `Card.new` function.

### Step-by-Step Implementation

#### **1. Modify or Add a New Cell for the Infinite Loop**

Add a new cell (e.g., **Cell 11**) at the end of your notebook with the following code. This cell will handle the continuous input-output loop for the AI's decision-making.

```python
# Cell 11: Infinite Loop for AI Decision-Making

import sys

def input_hand(prompt):
    """
    Prompts the user to input their hand cards.
    Expected format: '14H 14D' for Ace of Hearts and Ace of Diamonds.
    """
    while True:
        hand_input = input(prompt)
        try:
            cards = hand_input.strip().split()
            if len(cards) != 2:
                print("Please enter exactly two cards, e.g., 14H 14D")
                continue
            hand = []
            for card in cards:
                rank = int(card[:-1])
                suit = card[-1].upper()
                if suit not in ['H', 'D', 'C', 'S']:
                    raise ValueError
                hand.append((rank, suit))
            return hand
        except ValueError:
            print("Invalid card format. Please enter as RankSuit, e.g., 14H for Ace of Hearts.")

def input_community_cards():
    """
    Prompts the user to input community cards.
    Expected format: '10C 11D 12S 13H 9C'
    """
    while True:
        community_input = input("Enter community cards separated by space (e.g., 10C 11D 12S 13H 9C), or press Enter if none: ")
        if not community_input.strip():
            return []
        try:
            cards = community_input.strip().split()
            community = []
            for card in cards:
                rank = int(card[:-1])
                suit = card[-1].upper()
                if suit not in ['H', 'D', 'C', 'S']:
                    raise ValueError
                community.append((rank, suit))
            if len(community) > 5:
                print("Maximum of 5 community cards allowed.")
                continue
            return community
        except ValueError:
            print("Invalid card format. Please enter as RankSuit, e.g., 10C for Ten of Clubs.")

def input_actions(prompt):
    """
    Prompts the user to input a list of valid actions.
    Expected format: 'FOLD CALL RAISE'
    """
    while True:
        actions_input = input(prompt)
        try:
            action_names = actions_input.strip().upper().split()
            actions = [Action[action] for action in action_names]
            if not actions:
                print("Please enter at least one action.")
                continue
            return actions
        except KeyError:
            print("Invalid action name. Valid actions are: FOLD, CHECK, CALL, RAISE, BET.")

def input_observed_actions():
    """
    Prompts the user to input a list of observed opponent actions.
    Expected format: 'CALL RAISE'
    """
    while True:
        actions_input = input("Enter observed opponent actions separated by space (e.g., CALL RAISE), or press Enter if none: ")
        if not actions_input.strip():
            return []
        try:
            action_names = actions_input.strip().upper().split()
            actions = [Action[action] for action in action_names]
            return actions
        except KeyError:
            print("Invalid action name. Valid actions are: FOLD, CHECK, CALL, RAISE, BET.")

def input_float(prompt):
    """
    Prompts the user to input a floating-point number.
    """
    while True:
        val = input(prompt)
        try:
            return float(val)
        except ValueError:
            print("Please enter a valid number.")

def main_loop():
    """
    Main loop that continuously takes inputs and outputs AI's suggested action.
    """
    while True:
        print("\n--- New Turn ---")
        try:
            player_stack = input_float("Enter your stack: ")
            opponent_actions = input_observed_actions()
            player_hand = input_hand("Enter your hand (e.g., 14H 14D): ")
            community_cards = input_community_cards()
            valid_actions = input_actions("Enter valid actions separated by space (e.g., FOLD CALL RAISE): ")
            current_bet = input_float("Enter current bet: ")
            pot_size = input_float("Enter pot size: ")
            player_current_bet = input_float("Enter your current bet: ")

            # Get the AI's suggested action
            suggested_action = ai_suggest_action(
                player_hand=player_hand,
                community_cards=community_cards,
                observed_actions=opponent_actions,
                pot_size=pot_size,
                player_stack=player_stack,
                player_current_bet=player_current_bet,
                current_bet=current_bet,
                valid_actions=valid_actions
            )
            
            print(f"AI suggests action: {suggested_action.name}")

            # Option to exit
            exit_input = input("Do you want to exit? (y/n): ").lower()
            if exit_input == 'y':
                print("Exiting...")
                break
        except Exception as e:
            print(f"An error occurred: {e}")
            print("Restarting the turn...")

# Start the main loop
main_loop()
```

#### **2. Explanation of the Added Code**

- **Input Functions:**
  - `input_hand(prompt)`: Prompts the user to input their two hole cards in the format `RankSuit` (e.g., `14H` for Ace of Hearts).
  - `input_community_cards()`: Allows the user to input up to five community cards or leave it empty if there are none.
  - `input_actions(prompt)`: Prompts the user to input a list of valid actions available in the current turn (e.g., `FOLD CALL RAISE`).
  - `input_observed_actions()`: Lets the user input a sequence of opponent actions observed so far.
  - `input_float(prompt)`: Ensures that numerical inputs like stack size, current bet, and pot size are correctly entered as floating-point numbers.

- **`main_loop()` Function:**
  - This function encapsulates the infinite loop that handles each turn.
  - It sequentially prompts the user for all necessary inputs.
  - It calls the `ai_suggest_action` function with the collected inputs to determine the AI's preferred action.
  - After displaying the suggested action, it asks the user if they wish to exit the loop.
  - The loop handles any unexpected errors gracefully, allowing the user to restart the turn without crashing the entire loop.

#### **3. Handling the Existing KeyError**

The error you encountered:
```
KeyError: 'D'
```
suggests that there's an issue with how the suits are being handled when converting card representations. To resolve this:

- **Ensure Consistent Suit Encoding:**
  - In your `extract_features` function, suits are encoded as `'H'`, `'D'`, `'C'`, `'S'`.
  - Verify that the `treys` library (or any other card handling library you're using) expects suits in the same format.
  
- **Modify `determine_winner` Function:**
  - Update the `determine_winner` method in your `TexasHoldEm` class to ensure that suits are correctly formatted.
  - Example modification:
    ```python
    def determine_winner(self):
        # If only one player remains
        active_players = [p for p in self.players if not p.folded]
        if len(active_players) == 1:
            winner = active_players[0]
            winner.stack += self.pot
            print(f"Player {winner.id} wins the pot of {self.pot} by default.")
            self.pot = 0
            return

        # Showdown: compare hands
        from treys import Evaluator, Card
        evaluator = Evaluator()
        best_rank = None
        winners = []
        for player in active_players:
            # Ensure that rank_to_str returns correct strings
            hand = [Card.new(f"{self.rank_to_str(rank)}{suit}") for rank, suit in player.hand]
            board = [Card.new(f"{self.rank_to_str(rank)}{suit}") for rank, suit in self.board]
            rank = evaluator.evaluate(board, hand)
            if best_rank is None or rank < best_rank:
                best_rank = rank
                winners = [player]
            elif rank == best_rank:
                winners.append(player)
        # Split the pot among winners
        split_pot = self.pot / len(winners)
        for winner in winners:
            winner.stack += split_pot
            print(f"Player {winner.id} wins {split_pot} from the pot.")
        self.pot = 0
    ```
  - **Check `rank_to_str` Function:**
    - Ensure that the `rank_to_str` method correctly converts numerical ranks to their string representations expected by the `treys` library.
    - Example:
      ```python
      def rank_to_str(self, rank):
          rank_mapping = {
              2: '2', 3: '3', 4: '4', 5: '5',
              6: '6', 7: '7', 8: '8', 9: '9',
              10: 'T', 11: 'J', 12: 'Q',
              13: 'K', 14: 'A'
          }
          return rank_mapping.get(rank, '2')  # Default to '2' if rank not found
      ```

#### **4. Running the Modified Notebook**

1. **Ensure All Dependencies Are Installed:**
   - Make sure that the `treys` library and any other dependencies are correctly installed in your environment.
   - You can install `treys` using pip:
     ```bash
     pip install treys
     ```

2. **Execute All Cells:**
   - Run all the cells in your notebook up to **Cell 11** to initialize the models, players, game, and define necessary functions.

3. **Start the Infinite Loop:**
   - Run **Cell 11** to start the infinite loop.
   - Follow the on-screen prompts to input your stack, actions, hand, community cards, etc.
   - The AI will respond with suggested actions based on your inputs.
   - To exit the loop, simply enter `'y'` when prompted.

#### **5. Example Interaction**

```
--- New Turn ---
Enter your stack: 1000
Enter observed opponent actions separated by space (e.g., CALL RAISE), or press Enter if none: CALL RAISE
Enter your hand (e.g., 14H 14D): 14H 14D
Enter community cards separated by space (e.g., 10C 11D 12S 13H 9C), or press Enter if none: 10C 11D 12S
Enter valid actions separated by space (e.g., FOLD CALL RAISE): FOLD CALL RAISE
Enter current bet: 100
Enter pot size: 500
Enter your current bet: 0
AI suggests action: RAISE
Do you want to exit? (y/n): n

--- New Turn ---
Enter your stack: 900
...
```

#### **6. Additional Recommendations**