In [None]:
import gym 
from gym import spaces
from enum import Enum
import numpy as np
import random

class Actions(Enum):
    HIT = 0
    STAND = 1
    SPLIT = 2
    DOUBLE = 3
        

In [None]:
class BlackjackEnv(gym.Env):
    def __init__(self, number_deck):
        super().__init__()
        self.number_deck = number_deck
        self.current_player_index = 0
        self.current_hand_index = 0
        self.wallet = 1000
        self.number_players = 6
        self.total_rewards = 0
        self.dealer = []
        self.hand_players = {f'player_{i}': \
            {'hands': [[]], 'value': 0, 'nb_ace': 0, 'split': False,
             'bet': 0, 'current_player': self.current_player_index == i, 'hand_playing': 0, 'reward':0, 'blackjack': False} for i in range(self.number_players)}
        self.deck = [2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10, 11] * 4 * self.number_deck 
        self.info = {card: self.deck.count(card) for card in set(self.deck)}
        self.len_deck = 13 * 4 * number_deck
        self.dealer_playing = self.current_player_index == self.number_players
        self.action_space = spaces.Discrete(4)
        self.observation_space = spaces.Dict({
            "dealer":  spaces.Dict({
                'value': spaces.Box(low=0, high=31, shape=(1,), dtype=np.int32),
                #hand represent card hand 
                'hand': spaces.Box(low=0, high=11, shape=(10,), dtype=np.int32)
                }),
            "len_deck": spaces.Discrete(self.len_deck + 1),  # Nombre de cartes restantes dans le deck
            "info": spaces.Dict({  # Comptage des cartes restantes
                card: spaces.Discrete(52 * self.number_deck + 1) for card in set(self.deck)
            }),
            "players": spaces.Dict({  # Informations détaillées pour chaque joueur
                f'player_{i}': spaces.Dict({
                    "hands": spaces.Box(low=0, high=31, shape=(5,), dtype=np.int32),  # Valeur de chaque main (on suppose jusqu'à 5 mains max)
                    "value": spaces.Box(low=0, high=31, shape=(1,), dtype=np.int32),  # Valeur de la main active
                    "nb_ace": spaces.Discrete(10),  # Nombre d'as (jusqu'à 4 ou 5 possibles par main)
                    "split": spaces.Discrete(2),  # 1 si la main est splittée, sinon 0
                    "bet": spaces.Box(low=0, high=500, shape=(1,), dtype=np.int32),  # Montant du pari (peut être ajusté selon les limites)
                    "current_player": spaces.Discrete(2),  # Indique si c'est le joueur actif
                    "hand_playing": spaces.Discrete(2),  # Index de la main active (jusqu'à 2)
                    "blackjack": spaces.Discrete(2)  # 1 si le joueur a un blackjack, sinon 0
                }) for i in range(6) # Prend en charge jusqu'à 6 joueurs
            }),
            "wallet": spaces.Box(low=0, high=1e6, shape=(1,), dtype=np.float32)
        })
    def shuffle_deck(self):
            """
            Mélange le deck de cartes.
            """
            random.shuffle(self.deck)
            return self.deck
    def time_to_shuffle(self):
        return len(self.deck) < self.len_deck // 2
    
    def reset():
        print('reset')

    
    def value_hands(self, hands):
        """
        Calculates the value of a hand taking into account aces.
        
        Parameters:
        - hands: list of cards in the hand.
        
        Returns:
        - value: value of the hand.
        """
        value = 0
        num_aces = 0
        
        for card in hands:
            if card == 11:  
                num_aces += 1
                value += 11
            else:
                value += card
    
        while value > 21 and num_aces > 0:
            value -= 10 
            num_aces -= 1
        
        return value

    def play_dealer_hand(self):
        """
        Plays the dealer's hand according to the rules.
        """
        self.dealer_value = self.value_hands(self.dealer)
        
        while self.dealer_value < 17:
            self.dealer.append(self.deck.pop())
            self.dealer_value = self.value_hands(self.dealer)
        if self.time_to_shuffle(): 
            self.shuffle_deck()
    
    def player_vs_dealer(self, reward, player_value, dealer_value, index):
        """
        Determines the reward for a player based on the comparison of the player's hand value with the dealer's hand value.
        
        Parameters:
        - reward: reward to be updated based on the comparison of the player's hand value with the dealer's hand value.
        - player_value: value of the player's hand.
        - dealer_value: value of the dealer's hand.
        
        Returns:
        - reward: updated reward based on the comparison of the player's hand value with the dealer's hand value.
        """
        reward = 0
        if player_value > 21:
            reward = -self.hand_players[f'player_{index}']['bet']
        elif dealer_value > 21 or player_value > dealer_value:
            reward = self.hand_players[f'player_{index}']['bet']
        elif player_value == dealer_value:
            reward = 0
        else:
            reward = -self.hand_players[f'player_{index}']['bet']
        return reward
        
    
    def initialize_new_game(self):    
        """
        Initializes a new game by shuffling the deck, dealing the initial hands, and setting the initial player.

        """
        self.dealer = []
        self.hand_players = {f'player_{i}': \
            {'hands': [[]], 'value': 0, 'nb_ace': 0, 'split': False,
             'bet': 0, 'current_player': self.current_player_index == i, 'hand_playing': 0, 'reward':0, 'blackjack': False} 
            for i in range(self.number_players) }
        
        for _ in range(2):
            for player in self.hand_players:
            # distribuer carte de chaque joueur 
                self.hand_players[player]['hands'][self.current_hand_index].append(self.deck.pop())
            self.dealer.append(self.deck.pop())
            
        for i in range(self.number_players):
            self.update_player_info(i)

    def _get_obs(self):          
        player_hands = [] 
        value_hands = []
        hands_split = []
        bets = []
        dealer = []
        normalized_wallet = self.wallet / 1e6  # Exemple de normalisation
        for i in range(self.number_players):
            player_hands.append(self.hand_players[f'player_{i}']['hands'])
            value_hands.append(self.hand_players[f'player_{i}']['value'])
            hands_split.append(1 if self.hand_players[f'player_{i}']['split'] else 0)
            bets.append(self.hand_players[f'player_{i}']['bet'])
        
        if not self.dealer_playing:
            dealer.append(self.dealer[0])
            dealer.append(self.value_hands([self.dealer[0]]))

        else: 
            dealer.append(self.dealer)
            dealer.append(self.value_hands(self.dealer))    
        
        return {
            "dealer": dealer,
            "len_deck": len(self.deck),
            "info": list(self.info.values()),  # Assurez-vous que self.info est une liste plate
            "player_hands": player_hands,
            "current_player_index": self.current_player_index,
            "value_hands": value_hands,
            "hands_split": hands_split,
            "bets": bets,
            "normalized_wallet": normalized_wallet,
        }

# return np.array([
#             dealer,
#             len(self.deck),
#             list(self.info.values()),  # Assurez-vous que self.info est une liste plate
#             player_hands,
#             self.current_player_index,
#             value_hands,
#             hands_split,
#             bets,
#             normalized_wallet,
#         ])         
    def end_round(self):
        """
        Calcule la récompense finale pour chaque joueur, renvoie l'observation finale et réinitialise le jeu pour une nouvelle manche.
        """
        self.total_rewards = 0  # Réinitialise les récompenses totales pour la manche
        
        # Calcule les récompenses pour chaque joueur en fonction de la main du croupier
        self.play_dealer_hand()  # Le dealer joue sa main finale
        
        for i in range(self.number_players):
            player_key = f'player_{i}'
            player_info = self.hand_players[player_key]
            
            # Calcule les gains pour chaque main (s'il y a eu un split, chaque main est évaluée individuellement)
            if player_info['split']:
                for hand in player_info['hands']:
                    reward = self.player_vs_dealer(0, self.value_hands(hand), self.dealer_value, i)
                    self.total_rewards += reward
            else:
                reward = self.player_vs_dealer(0, player_info['value'], self.dealer_value, i)
                self.total_rewards += reward
            
            # Ajuste le portefeuille (wallet) en fonction du gain ou de la perte
            self.wallet += self.total_rewards

            actual_state = self._get_obs()
        # Prépare pour une nouvelle manche
        self.reset_game()  # Méthode pour réinitialiser les mains et le deck
        return actual_state, self.total_rewards, True, False    
    
    def update_player_info(self, player_index):
        """
        Met à jour les informations du joueur, y compris la valeur de la main et le nombre d'as.
        """
        player = self.hand_players[f'player_{player_index}']
        hand = player['hands'][self.current_hand_index]
        player['value'] = self.value_hands(hand)
        player['nb_ace'] = hand.count(11)  # Nombre d'as dans la main
        player['blackjack'] = (player['value'] == 21 and len(hand) == 2)  # Vérifie le blackjack
        
    def play_single_hand(self, action):
        reward = 0
        next_player = False
        
        if not (self.current_player_index ==self.number_players):
            hand = self.hand_players[f'player_{self.current_player_index}']['hands'][self.current_hand_index]
            if action == Actions.HIT.value:
                hand.append(self.deck.pop())
                self.update_player_info(self.current_player_index)  # Met à jour la main du joueur
                value = self.hand_players[f'player_{self.current_player_index}']['value']
                if value < 21:
                    reward = (21 - value) / 21
                elif value == 21:
                    reward = 1  # Bonus pour avoir 21
                    next_player = True
                elif value > 21:
                    reward = -self.hand_players[f'player_{self.current_player_index}']['bet']
                    next_player = True
            elif action == Actions.STAND.value:
                next_player = True

            elif action == Actions.SPLIT.value and len(hand) == 2 and hand[0] == hand[1]:
                if not self.hand_players[f'player_{self.current_player_index}']['split']:
                    self.handle_split() 
                    self.update_player_info(self.current_player_index)  # Met à jour la main après le split
                else:
                    reward = -self.hand_players[f'player_{self.current_player_index}']['bet']

            elif action == Actions.DOUBLE.value:
                hand.append(self.deck.pop())
                self.update_player_info(self.current_player_index)
                value = self.hand_players[f'player_{self.current_player_index}']['value']
                reward = (21 - value) / 21 if value <= 21 else -self.hand_players[f'player_{self.current_player_index}']['bet']
                next_player = True

            # Passe au joueur suivant si nécessaire
            self.advance_hand_or_player(next_player)
            return self._get_obs(), reward, False, False
        else:
            # Gérer le tour du croupier après tous les joueurs
            self.play_dealer_hand()
            for i in range(self.number_players):
                player = f'player_{i}'
                if self.hand_players[player]['split']:
                    for hand in self.hand_players[player]['hands']:
                        reward = self.player_vs_dealer(reward, self.value_hands(hand), self.dealer_value, i)
                        self.total_rewards += reward
                else:
                    hand = self.hand_players[player]['hands'][0]
                    reward = self.player_vs_dealer(reward, self.value_hands(hand), self.dealer_value, i)
                    self.total_rewards += reward
            return self._get_obs(), self.total_rewards, True, False

    def advance_hand_or_player(self, next_player):
        """
        Moves to the next hand for the current player or to the next player if all hands of the player have been played.
        """
        if next_player:
            self.current_hand_index = 0  # Reset the hand index for the next player
            self.current_player_index += 1
        elif self.current_hand_index < len(self.hand_players[f'player_{self.current_player_index}']['hands']) - 1:
            self.current_hand_index += 1  # Move to the next hand of the same player
       


    def handle_split(self):
        current_player = f'player_{self.current_player_index}'
        hand = self.hand_players[current_player]['hands'][self.current_hand_index]
        
        card1, card2 = hand
        new_hand1 = [card1, self.deck.pop()]
        new_hand2 = [card2, self.deck.pop()]
        
        self.hand_players[current_player]['hands'][self.current_hand_index] = new_hand1
        self.hand_players[current_player]['hands'].insert(self.current_hand_index + 1, new_hand2)
        self.hand_players[current_player]['split'] = True
        
        # Met à jour les informations pour les nouvelles mains
        self.update_player_info(self.current_player_index)
        print(f"Hand split for {current_player}: new hands {new_hand1} and {new_hand2}")
        
    def get_action_mask(self):
        """
        Crée un masque binaire pour les actions valides et invalides en fonction de l’état actuel de la main du joueur.
        """
        mask = np.ones(self.action_space.n)  # Par défaut, toutes les actions sont valides
        
        # Récupérer la main actuelle du joueur
        current_hand = self.dealer if self.dealer_playing \
            else self.hand_players[f'player_{self.current_player_index}']['hands'][self.current_hand_index]         
        value = self.value_hands(current_hand)
        
        # Vérifier les conditions d’invalidité pour chaque action
        if value >= 21:
            # Si la main est 21 ou plus, "HIT" et "DOUBLE" sont invalides
            mask[Actions.HIT.value] = 0
            mask[Actions.DOUBLE.value] = 0

        # "SPLIT" est invalide si les cartes sont différentes ou si le joueur a déjà splitté
        if len(current_hand) != 2 or current_hand[0] != current_hand[1] or self.hand_players[f'player_{self.current_player_index}']['split']:
            mask[Actions.SPLIT.value] = 0

        # "DOUBLE" peut être restreint aux cas où le joueur n’a pas encore tiré de cartes supplémentaires
        if len(current_hand) > 2:
            mask[Actions.DOUBLE.value] = 0  
            
        return mask

    def step(self, action):
        """
        Exécute une action valide et retourne les résultats.
        """
        # Appliquer le masque d’actions
        mask = self.get_action_mask()

        # Vérifier si l’action choisie est valide
        if mask[action] == 0:
            # Si l'action est invalide, appliquer une pénalité ou ignorer l'action
            reward = -5  # Pénalité pour l’action invalide
            print("Action invalide choisie.")
            return self._get_obs(), reward, False, False

        # Si l'action est valide, exécute la main normalement
        new_state, reward, done, truncated = self.play_single_hand(
            action
            )

        return new_state, reward, done, truncated

In [117]:
env = BlackjackEnv(6)
env.initialize_new_game()
for i in range(20):
    print(env.step(0))


execution 1
({'dealer': [8, 8], 'len_deck': 297, 'info': [24, 24, 24, 24, 24, 24, 24, 24, 96, 24], 'player_hands': [[[11, 7, 10]], [[10, 6]], [[10, 5]], [[10, 4]], [[10, 3]], [[9, 2]]], 'current_player_index': 0, 'value_hands': [18, 16, 15, 14, 13, 11], 'hands_split': [0, 0, 0, 0, 0, 0], 'bets': [0, 0, 0, 0, 0, 0], 'normalized_wallet': 0.001}, 0.14285714285714285, False, False)
execution 1
execution
Move to the next player: player 1
({'dealer': [8, 8], 'len_deck': 296, 'info': [24, 24, 24, 24, 24, 24, 24, 24, 96, 24], 'player_hands': [[[11, 7, 10, 10]], [[10, 6]], [[10, 5]], [[10, 4]], [[10, 3]], [[9, 2]]], 'current_player_index': 1, 'value_hands': [28, 16, 15, 14, 13, 11], 'hands_split': [0, 0, 0, 0, 0, 0], 'bets': [0, 0, 0, 0, 0, 0], 'normalized_wallet': 0.001}, 0, False, False)
execution 1
execution
Move to the next player: player 2
({'dealer': [8, 8], 'len_deck': 295, 'info': [24, 24, 24, 24, 24, 24, 24, 24, 96, 24], 'player_hands': [[[11, 7, 10, 10]], [[10, 6, 10]], [[10, 5]], [[1

In [113]:
print(env.wallet)

1000
