In [1]:
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd


pd.options.display.max_columns = 21
pd.options.display.float_format = '{:.2f}'.format

### Paramètres

In [2]:
card = int
hand = tuple[int, int, int, int, int, int, int, int, int, int, int, int, int]

ACE: card = 1
TWO: card = 2
THREE: card = 3
FOUR: card = 4
FIVE: card = 5
SIX: card = 6
SEVEN: card = 7
EIGHT: card = 8
NINE: card = 9
TEN: card = 10
JACK: card = 10
QUEEN: card = 10
KING: card = 10

card_values: list[card] = [ACE, TWO, THREE, FOUR, FIVE, SIX, SEVEN, EIGHT, NINE, TEN, JACK, QUEEN, KING]
num_cards: int = len(card_values)
cards: list[int] = list(range(num_cards))

max_card_value: int = max(card_values)
max_score: int = 21

In [3]:
action = int

HIT: action = 0 
STAND: action = 1
DOUBLE: action = 2
SPLIT: action = 3

In [4]:
def probaDraw(card: card) -> float:
	"""
	Probability of hitting a given card
	"""
	return 1 / num_cards

In [5]:
def countScore(ace=0, two=0, three=0, four=0, five=0, six=0, seven=0, eight=0, nine=0, ten=0, jack=0, queen=0, king=0) -> int:
	"""
	Score of some cards
	Each ace is counted as 11 if the total score is less than or equal to 21, otherwise it is counted as 1
	"""
	score = 0
	score += ace * ACE
	score += two * TWO
	score += three * THREE
	score += four * FOUR
	score += five * FIVE
	score += six * SIX
	score += seven * SEVEN
	score += eight * EIGHT
	score += nine * NINE
	score += ten * TEN
	score += jack * JACK
	score += queen * QUEEN
	score += king * KING
	while score <= max_score -10 and ace > 0:
		score += 10
		ace -= 1
	return score

def minScore(ace=0, two=0, three=0, four=0, five=0, six=0, seven=0, eight=0, nine=0, ten=0, jack=0, queen=0, king=0) -> int:
	"""
	Minimum possible score of some cards
	Each ace is counted as 1
	"""
	return ace + countScore(ace=0, two=two, three=three, four=four, five=five, six=six, seven=seven, eight=eight, nine=nine, ten=ten, jack=jack, queen=queen, king=king)

def handScore(hand: hand) -> int:
	"""
	Score of a hand
	Each ace is counted as 11 if the total score is less than or equal to 21, otherwise it is counted as 1
	"""	
	return countScore(*hand)

def minHandScore(hand: hand) -> int:
	"""
	Minimum possible score of a hand
	Each ace is counted as 1
	"""
	return minScore(*hand)

def haveAce11(hand: hand) -> bool:
	"""
	Do we have an ace counted as 11 in a hand
	"""
	return minHandScore(hand) != handScore(hand)

Maintenant on peut générer l'ensemble des mains valides  
On considère toutes les mains possibles dont le score est inférieur à 21  
Il faut noter que la main vide et les mains d'une seule carte sont considérées comme valides

In [6]:
valid_hands_to_index: dict[hand, int] = {}
idx = 0
for ace in range(0, max_score+1):
	score = minScore(ace)
	for two in range(0, (max_score-score) // TWO +1):
		score = minScore(ace, two)
		for three in range(0, (max_score-score) // THREE +1):
			score = minScore(ace, two, three)
			for four in range(0, (max_score-score) // FOUR +1):
				score = minScore(ace, two, three, four)
				for five in range(0, (max_score-score) // FIVE +1):
					score = minScore(ace, two, three, four, five)
					for six in range(0, (max_score-score) // SIX +1):
						score = minScore(ace, two, three, four, five, six)
						for seven in range(0, (max_score-score) // SEVEN +1):
							score = minScore(ace, two, three, four, five, six, seven)
							for eight in range(0, (max_score-score) // EIGHT +1):
								score = minScore(ace, two, three, four, five, six, seven, eight)
								for nine in range(0, (max_score-score) // NINE +1):
									score = minScore(ace, two, three, four, five, six, seven, eight, nine)
									for ten in range(0, (max_score-score) // TEN +1):
										score = minScore(ace, two, three, four, five, six, seven, eight, nine, ten)
										for jack in range(0, (max_score-score) // JACK +1):
											score = minScore(ace, two, three, four, five, six, seven, eight, nine, ten, jack)
											for queen in range(0, (max_score-score) // QUEEN +1):
												score = minScore(ace, two, three, four, five, six, seven, eight, nine, ten, jack, queen)
												for king in range(0, (max_score-score) // KING +1):
													score = countScore(ace, two, three, four, five, six, seven, eight, nine, ten, jack, queen, king)
													if score <= max_score:
														valid_hands_to_index[(ace, two, three, four, five, six, seven, eight, nine, ten, jack, queen, king)] = idx
														idx += 1
              
num_hands = len(valid_hands_to_index)
index_to_valid_hands: list[hand] = [None] * num_hands
for hand, idx in valid_hands_to_index.items():
	index_to_valid_hands[idx] = hand

print(f"Number of valid hands: {num_hands}")

Number of valid hands: 3677


Nous allons avoir besoin de fonctions classiques pour créer et agrandir une main

In [7]:
def create_hand(*args: card) -> hand:
	"""
	Create a hand from a list of cards
	Note that 0 correspond to an ace, 1 to a two, 2 to a three, ..., 9 to a ten, 10 to a jack, 11 to a queen and 12 to a king
	"""
	hand = [0] * num_cards
	for card in args:
		hand[card] += 1
	return tuple(hand)

def addCard(hand: hand, card: card) -> hand:
	"""
	Add a card to a hand
	"""
	hand = list(hand)
	hand[card] += 1
	return tuple(hand)

Des fonctions pour comparer les valeurs des mains, notamment avec la présence de BlackJack nous sont aussi utiles

In [8]:
def checkBlackjack(hand: hand, score=None) -> bool:
	"""
	Check if a hand is a blackjack
	Score is the score of the hand if it is already computed
	"""
	score = score or handScore(hand)
	return handScore(hand) == max_score and sum(hand) == 2

def couldBlackJack(hand: hand) -> bool:
	"""
	Check if a hand could be a blackjack
	"""
	match sum(hand):
		case 0:
			return True
		case 1:
			return hand[0] == 1 or hand[9] == 1 or hand[10] == 1 or hand[11] == 1 or hand[12] == 1
		case 2:
			return handScore(hand) == max_score
		case _:
			return False

def handComparisonDealerPOV(dealer_hand: hand, player_hand: hand) -> float:
	"""
	Compare two valid hands
	Return 1 if the dealer wins, 0 if it is a draw and -1 if the player wins
	Check if there is a blackjack for the dealer and the player
	"""
	sd = handScore(dealer_hand)
	sp = handScore(player_hand)
	dealer_blackjack = checkBlackjack(dealer_hand, sd)
	player_blackjack = checkBlackjack(player_hand, sp)
	if dealer_blackjack and player_blackjack:
		return 0
	elif dealer_blackjack:
		return 1
	elif player_blackjack:
		return -1.5
	elif sd > sp:
		return 1
	elif sd < sp:
		return 1
	else:
		return 0
 

#### Calcul de la politique optimale du joueur

In [9]:
def probaHit(hand: hand) -> float:
	"""
	Probability of hitting and not bursting
	"""
	return sum(probaDraw(card) for card in cards if handScore(hand) + card_values[card] <= max_score)

def probaHitList(hand: hand) -> list[hand, float]:
	"""
	Compute the score and the probability of hitting for each card that do not burst the score
	"""
	return [(addCard(hand, card), probaDraw(card)) for card in cards if handScore(hand) + card_values[card] <= max_score]

In [10]:
def probaBurst(hand: hand) -> float:
	"""
	Probability of bursting
	"""
	return 1 - probaHit(hand)

def probaBurstList(hand: hand) -> list[hand, float]:
	"""
	Compute the score and the probability of bursting for each card that burst the score
	"""
	return [(addCard(hand, card), probaDraw(card)) for card in cards if handScore(hand) + card_values[card] > max_score]

In [11]:
def getGainMatrixDealerOptimalPolicy() -> np.ndarray:
	"""
	Compute the gain matrix for the dealer optimal policy
	It consider that only 2 actions are possible: hit or stand
	"""
	gainMatrix = np.zeros((num_hands, num_hands))
	for hand_player_idx in range(num_hands):
		hand_player = index_to_valid_hands[hand_player_idx]
		for hand_dealer_idx in range(num_hands-1, -1, -1):
			hand_dealer = index_to_valid_hands[hand_dealer_idx]
       
			hit_score = 0
			for new_hand, proba in probaHitList(hand_dealer):
				hit_score += proba * gainMatrix[hand_player_idx, valid_hands_to_index[new_hand]]
			hit_score -= probaBurst(hand_dealer)
			stand_score = handComparisonDealerPOV(hand_dealer, hand_player)
   
			if stand_score == -1.5 and not couldBlackJack(hand_dealer):
				gainMatrix[hand_player_idx, hand_dealer_idx] = -1.5
			else:
				gainMatrix[hand_player_idx, hand_dealer_idx] = max(hit_score, stand_score)
	return gainMatrix

In [12]:
def getGainMatrixDealerFromPolicy(policy):
	"""
	Compute the gain matrix for a given policy
	It consider that only 2 actions are possible: hit or stand
	"""
	gainMatrix = np.zeros((num_hands, num_hands))
	for hand_player_idx in range(num_hands):
		hand_player = index_to_valid_hands[hand_player_idx]
		for hand_dealer_idx in range(num_hands-1, -1, -1):
			hand_dealer = index_to_valid_hands[hand_dealer_idx]
			action = policy(hand_player, hand_dealer)
			if action == HIT:
				for new_hand, proba in probaHitList(hand_dealer):
					gainMatrix[hand_player_idx, hand_dealer_idx] += proba * gainMatrix[hand_player_idx, valid_hands_to_index[new_hand]]
				gainMatrix[hand_player_idx, hand_dealer_idx] -= probaBurst(hand_dealer)
			elif action == STAND:
				gainMatrix[hand_player_idx, hand_dealer_idx] = handComparisonDealerPOV(hand_dealer, hand_player)
	return gainMatrix

In [13]:
def standOn17(player_hand: hand, dealer_hand: hand) -> action:
	"""
	Dealer stand on 17
	"""
	return STAND if handScore(dealer_hand) >= 17 else HIT

In [14]:
gainDealerOptimal = getGainMatrixDealerOptimalPolicy()
# gainDealer17 = getGainMatrixDealerFromPolicy(standOn17)

In [22]:
gainDealerOptimal
df = pd.DataFrame(gainDealerOptimal)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3667,3668,3669,3670,3671,3672,3673,3674,3675,3676
0,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,...,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00
1,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,...,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00
2,1.00,1.00,0.00,1.00,0.00,0.00,1.00,0.00,0.00,0.00,...,0.00,1.00,1.00,1.00,1.00,0.00,1.00,1.00,0.00,1.00
3,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,...,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00
4,1.00,1.00,0.00,1.00,0.00,0.00,1.00,0.00,0.00,0.00,...,0.00,1.00,1.00,1.00,1.00,0.00,1.00,1.00,0.00,1.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3672,1.00,1.00,0.00,1.00,0.00,0.00,1.00,0.00,0.00,0.00,...,0.00,1.00,1.00,1.00,1.00,0.00,1.00,1.00,0.00,1.00
3673,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,...,1.00,0.00,1.00,1.00,1.00,1.00,0.00,1.00,1.00,1.00
3674,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,...,1.00,1.00,0.00,1.00,0.00,1.00,1.00,0.00,1.00,0.00
3675,1.00,1.00,0.00,1.00,0.00,0.00,1.00,0.00,0.00,0.00,...,0.00,1.00,1.00,1.00,1.00,0.00,1.00,1.00,0.00,1.00


#### Calcul de la politique optimale du joueur

In [16]:
def getGainMatrixPlayerOptimalPolicy(dealerGainMatrix: np.ndarray) -> np.ndarray:
	"""
	Compute the gain matrix for the player optimal policy
	It consider that 2 actions are possible: hit or stand
	The player policy is also computed and returned
	"""
	gainMatrix = np.zeros((num_hands, 13))
	policy = np.zeros((num_hands, 13))
	for hand_player_idx in range(num_hands-1, -1, -1):
		hand_player = index_to_valid_hands[hand_player_idx]
		for card_dealer in range(num_cards):
			hand_dealer = create_hand(card_dealer)
			hand_dealer_idx = valid_hands_to_index[hand_dealer]
   
			hit_score = 0
			for new_hand, proba in probaHitList(hand_player):
				hit_score += proba * gainMatrix[valid_hands_to_index[new_hand], card_dealer]
			hit_score -= probaBurst(hand_player)
			stand_score = -dealerGainMatrix[hand_player_idx, hand_dealer_idx]
   
			gainMatrix[hand_player_idx, card_dealer] = max(hit_score, stand_score)
			policy[hand_player_idx, card_dealer] = HIT if hit_score >= stand_score else STAND
	return gainMatrix, policy

In [17]:
playerGainOptimal, playerPolicyOptimal = getGainMatrixPlayerOptimalPolicy(gainDealerOptimal)

On cherche maintenant à ajouter les actions de DOUBLE et SPLIT au joueur

In [18]:
def getOptimalPolicyDouble(dealerGainMatrix: np.ndarray, playerGainMatrix: np.ndarray, playerPolicy: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
	"""
	Update the player policy with the double action
	"""
	gainMatrix = playerGainMatrix.copy()
	policy = playerPolicy.copy()
	for card1 in cards:
		for card2 in cards:
			for card_dealer in cards:
				hand_player = create_hand(card1, card2)
				hand_player_idx = valid_hands_to_index[hand_player]
    
				double_score = 0
				for new_hand, proba in probaHitList(hand_player):
					double_score -= proba * dealerGainMatrix[valid_hands_to_index[new_hand], valid_hands_to_index[create_hand(card_dealer)]]
				double_score -= probaBurst(hand_player)
				double_score *= 2
				if card1 == 1 and card2 == 8:
					print(double_score >= playerGainMatrix[hand_player_idx, card_dealer], double_score, playerGainMatrix[hand_player_idx, card_dealer], card_dealer + 1)
				if double_score >= playerGainMatrix[hand_player_idx, card_dealer]:
					print(double_score, playerGainMatrix[hand_player_idx, card_dealer])
					gainMatrix[hand_player_idx, card_dealer] = double_score
					policy[hand_player_idx, card_dealer] = DOUBLE
	return gainMatrix, policy

In [19]:
playerGainOptimalDouble, playerPolicyOptimalDouble = getOptimalPolicyDouble(gainDealerOptimal, playerGainOptimal, playerPolicyOptimal)

False -2.0 -0.9999999999999996 1
False -2.0 -1.0 2
False -2.0 -1.0 3
False -2.0 -1.0 4
False -2.0 -1.0 5
False -2.0 -1.0 6
False -2.0 -1.0 7
False -2.0 -1.0 8
False -2.0 -1.0 9
False -2.0 -1.0 10
False -2.0 -1.0 11
False -2.0 -1.0 12
False -2.0 -1.0 13


A présent on aimerait vérifier que nos résultats sont cohérents  
Notre première idée est de vérifier que pour un score égal, et un nombre d'ace valant 11 égal, on ait les mêmes probabilités de victoire contre le croupier

In [20]:
def checkPlayerGainMatrix(playerGainMatrix: np.ndarray) -> bool:
	"""
	Check if the player gain matrix is the same for equivalent hands
	"""
	for hand1, hand1_idx in valid_hands_to_index.items():
		for hand2, hand2_idx in valid_hands_to_index.items():
			s1 = handScore(hand1)
			s2 = handScore(hand2)
			if s1 == s2 and couldBlackJack(hand1) == couldBlackJack(hand2) and haveAce11(hand1) == haveAce11(hand2):
				# print(hand1, hand2, playerPolicyOptimal[hand1_idx], playerPolicyOptimal[hand2_idx])
				assert np.allclose(playerGainMatrix[hand1_idx], playerGainMatrix[hand2_idx])