In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Notebook-wide pandas display settings: show up to 21 columns and
# render floating-point values with two decimals.
pd.set_option("display.max_columns", 21)
pd.set_option("display.precision", 2)
pd.set_option("display.float_format", "{:.2f}".format)

In [None]:
# Type aliases used throughout the notebook.
Card = int
# A hand stores one count per rank, in the order ace, two, ..., ten, jack, queen, king.
Hand = tuple[int, int, int, int, int, int, int, int, int, int, int, int, int]
Deck = np.ndarray[int]
DeckProb = list[float]

# Point value of each rank. An ace is worth 1 here; countScore handles the 11 case.
ACE: Card = 1
TWO: Card = 2
THREE: Card = 3
FOUR: Card = 4
FIVE: Card = 5
SIX: Card = 6
SEVEN: Card = 7
EIGHT: Card = 8
NINE: Card = 9
TEN: Card = 10
# Face cards are worth the same as a ten.
JACK: Card = TEN
QUEEN: Card = TEN
KING: Card = TEN

# Values listed by card index: index 0 is the ace, the last index is the king.
card_values: list[Card] = [
	ACE, TWO, THREE, FOUR, FIVE,
	SIX, SEVEN, EIGHT, NINE, TEN,
	JACK, QUEEN, KING,
]
num_cards: int = len(card_values)
# Card indices 0 .. num_cards - 1.
cards: list[int] = [*range(num_cards)]

max_card_value: int = max(card_values)
# Highest score a hand may reach without being bust.
max_score: int = 21


In [4]:
# An action is encoded as a small integer code.
action = int

# HIT and STAND are the two actions handled by the gain computations in this notebook.
HIT: action = 0
STAND: action = 1
# DOUBLE and SPLIT are declared but not used by the code visible in this section.
DOUBLE: action = 2
SPLIT: action = 3

In [5]:
def probaDraw(card: Card, deckProb: DeckProb) -> float:
	"""
	Look up the probability of drawing a given card.

	`card` is used as an index into `deckProb`, which holds one probability per card index.
	"""
	probability = deckProb[card]
	return probability

In [6]:
def countScore(ace=0, two=0, three=0, four=0, five=0, six=0, seven=0, eight=0, nine=0, ten=0, jack=0, queen=0, king=0) -> int:
	"""
	Score of a set of cards, given how many cards of each rank it contains.

	Every card first contributes its base value (an ace contributes ACE).
	Then, as long as an ace is still available and the total is at most
	max_score - 10, one ace is promoted by adding 10 to the total.
	"""
	counts_and_values = (
		(ace, ACE),
		(two, TWO),
		(three, THREE),
		(four, FOUR),
		(five, FIVE),
		(six, SIX),
		(seven, SEVEN),
		(eight, EIGHT),
		(nine, NINE),
		(ten, TEN),
		(jack, JACK),
		(queen, QUEEN),
		(king, KING),
	)
	score = 0
	for count, value in counts_and_values:
		score += count * value

	# Aces that may still be promoted from their base value.
	promotable_aces = ace
	while promotable_aces > 0 and score <= max_score - 10:
		score += 10
		promotable_aces -= 1
	return score

def minScore(ace=0, two=0, three=0, four=0, five=0, six=0, seven=0, eight=0, nine=0, ten=0, jack=0, queen=0, king=0) -> int:
	"""
	Lowest possible score of a set of cards: every ace counts as 1.

	The non-ace cards are scored with countScore (called with no aces, so no
	promotion can happen) and the number of aces is added on top.
	"""
	score_without_aces = countScore(0, two, three, four, five, six, seven, eight, nine, ten, jack, queen, king)
	return ace + score_without_aces

def handScore(hand: Hand) -> int:
	"""
	Score of a hand, as computed by countScore on its per-rank counts
	(an ace may be promoted there from its base value).
	"""
	score = countScore(*hand)
	return score

def minHandScore(hand: Hand) -> int:
	"""
	Lowest possible score of a hand, as computed by minScore on its
	per-rank counts (every ace counts as 1).
	"""
	lowest = minScore(*hand)
	return lowest

def haveAce11(hand: Hand) -> bool:
	"""
	Tell whether the hand's score relies on an ace counted as 11,
	i.e. whether its score differs from its minimum score.
	"""
	lowest = minHandScore(hand)
	actual = handScore(hand)
	return lowest != actual

In [7]:
def _iter_valid_hands(counts: tuple, budget: int):
	"""
	Yield every hand (tuple of per-rank counts) that extends `counts` without
	its minimum score exceeding the remaining `budget`.

	counts: counts already chosen for the first len(counts) ranks.
	budget: max_score minus the minimum score of the cards in `counts`.

	Hands are produced in lexicographic order of their count tuples, which is
	the order the original nested loops produced.
	"""
	rank = len(counts)
	if rank == num_cards:
		# No extra score check is needed here: countScore only promotes an ace
		# when the total is at most max_score - 10, so a hand whose minimum
		# score fits in max_score always has a score that fits too.
		yield counts
		return
	value = card_values[rank]
	for count in range(budget // value + 1):
		yield from _iter_valid_hands(counts + (count,), budget - count * value)


# Because hands are indexed in lexicographic order, adding a card to a hand
# always yields a hand with a larger index.
valid_hands_to_index: dict[Hand, int] = {}
index_to_valid_hands: list[Hand] = []
# `hand` and `idx` stay defined at notebook scope after this loop, as before.
for idx, hand in enumerate(_iter_valid_hands((), max_score)):
	valid_hands_to_index[hand] = idx
	index_to_valid_hands.append(hand)

num_hands = len(valid_hands_to_index)

print(f"Number of valid hands: {num_hands}")

Number of valid hands: 3677


In [8]:
def create_hand(*args: Card) -> Hand:
	"""
	Build a hand from card indices.

	Each argument is a card index: 0 is an ace, 1 a two, 2 a three, ...,
	9 a ten, 10 a jack, 11 a queen and 12 a king. The result holds, for
	every index, how many times it was given.
	"""
	counts = [0 for _ in range(num_cards)]
	for card_index in args:
		counts[card_index] += 1
	return tuple(counts)

def addCard(hand: Hand, card: Card) -> Hand:
	"""
	Return a new hand equal to `hand` with one more card of index `card`.
	The input hand is left untouched.
	"""
	counts = [*hand]
	counts[card] += 1
	return tuple(counts)

In [None]:
def checkBlackjack(hand: Hand, score=None) -> bool:
	"""
	Check if a hand is a blackjack: exactly two cards scoring max_score.

	hand: per-rank card counts.
	score: score of the hand if it is already computed; when None it is
	computed with handScore.
	"""
	# Use the caller-supplied score instead of recomputing it. `is None`
	# (rather than `or`) keeps a legitimate score of 0 from being discarded.
	if score is None:
		score = handScore(hand)
	return score == max_score and sum(hand) == 2

def couldBlackJack(hand: Hand) -> bool:
	"""
	Check whether a hand is, or can still become, a blackjack.

	- no card yet: always possible;
	- one card: possible only if it is an ace (index 0) or a ten-valued card
	  (indices 9 to 12);
	- two cards: only if they already score max_score;
	- more cards: never.
	"""
	card_count = sum(hand)
	if card_count == 0:
		return True
	if card_count == 1:
		return any(hand[index] == 1 for index in (0, 9, 10, 11, 12))
	if card_count == 2:
		return handScore(hand) == max_score
	return False

def handComparisonDealerPOV(dealer_hand: Hand, player_hand: Hand) -> float:
	"""
	Compare two valid hands from the dealer's point of view.

	Blackjacks are settled first: both have one -> 0, only the dealer -> 1,
	only the player -> -1.5. Otherwise the scores decide: 1 if the dealer's
	score is higher, -1 if the player's is higher, 0 on a tie.
	"""
	dealer_score = handScore(dealer_hand)
	player_score = handScore(player_hand)
	dealer_has_blackjack = checkBlackjack(dealer_hand, dealer_score)
	player_has_blackjack = checkBlackjack(player_hand, player_score)

	if dealer_has_blackjack:
		return 0 if player_has_blackjack else 1
	if player_has_blackjack:
		return -1.5

	if dealer_score > player_score:
		return 1
	if dealer_score < player_score:
		return -1
	return 0
 

In [10]:
def updateDeckProb(deck: Deck) -> DeckProb:
	"""
	Compute the probability of drawing each card from a deck.

	deck: number of remaining cards for each card index.
	Returns a list where entry i is deck[i] divided by the total number of
	cards in the deck. The deck itself is not modified.
	"""
	# The total does not change inside the loop, so compute it once.
	total = sum(deck)
	deckProb: DeckProb = [0] * num_cards
	for card in cards:
		deckProb[card] = deck[card] / total
	return deckProb

In [11]:
def probaHit(hand: Hand, deck: Deck) -> float:
	"""
	Probability that drawing one card from `deck` leaves `hand` valid (not bust).

	Adds up the draw probability of every card whose addition gives a hand
	listed in valid_hands_to_index.
	"""
	deckProb = updateDeckProb(deck)
	safe_probability = 0
	for card in cards:
		if addCard(hand, card) in valid_hands_to_index:
			safe_probability += probaDraw(card, deckProb)
	return safe_probability

def probaHitList(hand: Hand, deck: Deck) -> list[tuple[Hand, float]]:
	"""
	List the outcomes of drawing one card that do not bust the hand.

	Returns one (new_hand, probability) pair per card index whose addition
	to `hand` gives a hand listed in valid_hands_to_index. `probability` is
	the chance of drawing that card from `deck`.
	"""
	deckProb = updateDeckProb(deck)
	outcomes = []
	for card in cards:
		# Build the resulting hand once and reuse it for the test and the result.
		new_hand = addCard(hand, card)
		if new_hand in valid_hands_to_index:
			outcomes.append((new_hand, probaDraw(card, deckProb)))
	return outcomes

In [12]:
def probaBurst(hand: Hand, deck: Deck) -> float:
	"""
	Probability that drawing one card from `deck` busts `hand`,
	computed as the complement of probaHit.
	"""
	survive_probability = probaHit(hand, deck)
	return 1 - survive_probability

def probaBurstList(hand: Hand, deck: Deck) -> list[tuple[Hand, float]]:
	"""
	List the outcomes of drawing one card that bust the hand.

	Returns one (new_hand, probability) pair per card index whose addition
	to `hand` gives a hand that is NOT listed in valid_hands_to_index.
	`probability` is the chance of drawing that card from `deck`.
	"""
	# Convert card counts to probabilities first; probaDraw expects
	# probabilities, not the raw deck.
	deckProb = updateDeckProb(deck)
	outcomes = []
	for card in cards:
		new_hand = addCard(hand, card)
		# Test the resulting hand itself rather than handScore(hand) + value:
		# a hand holding an ace counted as 11 can absorb a card by counting
		# that ace as 1 again.
		if new_hand not in valid_hands_to_index:
			outcomes.append((new_hand, probaDraw(card, deckProb)))
	return outcomes

In [13]:
def getGainDealerPolicy(policy, hand_player, deck) -> np.ndarray:
	"""
	Expected gain of the dealer, for every dealer hand, when the dealer follows `policy`.

	policy: callable (hand_player, hand_dealer) -> action; only HIT and STAND are handled.
	hand_player: the player's final hand.
	deck: card counts used for the draw probabilities. The same deck is used
	for every dealer hand (it is not depleted as the dealer draws).

	Returns an array of length num_hands. Entry i is the dealer's expected
	gain when holding index_to_valid_hands[i]. Entries whose action is
	neither HIT nor STAND stay at 0.
	"""
	gainMatrix = np.zeros(num_hands)
	# Walk the hands from the highest index down: a hand reached by drawing a
	# card has a larger index than the hand it came from, so its gain is
	# already known when it is needed.
	for hand_dealer_idx in range(num_hands - 1, -1, -1):
		hand_dealer = index_to_valid_hands[hand_dealer_idx]
		decision = policy(hand_player, hand_dealer)
		if decision == HIT:
			# Use the dealer hand being evaluated, not a leftover notebook-level variable.
			for new_hand, proba in probaHitList(hand_dealer, deck):
				gainMatrix[hand_dealer_idx] += proba * gainMatrix[valid_hands_to_index[new_hand]]
			# Busting costs the dealer one unit.
			gainMatrix[hand_dealer_idx] -= probaBurst(hand_dealer, deck)
		elif decision == STAND:
			gainMatrix[hand_dealer_idx] = handComparisonDealerPOV(hand_dealer, hand_player)
	return gainMatrix

In [17]:
def getGainMatrixPlayerOptimalPolicy(dealer_policy, deck: Deck) -> tuple[np.ndarray, np.ndarray]:
	"""
	Compute the player's optimal expected gain and the matching policy.

	Only two player actions are considered: hit or stand.

	dealer_policy: callable (hand_player, hand_dealer) -> action, passed to getGainDealerPolicy.
	deck: number of cards available for each card index (array or list).

	Returns (gainMatrix, policy), both of shape (num_hands, num_cards):
	gainMatrix[i, c] is the best expected gain for player hand
	index_to_valid_hands[i] against a dealer showing card index c, and
	policy[i, c] is HIT or STAND. Combinations that need more cards than the
	deck holds are skipped and keep the value 0 in both arrays.
	"""
	gainMatrix = np.zeros((num_hands, num_cards))
	policy = np.zeros((num_hands, num_cards))
	dealerGainDict = {}
	# Highest index first: hands reached by hitting have a larger index, so
	# their gain is already filled in when it is read below.
	for hand_player_idx in range(num_hands - 1, -1, -1):
		hand_player = index_to_valid_hands[hand_player_idx]
		for card_dealer in range(num_cards):
			hand_dealer = create_hand(card_dealer)
			hand_dealer_idx = valid_hands_to_index[hand_dealer]
			# Work on an array copy so the caller's deck is never modified.
			new_deck = np.array(deck)
			new_deck[card_dealer] -= 1
			new_deck -= np.array(hand_player)
			if (new_deck < 0).any():
				# TODO put nan
				continue

			hit_score = 0
			for new_hand, proba in probaHitList(hand_player, new_deck):
				hit_score += proba * gainMatrix[valid_hands_to_index[new_hand], card_dealer]
			hit_score -= probaBurst(hand_player, new_deck)

			# NOTE(review): the cache is keyed on the player hand only, so the
			# dealer gains are computed with the deck of the first feasible
			# dealer card and reused for the other dealer cards. This looks
			# like a deliberate speed/accuracy trade-off; confirm.
			if hand_player not in dealerGainDict:
				dealerGainDict[hand_player] = getGainDealerPolicy(dealer_policy, hand_player, new_deck)
			stand_score = -dealerGainDict[hand_player][hand_dealer_idx]

			gainMatrix[hand_player_idx, card_dealer] = max(hit_score, stand_score)
			# Ties go to HIT.
			policy[hand_player_idx, card_dealer] = HIT if hit_score >= stand_score else STAND
	return gainMatrix, policy

In [None]:
def standOn17(player_hand: Hand, dealer_hand: Hand) -> action:
	"""
	Dealer policy: stand as soon as the dealer's score reaches 17, hit below.

	The rule is the same whether or not an ace is counted as 11.
	`player_hand` is not used.
	"""
	if handScore(dealer_hand) >= 17:
		return STAND
	return HIT

In [20]:
def stand_on_17_soft(player_hand: Hand, dealer_hand: Hand) -> action:
	"""
	Dealer policy for 17 depending on whether it is soft.

	- score below 17: hit;
	- score above 17: stand;
	- score of exactly 17: hit if an ace is counted as 11, stand otherwise.

	`player_hand` is not used.
	NOTE(review): the name reads like "stand on soft 17", but the code hits
	a soft 17 -- confirm which rule is intended.
	"""
	score = handScore(dealer_hand)
	if score < 17:
		return HIT
	if score > 17:
		return STAND
	return HIT if haveAce11(dealer_hand) else STAND

In [21]:
# Starting deck: four cards for each of the num_cards card indices, stored as
# an array to match the Deck alias expected by the solver.
deck = np.full(num_cards, 4)
# M_soft holds the (gainMatrix, policy) pair returned by the solver.
M_soft = getGainMatrixPlayerOptimalPolicy(stand_on_17_soft, deck)