In [1]:
import numpy as np
import torch
import torch.nn.functional as F
import random

In [2]:
Card = int
Action = int

ACE: Card = 1
TWO: Card = 2
THREE: Card = 3
FOUR: Card = 4
FIVE: Card = 5
SIX: Card = 6
SEVEN: Card = 7
EIGHT: Card = 8
NINE: Card = 9
TEN: Card = 10
JACK: Card = 10
QUEEN: Card = 10
KING: Card = 10

card_values: list[Card] = [ACE, TWO, THREE, FOUR, FIVE, SIX, SEVEN, EIGHT, NINE, TEN, JACK, QUEEN, KING]
num_cards: int = len(card_values)
cards: list[int] = list(range(num_cards))

max_card_value: int = max(card_values)
max_score: int = 21


In [3]:

HIT: Action = 0 
STAND: Action = 1
DOUBLE: Action = 2
SPLIT: Action = 3

In [4]:
N = 7
max_card_per_hand = 28
hand_played_per_simulation = 100

In [5]:
def count_score_min(ace, two=0, three=0, four=0, five=0, six=0, seven=0, eight=0, nine=0, ten=0, jack=0, queen=0, king=0):
	score = 0
	score += ace
	score += 2*two
	score += 3*three
	score += 4*four
	score += 5*five
	score += 6*six
	score += 7*seven
	score += 8*eight
	score += 9*nine
	score += 10*ten
	score += 10*jack
	score += 10*queen
	score += 10*king
	return score

In [6]:
valid_hands_to_index = {}
index = 0
for ace in range(0, 22):
	score = count_score_min(ace)
	for two in range(0, (21 - score) // 2 + 1):
		score = count_score_min(ace, two)
		for three in range(0, (21 - score) // 3 + 1):
			score = count_score_min(ace, two, three)
			for four in range(0, (21 - score) // 4 + 1):
				score = count_score_min(ace, two, three, four)
				for five in range(0, (21 - score) // 5 + 1):
					score = count_score_min(ace, two, three, four, five)
					for six in range(0, (21 - score) // 6 + 1):
						score = count_score_min(ace, two, three, four, five, six)
						for seven in range(0, (21 - score) // 7 + 1):
							score = count_score_min(ace, two, three, four, five, six, seven)
							for eight in range(0, (21 - score) // 8 + 1):
								score = count_score_min(ace, two, three, four, five, six, seven, eight)
								for nine in range(0, (21 - score) // 9 + 1):
									score = count_score_min(ace, two, three, four, five, six, seven, eight, nine)
									for ten in range(0, (21 - score) // 10 + 1):
										score = count_score_min(ace, two, three, four, five, six, seven, eight, nine, ten)
										for jack in range(0, (21 - score) // 10 + 1):
											score = count_score_min(ace, two, three, four, five, six, seven, eight, nine, ten, jack)
											for queen in range(0, (21 - score) // 10 + 1):
												score = count_score_min(ace, two, three, four, five, six, seven, eight, nine, ten, jack, queen)
												for king in range(0, (21 - score) // 10 + 1):
													score = count_score_min(ace, two, three, four, five, six, seven, eight, nine, ten, jack, queen, king)
													if score <= 21:
														valid_hands_to_index[(ace, two, three, four, five, six, seven, eight, nine, ten, jack, queen, king)] = index
														index += 1

In [7]:
def drawCard(deck):
	"""
	Draw a card from the deck and return it
	"""
	card = torch.multinomial(deck, 1).item()
	deck[card] -= 1
	return card

In [8]:
def drawCards(decks, indices):
	"""
	This function assumes `deck` is a tensor of shape (N, 13), where N is the number of decks.
	"""
	# Calculate probabilities for each deck and draw cards
	cards = torch.multinomial(decks[indices], num_samples=1).squeeze(1)
	decks[indices, cards] -= 1
	return cards

In [9]:
def generateRandomDeck() -> torch.tensor:
	deck = torch.tensor([4 * N] * num_cards, dtype=torch.float32)
	num_cards_to_draw = random.randint(1, num_cards * N * 2)
	for _ in range(num_cards_to_draw):
		drawCard(deck)
	return deck

In [10]:
def generateRandomDecks(n: int) -> torch.tensor:
	return torch.stack([torch.tensor([16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16]) for _ in range(n)])
	# return torch.stack([generateRandomDeck() for _ in range(n)])

In [11]:
def generateRandomInitialHand():
	dealer_card = random.choice(cards)
	card1 = random.choice(cards)
	card2 = random.choice(cards)
	hand = F.one_hot(torch.tensor([card1, card2]), num_classes=num_cards).sum(dim=0)
	return torch.tensor([dealer_card, *hand], dtype=torch.float32)

In [12]:
def generateRandomInitialHands(n: int) -> torch.tensor:
	return torch.stack([generateRandomInitialHand() for _ in range(n)])

In [13]:
def generateRandomInitialHandAndDeck(n: int) -> torch.tensor:
	return torch.cat([generateRandomDecks(n), generateRandomInitialHands(n)], dim=1)

In [14]:
generateRandomInitialHandAndDeck(128).shape

torch.Size([128, 27])

In [15]:
def handsScoreMin(hands: torch.tensor) -> torch.tensor:
	"""
	Score of a hand
	Each ace is counted as 11 if the total score is less than or equal to 21, otherwise it is counted as 1
	"""	
	return torch.sum(hands * torch.tensor(card_values, dtype=torch.float32), dim=1)

In [16]:
def handsScore(hands: torch.tensor) -> torch.tensor:
	"""
	Score of a hand
	Each ace is counted as 11 if the total score is less than or equal to 21, otherwise it is counted as 1
	"""	
	score = handsScoreMin(hands)
	can_use_ace_index = score + 10 <= max_score
	have_ace_index = hands[:, 0] > 0
	score[torch.logical_and(can_use_ace_index, have_ace_index)] += 10
	return score

In [17]:
def gamesFinished(player_hands, actions) -> torch.tensor:
	"""
	Check if the game is finished
	"""
	return torch.logical_or((actions == STAND), (handsScoreMin(player_hands) > max_score))

In [18]:
def stand_on_17_hard(dealer_hands, player_hands):
	"""
	Dealer stand on 17 hard
	"""
	score = handsScore(dealer_hands)
	return torch.where(score >= 17, torch.tensor(STAND), torch.tensor(HIT))

In [19]:
def checkBlackjacks(hands, scores) -> torch.tensor:
	"""
	Check if a hand is a blackjack
	Score is the score of the hand if it is already computed
	"""
	return torch.logical_and(torch.eq(scores, max_score), torch.eq(torch.sum(hands, dim=1), 2))

In [20]:
def handsComparisonPlayerPOV(dealer_hands, player_hands) -> torch.tensor:
	"""
	Compare two valid hands
	Return 1 if the dealer wins, 0 if it is a draw and -1 if the player wins
	Check if there is a blackjack for the dealer and the player
	"""
	result = torch.zeros(dealer_hands.shape[0])
	score_dealers = handsScore(dealer_hands)
	score_players = handsScore(player_hands)
	dealer_blackjack = checkBlackjacks(dealer_hands, score_dealers)
	player_blackjack = checkBlackjacks(player_hands, score_players)
	result[score_dealers > score_players] = -1
	result[score_dealers < score_players] = 1
	result[score_dealers > max_score] = 1
	result[dealer_blackjack] = -1
	result[player_blackjack] = 1.5
	result[torch.logical_and(dealer_blackjack, player_blackjack)] = 0
	result[score_players > max_score] = -1
	return result

In [21]:
def resolveGames(dealer_hands, player_hands, decks) -> torch.tensor:
	"""
	Resolve a game
	Return the reward for the player
	"""
	need_actions = torch.tensor([True] * dealer_hands.shape[0])
	while torch.any(need_actions):
		actions = stand_on_17_hard(dealer_hands, player_hands)
		hit_indices = torch.where(actions == HIT)[0]
		dealer_hands.index_add_(0, hit_indices, F.one_hot(drawCards(decks, hit_indices), num_classes=13).to(torch.float32))
		need_actions = torch.logical_not(gamesFinished(player_hands, actions))
	return handsComparisonPlayerPOV(dealer_hands, player_hands)

In [22]:
@torch.no_grad()
def simulateGame(inital_hand_and_deck, model_play):
	"""
	Simulate a game
	Return the gain of the player
	"""
	decks = inital_hand_and_deck[:, :13]
	dealer_cards = inital_hand_and_deck[:, 13]
	player_hands = inital_hand_and_deck[:, 14:]
	need_actions = torch.tensor([True] * inital_hand_and_deck.shape[0])
	while torch.any(need_actions):
		actions = torch.tensor([STAND] * inital_hand_and_deck.shape[0])
		actions[need_actions] = torch.argmax(model_play(inital_hand_and_deck[need_actions]), dim=1)
		hit_indices = torch.where(actions == HIT)[0]
		player_hands.index_add_(0, hit_indices, F.one_hot(drawCards(decks, hit_indices), num_classes=13).to(torch.float32))
		need_actions = torch.logical_not(gamesFinished(player_hands, actions))
	dealer_hands = F.one_hot(dealer_cards.to(torch.int64), num_classes=num_cards).to(torch.float32)
	return resolveGames(dealer_hands, player_hands, decks)
	

In [23]:
def make_bet_model():
	return torch.nn.Sequential(
		torch.nn.Linear(13, 13),
		torch.nn.ReLU(),
		torch.nn.Linear(13, 1),
		torch.nn.ReLU()
	)

In [97]:
def make_play_model():
	return torch.nn.Sequential(
		torch.nn.Linear(27, 27, dtype=torch.float32),
		torch.nn.ReLU(),
		torch.nn.Linear(27, 2, dtype=torch.float32),
		torch.nn.Softmax(dim=1)
	)

In [98]:
initial_hand_and_deck = generateRandomInitialHandAndDeck(128)
model_play = make_play_model()
simulateGame(initial_hand_and_deck, model_play)

tensor([-1., -1., -1., -1., -1., -1., -1., -1., -1.,  1., -1., -1., -1., -1.,
        -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
        -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
        -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
        -1.,  1., -1., -1., -1.,  1., -1., -1., -1., -1., -1., -1., -1., -1.,
        -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
        -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
        -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
        -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
        -1., -1.])

In [131]:
batch_size = 1024
num_epochs = 1000

In [137]:
def train_play_model(model_play, optimizer_play, num_epochs):
	# goal_gain load gain.npy
	goal_gains_matrix = torch.tensor(np.load("gain.npy"), dtype=torch.float32)
	initial_hands_and_decks = generateRandomInitialHandAndDeck(batch_size)
	for epoch in range(num_epochs):
		decks = initial_hands_and_decks[:, :13]
		dealer_cards = initial_hands_and_decks[:, 13]
		player_hands = initial_hands_and_decks[:, 14:]
		goal_gains = torch.zeros((batch_size, 2))

		for i, (dealer_card, player_hands) in enumerate(zip(dealer_cards, player_hands)):
			player_hands = tuple(player_hands.to(torch.int64).tolist())
			dealer_card = dealer_card.to(torch.int64).item()
			goal_gain = F.one_hot(torch.argmax(goal_gains_matrix[valid_hands_to_index[player_hands], dealer_card]), num_classes=2).to(torch.float32)
			# goal_gain = torch.softmax(goal_gains_matrix[valid_hands_to_index[player_hands], dealer_card], dim=0)
			goal_gains[i] = goal_gain
		optimizer_play.zero_grad()
		actions = model_play(initial_hands_and_decks)
		loss = F.cross_entropy(actions, goal_gains)
		loss.backward()
		optimizer_play.step()
		if epoch % 100 == 0:
			copy_initial_hands_and_decks = initial_hands_and_decks.clone()
			gains = simulateGame(copy_initial_hands_and_decks, model_play)
			average_gain = torch.mean(gains)
			print(f"epoch: {epoch}, loss: {loss.item()}, average gain: {average_gain}")


In [None]:
def train_play_model(model_play, optimizer_play, num_epochs):
	# goal_gain load gain.npy
	goal_gains_matrix = torch.tensor(np.load("gain.npy"), dtype=torch.float32)
	initial_hands_and_decks = generateRandomInitialHandAndDeck(batch_size)
	for epoch in range(num_epochs):
		decks = initial_hands_and_decks[:, :13]
		dealer_cards = initial_hands_and_decks[:, 13]
		player_hands = initial_hands_and_decks[:, 14:]
		goal_gains = torch.zeros((batch_size, 2))

		for i, (dealer_card, player_hands) in enumerate(zip(dealer_cards, player_hands)):
			player_hands = tuple(player_hands.to(torch.int64).tolist())
			dealer_card = dealer_card.to(torch.int64).item()
			goal_gain = F.one_hot(torch.argmax(goal_gains_matrix[valid_hands_to_index[player_hands], dealer_card]), num_classes=2).to(torch.float32)
			# goal_gain = torch.softmax(goal_gains_matrix[valid_hands_to_index[player_hands], dealer_card], dim=0)
			goal_gains[i] = goal_gain
		optimizer_play.zero_grad()
		actions = model_play(initial_hands_and_decks)
		loss = F.cross_entropy(actions, goal_gains)
		loss.backward()
		optimizer_play.step()
		if epoch % 100 == 0:
			copy_initial_hands_and_decks = initial_hands_and_decks.clone()
			gains = simulateGame(copy_initial_hands_and_decks, model_play)
			average_gain = torch.mean(gains)
			print(f"epoch: {epoch}, loss: {loss.item()}, average gain: {average_gain}")


In [141]:
def train_play_model(model_play, optimizer_play, num_epochs):
	# goal_gain load gain.npy
	initial_hands_and_decks = generateRandomInitialHandAndDeck(batch_size)
	for epoch in range(num_epochs):
		copy_initial_hands_and_decks = initial_hands_and_decks.clone()
		decks = initial_hands_and_decks[:, :13]
		dealer_cards = initial_hands_and_decks[:, 13]
		player_hands = initial_hands_and_decks[:, 14:]

		gains = simulateGame(copy_initial_hands_and_decks, model_play)
		average_gain = torch.mean(gains)
		optimizer_play.zero_grad()
		actions = model_play(initial_hands_and_decks)
		loss = -average_gain
		loss.backward()
		optimizer_play.step()
		if epoch % 100 == 0:
			gains = simulateGame(copy_initial_hands_and_decks, model_play)
			average_gain = torch.mean(gains)
			print(f"epoch: {epoch}, loss: {loss.item()}, average gain: {average_gain}")


In [142]:
model_play = make_play_model()
optimizer_play = torch.optim.Adam(model_play.parameters(), lr=0.001)
train_play_model(model_play, optimizer_play, num_epochs)

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [74]:
print(F.cross_entropy(torch.tensor([0.0, 1.0]), torch.tensor([0.5346, 0.4654])))
print(F.cross_entropy(torch.tensor([0.0, 1.0]), torch.tensor([0.1787, 0.8213])))

tensor(0.8479)
tensor(0.4920)


In [29]:
deck = np.array([N] * 13)
bet_model = make_bet_model()
bet = bet_model(torch.tensor(deck, dtype=torch.float32))
print(bet)

tensor([0.], grad_fn=<ReluBackward0>)


In [30]:
deck = np.array([N] * 13)
dealer_card = drawCard(deck)
card1 = drawCard(deck)
card2 = drawCard(deck)
l = torch.tensor(np.concatenate([deck, [dealer_card, card1, card2]]), dtype=torch.float32)
print(l.shape)

TypeError: multinomial(): argument 'input' (position 1) must be Tensor, not numpy.ndarray

In [None]:
type(valid_hands_to_index)

dict

tensor([10, 20, 30, 40, -1])
