In [1]:
import numpy as np
from tqdm import trange

In [69]:
"""
The deck does until reset after each simulation... and the model knows!
Let's see if this model has an edge over the casino

"""

STAND, HIT, DOUBLE_DOWN, SPLIT = 0, 1, 2, 3
DECK_COUNT = 1
BIG_BET_SIZE = 5 # The bot will bet small when the card count is low and big when the card count is high
PENETRATION_PERCENT = 0.75 # When PENETRATION_PERCENT of the cards have been dealt the deck is shuffled
DECK = np.repeat(np.array([2,3,4,5,6,7,8,9,10,10,10,10,11]), 4)

In [3]:
class Node():
    def __init__(self, log_info, num_actions):
        self.log_info = log_info
        self.num_actions = num_actions
        self.regret_sum = np.zeros(num_actions)
        self.strategy_sum = np.zeros(num_actions)

    def __repr__(self):
        return self.log_info + "\t" + ''.join([format(strat*100, '.0f') + "%\t" for strat in self.normalize(self.strategy_sum)])

    def normalize(self, value):
        normalizing_sum = np.sum(value)
        if normalizing_sum > 0:
            return value / normalizing_sum
        return np.ones(self.num_actions) / self.num_actions

    def get_strategy(self):
        return self.normalize(np.maximum(self.regret_sum, 0))

    def get_action(self):
        strategy = self.normalize(self.strategy_sum)
        return np.searchsorted(np.cumsum(strategy), np.random.random())

In [4]:
def get_hand_value(cards):
    ace_count = np.count_nonzero(cards == 11)
    value = np.sum(cards)
    while value > 21 and ace_count > 0:
        value -= 10
        ace_count -= 1
    return value

def get_terminal_payout(player_value, dealer_cards, deck, top_card_index, action, bet):
    if action is None: # Cards have just been dealt
        if get_hand_value(dealer_cards) == 21:
            if player_value == 21:
                return 0
            return -bet

        if player_value == 21:
            return 3/2 * bet
        
        return None
    
    if action == DOUBLE_DOWN:
        return get_terminal_payout(player_value, dealer_cards, deck, top_card_index, STAND, bet * 2)

    if player_value > 21:
        return -bet
        
    if action == STAND:
        dealer_value = get_hand_value(dealer_cards)
        while dealer_value < 17:
            dealer_cards = np.append(dealer_cards, deck[top_card_index])
            top_card_index += 1
            dealer_value = get_hand_value(dealer_cards)
        
        if dealer_value > 21:
            return bet
        return np.sign(player_value - dealer_value) * bet
        
    return None

def get_node(nodes, player_value, ace_count, can_split, dealer_card) -> Node:
    info_set_hash = str(can_split > 0) + str(ace_count > 0) + str(player_value) + str(dealer_card)
    num_actions = 4 if can_split else 3
    node = nodes.get(info_set_hash, Node(str(player_value) + ("A" if ace_count > 0 else "") + "\t" + str(dealer_card), num_actions))
    nodes[info_set_hash] = node
    return node

def counter_factual_regret(nodes, player_value, ace_count, can_split, dealer_cards, deck, top_card_index, action):
    if action is SPLIT:
        utility = counter_factual_regret(nodes, player_value//2, ace_count//2, False, dealer_cards, deck, top_card_index, HIT)
        utility += counter_factual_regret(nodes, player_value//2, ace_count//2, False, dealer_cards, deck, top_card_index+4, HIT)
        return utility

    if action is HIT or action == DOUBLE_DOWN:
        player_value += deck[top_card_index]
        if deck[top_card_index] == 11:
            ace_count += 1
        top_card_index += 1
        
    while player_value > 21 and ace_count > 0:
        player_value -= 10
        ace_count -= 1

    payout = get_terminal_payout(player_value, dealer_cards, deck, top_card_index, action, 1)
    if payout is not None:
        return payout

    node = get_node(nodes, player_value, ace_count, can_split, dealer_cards[0])
    strategy = node.get_strategy()
    node.strategy_sum += strategy
    
    utility = np.zeros(node.num_actions)
    node_utility = 0

    for action in range(node.num_actions):
        utility[action] = counter_factual_regret(nodes, player_value, ace_count, False, dealer_cards, deck, top_card_index, action)
        node_utility += strategy[action] * utility[action]

    node.regret_sum += utility - node_utility
    return node_utility

In [55]:
def get_betting_node(nodes, card_count) -> Node:
    node = nodes.get(card_count, Node(str(card_count), 2))
    nodes[card_count] = node
    return node

In [89]:
def train(iterations):
    nodes = {}
    betting_nodes = {}
    cards = np.repeat(DECK, DECK_COUNT)
    # card counting is abstracted as it also is for real card counters
    # https://www.qfit.com/cardcounting/Wong-Halves/
    # future investigation could discover the optimal increment value for each en card rank
    #                            0     1      2  3  4  5  6  7  8   9  10  11
    card_count_value = np.array([None, None,  1, 2, 2, 3, 2, 1, 0, -1, -2, -2])
    bet_sizes = np.array([1, BIG_BET_SIZE])
    top_deck_index = 0
    card_counter = 0
    total_cards = np.sum(np.bincount(cards))
   
    utility = 0
    for i in trange(iterations):
        if top_deck_index > total_cards * PENETRATION_PERCENT:
            np.random.shuffle(cards)
            card_counter = 0
            top_deck_index = 0

        decks_left = np.ceil((total_cards - top_deck_index) / 52)
        betting_node = get_betting_node(betting_nodes, card_counter / decks_left)
        bet_strategy = betting_node.get_strategy()
        betting_node.strategy_sum += bet_strategy
        betting_action = bet_sizes[np.searchsorted(np.cumsum(bet_strategy), np.random.random())]

        card_counter += np.sum(card_count_value[cards[top_deck_index:top_deck_index+4]])
        player_cards = cards[top_deck_index:top_deck_index+2]
        top_deck_index += 2
        dealer_cards = cards[top_deck_index:top_deck_index+2]
        top_deck_index += 2
        has_ace = 11 in player_cards
        can_split = player_cards[0] == player_cards[1]
        black_jack_utility = counter_factual_regret(nodes, np.sum(player_cards), has_ace, can_split, dealer_cards, cards, top_deck_index, None)

        betting_node.regret_sum += black_jack_utility * bet_sizes - black_jack_utility * betting_action
        utility += black_jack_utility * betting_action

        # Reset average strategy sum to forget bad early decisions
        if i == iterations // 2:
            for node in nodes.values():
                node.strategy_sum[:] = 0
            for node in betting_nodes.values():
                node.strategy_sum[:] = 0
            utility = 0 # Also reset utility to get number that represents the fully trained model better

    # Average game value is expected to be positive. When the player is card counting they should have an edge over the casino
    print("Average game value:", utility / iterations)

    LOG_NODE_PROBABILITIES = True
    if LOG_NODE_PROBABILITIES:
        print("Count\tSmall\tBig")
        nodes_values = dict(sorted(betting_nodes.items(), key=lambda item: item[0]))
        for n in nodes_values.values():
            print(n)

        print("Player\tDealer\tSTAND\tHIT\tDD\tSPLIT")
        nodes_values = dict(sorted(nodes.items(), key=lambda item: item[0]))
        for n in nodes_values.values():
            print(n)
    return nodes

In [88]:
nodes = train(300000)

100%|██████████| 300000/300000 [00:41<00:00, 7296.61it/s]

Average game value: 0.007728459186786303
Count	Small	Big
-30.0	50%	50%	
-28.0	50%	50%	
-26.0	50%	50%	
-25.0	75%	25%	
-24.0	100%	0%	
-23.0	100%	0%	
-22.0	100%	0%	
-21.0	100%	0%	
-20.0	100%	0%	
-19.0	100%	0%	
-18.0	100%	0%	
-17.0	100%	0%	
-16.0	100%	0%	
-15.0	100%	0%	
-14.0	100%	0%	
-13.0	100%	0%	
-12.0	100%	0%	
-11.0	100%	0%	
-10.0	100%	0%	
-9.0	100%	0%	
-8.0	100%	0%	
-7.0	100%	0%	
-6.0	100%	0%	
-5.0	100%	0%	
-4.0	100%	0%	
-3.0	100%	0%	
-2.0	100%	0%	
-1.0	49%	51%	
0.0	100%	0%	
1.0	21%	79%	
2.0	0%	100%	
3.0	0%	100%	
4.0	14%	86%	
5.0	0%	100%	
6.0	47%	53%	
7.0	0%	100%	
8.0	22%	78%	
9.0	0%	100%	
10.0	0%	100%	
11.0	23%	77%	
12.0	0%	100%	
13.0	0%	100%	
14.0	0%	100%	
15.0	0%	100%	
16.0	0%	100%	
17.0	0%	100%	
18.0	100%	0%	
19.0	0%	100%	
20.0	34%	66%	
21.0	70%	30%	
22.0	73%	27%	
23.0	32%	68%	
24.0	50%	50%	
25.0	39%	61%	
26.0	50%	50%	
27.0	75%	25%	
28.0	50%	50%	
29.0	50%	50%	
32.0	50%	50%	
40.0	50%	50%	
44.0	50%	50%	
Player	Dealer	STAND	HIT	DD	SPLIT
10	10	0%	100%	0%	
10	11	0%	81%	19%	
10	2	0%	0%	


