In [60]:
import numpy as np
import pickle

In [114]:
class BlackJackSolution:
    
    def __init__(self, lr=0.1, exp_rate=0.3):
        self.player_Q_Values = {}  # key: [(player_value, show_card, usable_ace)][action] = value
        # initialise Q values | (12-21) x (1-10) x (True, False) x (1, 0) 400 in total
        for i in range(12, 22):
            for j in range(1, 11):
                for k in [True, False]:
                    self.player_Q_Values[(i, j, k)] = {}
                    for a in [1, 0]:
                        self.player_Q_Values[(i, j, k)][a] = 0
        
        self.player_state_action = []
        self.state = (0, 0, False)  # initial state
        self.actions = [1, 0]  # 1: HIT  0: STAND
        self.end = False
        self.lr = lr
        self.exp_rate = exp_rate
    
    # give card
    @staticmethod
    def giveCard():
        # 1 stands for ace
        c_list = list(range(1, 11)) + [10, 10, 10]
        return np.random.choice(c_list)
    
    def dealerPolicy(self, current_value, usable_ace, is_end):
        if current_value > 21:
            if usable_ace:
                current_value -= 10
                usable_ace = False
            else:
                return current_value, usable_ace, True
        # HIT17
        if current_value >= 17:
            return current_value, usable_ace, True
        else:
            card = self.giveCard()
            if card == 1:
                if current_value <= 10:
                    return current_value+11, True, False
                return current_value+1, usable_ace, False
            else:
                return current_value+card, usable_ace, False
            
    def chooseAction(self):
        # if current value <= 11, always hit
        current_value = self.state[0]
        if current_value <= 11:
            return 1
        
        if np.random.uniform(0, 1) <= self.exp_rate:
            action = np.random.choice(self.actions)
        else:
            # greedy action
            v = -999
            action = 0
            for a in self.player_Q_Values[self.state]:
                if self.player_Q_Values[self.state][a] > v:
                    action = a
                    v = self.player_Q_Values[self.state][a]
        return action
            
    # one can only has 1 usable ace 
    # return next state
    def playerNxtState(self, action):
        current_value = self.state[0]
        show_card = self.state[1]
        usable_ace = self.state[2]
        
        if current_value > 21:
            if usable_ace:
                current_value -= 10
                usable_ace = False
            else:
                # should not reach here
                self.end = True
                self.state = (current_value, show_card, usable_ace)
                return
        if action:
            card = self.giveCard()
            if card == 1:
                if current_value <= 10:
                    current_value += 11
                    usable_ace = True
                else:
                    current_value += 1
            else:
                current_value += card
        else:
            # action stand
            self.end = True
        
        if current_value > 21:
            self.end = True
        self.state = (current_value, show_card, usable_ace)
    
    def _giveCredit(self, player_value, dealer_value, is_end=True):
        reward = 0
        if is_end:
            if player_value > 21:
                if dealer_value > 21:
                    # draw
                    reward = 0
                else:
                    reward = -1
            else:
                if dealer_value > 21:
                    reward = 1
                else:
                    if player_value < dealer_value:
                        reward = -1
                    elif player_value > dealer_value:
                        reward = 1
                    else:
                        # draw
                        reward = 0
        # backpropagate reward
        for s in reversed(self.player_state_action):
            state, action = s[0], s[1]
            reward = self.lr*(reward - self.player_Q_Values[state][action])
            self.player_Q_Values[state][action] += reward
            
    def reset(self):
        self.player_state_action = []
        self.state = (0, 0, False)  # initial state
        self.end = False
    
    def play(self, rounds=1000):
        for i in range(rounds):
            if i % 1000 == 0:
                print("round", i)
            # hit 2 cards each
            dealer_value, player_value = 0, 0
            show_card = 0

            # give dealer 2 cards and show 1
            dealer_value += self.giveCard()
            show_card = dealer_value
            self.state = (0, show_card, False)
            dealer_value += self.giveCard()

            # player's turn
            usable_ace, is_end = False, False
            while True:
                action = self.chooseAction()
                # print("current value {}, action {}".format(self.state[0], action))
                if self.state[0] >= 12:
                    self.player_state_action.append([self.state, action])
                # update next state
                self.playerNxtState(action)
                if self.end:
                    break    

            # dealer's turn
            usable_ace, is_end = False, False
            while not is_end:
                dealer_value, usable_ace, is_end = self.dealerPolicy(dealer_value, usable_ace, is_end)
            # print("dealer card sum", dealer_value)

            # judge winner
            # give reward and update Q value
            player_value = self.state[0]
            print("player value {} | dealer value {}".format(player_value, dealer_value))
            self._giveCredit(player_value, dealer_value)
            self.reset()
            
    def savePolicy(self, file="policy"):
        fw = open(file, 'wb')
        pickle.dump(self.player_Q_Values, fw)
        fw.close()

    def loadPolicy(self, file="policy"):
        fr = open(file,'rb')
        self.player_Q_Values = pickle.load(fr)
        fr.close()
        
    # trained robot play against dealer
    def playWithDealer(self, rounds=1000):
        self.reset()
        self.loadPolicy()
        self.exp_rate = 0
        
        result = np.zeros(3)  # player [win, draw, lose]
        for _ in range(rounds):
            # hit 2 cards each
            dealer_value, player_value = 0, 0
            show_card = 0

            # give dealer 2 cards and show 1
            dealer_value += self.giveCard()
            show_card = dealer_value
            self.state = (0, show_card, False)
            dealer_value += self.giveCard()

            # player's turn
            while True:
                action = self.chooseAction()
                # update next state
                self.playerNxtState(action)
                if self.end:
                    break    

            # dealer's turn
            usable_ace, is_end = False, False
            while not is_end:
                dealer_value, usable_ace, is_end = self.dealerPolicy(dealer_value, usable_ace, is_end)
            
            # judge
            player_value = self.state[0]
            # print("player value {} | dealer value {}".format(player_value, dealer_value))
            if player_value > 21:
                if dealer_value > 21:
                    # draw
                    result[1] += 1
                else:
                    result[2] += 1
            else:
                if dealer_value > 21:
                    result[0] += 1
                else:
                    if player_value < dealer_value:
                        result[2] += 1
                    elif player_value > dealer_value:
                        result[0] += 1
                    else:
                        # draw
                        result[1] += 1
            self.reset()
        return result

In [119]:
b = BlackJackSolution()
b.play(50000)

round 0
player value 18 | dealer value 18
player value 15 | dealer value 22
player value 14 | dealer value 17
player value 26 | dealer value 21
player value 17 | dealer value 22
player value 23 | dealer value 21
player value 26 | dealer value 21
player value 31 | dealer value 23
player value 23 | dealer value 21
player value 14 | dealer value 25
player value 19 | dealer value 20
player value 25 | dealer value 21
player value 22 | dealer value 21
player value 23 | dealer value 19
player value 22 | dealer value 19
player value 24 | dealer value 26
player value 23 | dealer value 24
player value 24 | dealer value 17
player value 15 | dealer value 20
player value 25 | dealer value 22
player value 22 | dealer value 17
player value 16 | dealer value 18
player value 23 | dealer value 21
player value 19 | dealer value 19
player value 28 | dealer value 19
player value 28 | dealer value 17
player value 24 | dealer value 18
player value 21 | dealer value 21
player value 25 | dealer value 25
player

player value 21 | dealer value 22
player value 15 | dealer value 20
player value 20 | dealer value 24
player value 30 | dealer value 19
player value 25 | dealer value 17
player value 18 | dealer value 17
player value 19 | dealer value 22
player value 20 | dealer value 17
player value 18 | dealer value 20
player value 23 | dealer value 18
player value 22 | dealer value 18
player value 18 | dealer value 18
player value 18 | dealer value 20
player value 20 | dealer value 17
player value 27 | dealer value 18
player value 20 | dealer value 26
player value 24 | dealer value 20
player value 21 | dealer value 22
player value 14 | dealer value 17
player value 12 | dealer value 17
player value 21 | dealer value 19
player value 15 | dealer value 18
player value 20 | dealer value 21
player value 17 | dealer value 20
player value 17 | dealer value 19
player value 28 | dealer value 22
player value 13 | dealer value 17
player value 12 | dealer value 26
player value 15 | dealer value 24
player value 1

player value 14 | dealer value 19
player value 19 | dealer value 19
player value 18 | dealer value 20
player value 20 | dealer value 25
player value 20 | dealer value 18
player value 26 | dealer value 23
player value 20 | dealer value 26
player value 19 | dealer value 21
player value 12 | dealer value 17
player value 15 | dealer value 25
player value 14 | dealer value 20
player value 21 | dealer value 20
player value 17 | dealer value 26
player value 25 | dealer value 24
player value 20 | dealer value 25
player value 20 | dealer value 23
player value 30 | dealer value 17
player value 19 | dealer value 20
player value 20 | dealer value 19
player value 22 | dealer value 24
player value 18 | dealer value 18
player value 19 | dealer value 22
player value 18 | dealer value 20
player value 15 | dealer value 17
player value 16 | dealer value 18
player value 19 | dealer value 22
player value 19 | dealer value 19
player value 24 | dealer value 21
player value 23 | dealer value 18
player value 1

player value 22 | dealer value 18
player value 21 | dealer value 17
player value 18 | dealer value 20
player value 22 | dealer value 19
player value 21 | dealer value 20
player value 20 | dealer value 20
player value 20 | dealer value 25
player value 20 | dealer value 17
player value 21 | dealer value 18
player value 18 | dealer value 23
player value 27 | dealer value 22
player value 17 | dealer value 17
player value 24 | dealer value 17
player value 18 | dealer value 19
player value 19 | dealer value 18
player value 18 | dealer value 22
player value 18 | dealer value 23
player value 22 | dealer value 20
player value 16 | dealer value 24
player value 24 | dealer value 19
player value 26 | dealer value 23
player value 20 | dealer value 20
player value 18 | dealer value 24
player value 20 | dealer value 25
player value 21 | dealer value 20
player value 24 | dealer value 17
player value 13 | dealer value 17
player value 17 | dealer value 26
player value 27 | dealer value 17
player value 1

player value 14 | dealer value 26
player value 20 | dealer value 22
player value 17 | dealer value 19
player value 20 | dealer value 18
player value 22 | dealer value 18
player value 13 | dealer value 17
player value 23 | dealer value 24
player value 26 | dealer value 17
player value 19 | dealer value 26
player value 20 | dealer value 18
player value 26 | dealer value 21
player value 17 | dealer value 22
player value 22 | dealer value 18
player value 20 | dealer value 26
player value 23 | dealer value 19
player value 22 | dealer value 20
player value 21 | dealer value 24
player value 18 | dealer value 21
player value 12 | dealer value 22
player value 23 | dealer value 17
player value 20 | dealer value 24
player value 12 | dealer value 26
player value 19 | dealer value 19
player value 28 | dealer value 22
player value 19 | dealer value 26
player value 18 | dealer value 19
player value 20 | dealer value 20
player value 14 | dealer value 18
player value 17 | dealer value 19
player value 2

player value 17 | dealer value 22
player value 17 | dealer value 17
player value 17 | dealer value 22
player value 12 | dealer value 24
player value 26 | dealer value 19
player value 12 | dealer value 18
player value 12 | dealer value 19
player value 16 | dealer value 22
player value 19 | dealer value 17
player value 16 | dealer value 17
player value 17 | dealer value 18
player value 25 | dealer value 22
player value 23 | dealer value 20
player value 22 | dealer value 25
player value 16 | dealer value 20
player value 20 | dealer value 23
player value 24 | dealer value 17
player value 25 | dealer value 24
player value 20 | dealer value 18
player value 19 | dealer value 18
player value 30 | dealer value 17
player value 21 | dealer value 24
player value 19 | dealer value 25
player value 15 | dealer value 17
player value 19 | dealer value 21
player value 17 | dealer value 20
player value 16 | dealer value 21
player value 19 | dealer value 20
player value 17 | dealer value 22
player value 1

player value 18 | dealer value 19
player value 22 | dealer value 17
player value 29 | dealer value 25
player value 27 | dealer value 18
player value 20 | dealer value 26
player value 13 | dealer value 17
player value 22 | dealer value 24
player value 19 | dealer value 26
player value 22 | dealer value 20
player value 20 | dealer value 17
player value 23 | dealer value 20
player value 20 | dealer value 17
player value 24 | dealer value 19
player value 26 | dealer value 17
player value 18 | dealer value 19
player value 20 | dealer value 18
player value 20 | dealer value 17
player value 24 | dealer value 25
player value 20 | dealer value 18
player value 17 | dealer value 20
player value 26 | dealer value 22
player value 23 | dealer value 21
player value 24 | dealer value 17
player value 20 | dealer value 20
player value 24 | dealer value 19
player value 21 | dealer value 19
player value 24 | dealer value 18
player value 26 | dealer value 18
player value 17 | dealer value 21
player value 2

player value 16 | dealer value 18
player value 20 | dealer value 19
player value 17 | dealer value 22
player value 23 | dealer value 19
player value 21 | dealer value 19
player value 19 | dealer value 25
player value 20 | dealer value 20
player value 26 | dealer value 22
player value 20 | dealer value 23
player value 13 | dealer value 20
player value 17 | dealer value 24
player value 17 | dealer value 23
player value 20 | dealer value 18
player value 18 | dealer value 22
player value 22 | dealer value 17
player value 18 | dealer value 22
player value 16 | dealer value 21
player value 12 | dealer value 21
player value 28 | dealer value 17
player value 16 | dealer value 19
player value 19 | dealer value 19
player value 19 | dealer value 25
player value 14 | dealer value 22
player value 18 | dealer value 25
player value 15 | dealer value 20
player value 18 | dealer value 25
player value 12 | dealer value 17
player value 16 | dealer value 24
player value 22 | dealer value 25
player value 1

player value 22 | dealer value 17
player value 16 | dealer value 18
player value 27 | dealer value 20
player value 20 | dealer value 17
player value 19 | dealer value 23
player value 23 | dealer value 19
player value 19 | dealer value 19
player value 16 | dealer value 25
player value 19 | dealer value 21
player value 22 | dealer value 22
player value 12 | dealer value 22
player value 12 | dealer value 18
player value 18 | dealer value 21
player value 12 | dealer value 17
player value 24 | dealer value 20
player value 28 | dealer value 21
player value 21 | dealer value 22
player value 19 | dealer value 20
player value 18 | dealer value 18
player value 17 | dealer value 23
player value 20 | dealer value 21
player value 12 | dealer value 17
player value 17 | dealer value 20
player value 16 | dealer value 17
player value 19 | dealer value 17
player value 24 | dealer value 20
player value 17 | dealer value 24
player value 20 | dealer value 26
player value 19 | dealer value 23
player value 1

player value 22 | dealer value 20
player value 15 | dealer value 20
player value 18 | dealer value 18
player value 19 | dealer value 21
player value 21 | dealer value 20
player value 19 | dealer value 19
player value 12 | dealer value 18
player value 15 | dealer value 18
player value 19 | dealer value 17
player value 17 | dealer value 18
player value 17 | dealer value 17
player value 21 | dealer value 17
player value 13 | dealer value 19
player value 22 | dealer value 18
player value 19 | dealer value 24
player value 18 | dealer value 26
player value 17 | dealer value 20
player value 21 | dealer value 22
player value 17 | dealer value 18
player value 16 | dealer value 19
player value 19 | dealer value 17
player value 13 | dealer value 22
player value 19 | dealer value 18
player value 17 | dealer value 25
player value 21 | dealer value 18
player value 21 | dealer value 24
player value 24 | dealer value 17
player value 14 | dealer value 18
player value 18 | dealer value 20
player value 1

player value 23 | dealer value 20
player value 19 | dealer value 21
player value 23 | dealer value 19
player value 20 | dealer value 17
player value 25 | dealer value 18
player value 14 | dealer value 22
player value 23 | dealer value 25
player value 17 | dealer value 19
player value 25 | dealer value 26
player value 21 | dealer value 23
player value 20 | dealer value 18
player value 17 | dealer value 21
player value 18 | dealer value 24
player value 19 | dealer value 24
player value 28 | dealer value 22
player value 17 | dealer value 20
player value 18 | dealer value 23
player value 16 | dealer value 26
player value 19 | dealer value 17
player value 19 | dealer value 18
player value 15 | dealer value 19
player value 16 | dealer value 20
player value 21 | dealer value 19
player value 23 | dealer value 17
player value 17 | dealer value 26
player value 25 | dealer value 20
player value 18 | dealer value 18
player value 24 | dealer value 19
player value 15 | dealer value 22
player value 1

player value 17 | dealer value 20
player value 16 | dealer value 23
player value 14 | dealer value 24
player value 19 | dealer value 18
player value 18 | dealer value 20
player value 22 | dealer value 19
player value 16 | dealer value 26
player value 23 | dealer value 18
player value 15 | dealer value 21
player value 27 | dealer value 21
player value 23 | dealer value 19
player value 19 | dealer value 20
player value 19 | dealer value 20
player value 17 | dealer value 20
player value 21 | dealer value 21
player value 19 | dealer value 22
player value 21 | dealer value 23
player value 15 | dealer value 25
player value 16 | dealer value 20
player value 20 | dealer value 17
player value 20 | dealer value 23
player value 12 | dealer value 18
player value 26 | dealer value 19
player value 18 | dealer value 21
player value 23 | dealer value 24
player value 17 | dealer value 19
player value 26 | dealer value 19
player value 20 | dealer value 19
player value 21 | dealer value 18
player value 2

player value 25 | dealer value 24
player value 21 | dealer value 20
player value 28 | dealer value 18
player value 19 | dealer value 18
player value 21 | dealer value 20
player value 20 | dealer value 21
player value 18 | dealer value 17
player value 14 | dealer value 26
player value 27 | dealer value 24
player value 20 | dealer value 23
player value 22 | dealer value 18
player value 14 | dealer value 18
player value 17 | dealer value 17
player value 19 | dealer value 20
player value 17 | dealer value 19
player value 20 | dealer value 19
player value 30 | dealer value 21
player value 20 | dealer value 20
player value 21 | dealer value 20
player value 14 | dealer value 26
player value 26 | dealer value 23
player value 24 | dealer value 17
player value 19 | dealer value 18
player value 31 | dealer value 25
player value 18 | dealer value 18
player value 23 | dealer value 20
player value 15 | dealer value 20
player value 24 | dealer value 19
player value 26 | dealer value 17
player value 1

player value 18 | dealer value 22
player value 12 | dealer value 24
player value 23 | dealer value 23
player value 22 | dealer value 19
player value 22 | dealer value 20
player value 21 | dealer value 18
player value 22 | dealer value 17
player value 25 | dealer value 19
player value 15 | dealer value 21
player value 20 | dealer value 23
player value 24 | dealer value 17
player value 12 | dealer value 22
player value 19 | dealer value 23
player value 17 | dealer value 22
player value 19 | dealer value 18
player value 23 | dealer value 21
player value 26 | dealer value 26
player value 16 | dealer value 22
player value 27 | dealer value 20
player value 27 | dealer value 26
player value 12 | dealer value 19
player value 20 | dealer value 18
player value 16 | dealer value 23
player value 12 | dealer value 26
player value 16 | dealer value 17
player value 20 | dealer value 17
player value 15 | dealer value 19
player value 17 | dealer value 20
player value 15 | dealer value 19
player value 1

player value 20 | dealer value 25
player value 19 | dealer value 19
player value 18 | dealer value 19
player value 20 | dealer value 19
player value 23 | dealer value 25
player value 18 | dealer value 17
player value 27 | dealer value 20
player value 17 | dealer value 19
player value 14 | dealer value 18
player value 13 | dealer value 24
player value 20 | dealer value 19
player value 16 | dealer value 23
player value 19 | dealer value 25
player value 21 | dealer value 17
player value 22 | dealer value 20
player value 22 | dealer value 20
player value 17 | dealer value 17
player value 22 | dealer value 17
player value 24 | dealer value 25
player value 19 | dealer value 20
player value 21 | dealer value 17
player value 20 | dealer value 21
player value 18 | dealer value 18
player value 20 | dealer value 22
player value 26 | dealer value 19
player value 13 | dealer value 18
player value 21 | dealer value 19
player value 22 | dealer value 24
player value 14 | dealer value 20
player value 2

player value 18 | dealer value 20
player value 20 | dealer value 25
player value 29 | dealer value 20
player value 25 | dealer value 19
player value 21 | dealer value 20
player value 18 | dealer value 19
player value 25 | dealer value 19
player value 20 | dealer value 26
player value 23 | dealer value 17
player value 19 | dealer value 20
player value 18 | dealer value 19
player value 15 | dealer value 21
player value 17 | dealer value 17
player value 21 | dealer value 24
player value 19 | dealer value 25
player value 20 | dealer value 20
player value 17 | dealer value 25
player value 22 | dealer value 20
player value 19 | dealer value 24
player value 18 | dealer value 20
player value 27 | dealer value 26
player value 19 | dealer value 18
player value 19 | dealer value 17
player value 23 | dealer value 19
player value 17 | dealer value 20
player value 18 | dealer value 20
player value 18 | dealer value 22
player value 20 | dealer value 20
player value 21 | dealer value 21
player value 2

player value 20 | dealer value 17
player value 21 | dealer value 22
player value 25 | dealer value 24
player value 15 | dealer value 17
player value 19 | dealer value 20
player value 18 | dealer value 19
player value 24 | dealer value 20
player value 21 | dealer value 24
player value 12 | dealer value 17
player value 15 | dealer value 24
player value 19 | dealer value 24
player value 20 | dealer value 21
player value 18 | dealer value 22
player value 17 | dealer value 18
player value 23 | dealer value 20
player value 14 | dealer value 17
player value 18 | dealer value 25
player value 18 | dealer value 19
player value 19 | dealer value 21
player value 18 | dealer value 17
player value 17 | dealer value 20
player value 20 | dealer value 23
player value 23 | dealer value 19
player value 16 | dealer value 17
player value 18 | dealer value 20
player value 22 | dealer value 19
player value 19 | dealer value 22
player value 15 | dealer value 19
player value 20 | dealer value 20
player value 2

player value 12 | dealer value 17
player value 21 | dealer value 22
player value 14 | dealer value 17
player value 18 | dealer value 26
player value 24 | dealer value 20
player value 20 | dealer value 17
player value 18 | dealer value 17
player value 17 | dealer value 18
player value 13 | dealer value 18
player value 18 | dealer value 23
player value 15 | dealer value 19
player value 23 | dealer value 20
player value 28 | dealer value 20
player value 25 | dealer value 17
player value 20 | dealer value 17
player value 18 | dealer value 20
player value 16 | dealer value 26
player value 16 | dealer value 20
player value 13 | dealer value 20
player value 23 | dealer value 23
player value 20 | dealer value 20
player value 14 | dealer value 22
player value 18 | dealer value 20
player value 19 | dealer value 19
player value 14 | dealer value 17
player value 19 | dealer value 25
player value 22 | dealer value 26
player value 24 | dealer value 19
player value 20 | dealer value 18
player value 2

player value 13 | dealer value 20
player value 12 | dealer value 17
player value 18 | dealer value 25
player value 19 | dealer value 20
player value 19 | dealer value 22
player value 23 | dealer value 22
player value 23 | dealer value 19
player value 15 | dealer value 18
player value 18 | dealer value 20
player value 17 | dealer value 24
player value 16 | dealer value 19
player value 17 | dealer value 18
player value 19 | dealer value 18
player value 15 | dealer value 20
player value 14 | dealer value 17
player value 23 | dealer value 19
player value 20 | dealer value 17
player value 16 | dealer value 17
player value 18 | dealer value 21
player value 24 | dealer value 18
player value 18 | dealer value 17
player value 14 | dealer value 18
player value 29 | dealer value 17
player value 18 | dealer value 18
player value 24 | dealer value 23
player value 23 | dealer value 18
player value 30 | dealer value 21
player value 24 | dealer value 17
player value 13 | dealer value 20
player value 1

player value 19 | dealer value 18
player value 25 | dealer value 20
round 23000
player value 25 | dealer value 24
player value 23 | dealer value 18
player value 26 | dealer value 20
player value 13 | dealer value 18
player value 16 | dealer value 18
player value 20 | dealer value 26
player value 16 | dealer value 24
player value 24 | dealer value 18
player value 27 | dealer value 19
player value 18 | dealer value 18
player value 19 | dealer value 18
player value 24 | dealer value 22
player value 17 | dealer value 25
player value 17 | dealer value 18
player value 14 | dealer value 22
player value 13 | dealer value 18
player value 20 | dealer value 23
player value 16 | dealer value 26
player value 19 | dealer value 23
player value 20 | dealer value 20
player value 16 | dealer value 23
player value 30 | dealer value 18
player value 21 | dealer value 17
player value 18 | dealer value 22
player value 14 | dealer value 22
player value 16 | dealer value 19
player value 18 | dealer value 23
pl

player value 18 | dealer value 18
player value 25 | dealer value 17
player value 19 | dealer value 19
player value 17 | dealer value 20
player value 21 | dealer value 18
player value 20 | dealer value 19
player value 20 | dealer value 19
player value 17 | dealer value 19
player value 17 | dealer value 18
player value 22 | dealer value 22
player value 24 | dealer value 19
player value 23 | dealer value 22
player value 22 | dealer value 19
player value 17 | dealer value 22
player value 26 | dealer value 19
player value 18 | dealer value 19
player value 20 | dealer value 20
player value 15 | dealer value 17
player value 19 | dealer value 22
player value 27 | dealer value 21
player value 21 | dealer value 18
player value 15 | dealer value 21
player value 23 | dealer value 22
player value 23 | dealer value 18
player value 20 | dealer value 26
player value 18 | dealer value 19
player value 20 | dealer value 18
player value 17 | dealer value 26
player value 19 | dealer value 20
player value 1

player value 12 | dealer value 23
player value 21 | dealer value 19
player value 28 | dealer value 21
player value 12 | dealer value 22
player value 16 | dealer value 20
player value 20 | dealer value 19
player value 17 | dealer value 17
player value 16 | dealer value 19
player value 18 | dealer value 17
player value 14 | dealer value 18
player value 13 | dealer value 17
player value 25 | dealer value 18
player value 17 | dealer value 21
player value 15 | dealer value 17
player value 22 | dealer value 20
player value 21 | dealer value 17
player value 20 | dealer value 25
player value 21 | dealer value 17
player value 19 | dealer value 24
player value 20 | dealer value 22
player value 19 | dealer value 25
player value 19 | dealer value 21
player value 21 | dealer value 19
player value 13 | dealer value 20
player value 19 | dealer value 18
player value 17 | dealer value 21
player value 28 | dealer value 19
player value 19 | dealer value 17
player value 20 | dealer value 18
player value 2

player value 12 | dealer value 22
player value 19 | dealer value 20
player value 15 | dealer value 21
player value 17 | dealer value 24
player value 17 | dealer value 21
player value 18 | dealer value 17
player value 20 | dealer value 19
player value 29 | dealer value 18
player value 13 | dealer value 24
player value 20 | dealer value 18
player value 22 | dealer value 21
player value 25 | dealer value 22
player value 15 | dealer value 20
player value 17 | dealer value 25
player value 19 | dealer value 22
player value 19 | dealer value 17
player value 18 | dealer value 19
player value 22 | dealer value 18
player value 16 | dealer value 20
player value 19 | dealer value 21
player value 20 | dealer value 20
player value 19 | dealer value 21
player value 15 | dealer value 21
player value 22 | dealer value 21
player value 20 | dealer value 18
player value 15 | dealer value 23
player value 24 | dealer value 20
player value 26 | dealer value 25
player value 23 | dealer value 24
player value 2

player value 16 | dealer value 22
player value 25 | dealer value 24
player value 19 | dealer value 20
player value 18 | dealer value 17
player value 17 | dealer value 18
player value 23 | dealer value 25
player value 18 | dealer value 20
player value 21 | dealer value 18
player value 21 | dealer value 23
player value 24 | dealer value 20
player value 18 | dealer value 18
player value 23 | dealer value 25
player value 14 | dealer value 18
player value 16 | dealer value 22
round 28000
player value 19 | dealer value 20
player value 19 | dealer value 23
player value 17 | dealer value 24
player value 15 | dealer value 24
player value 19 | dealer value 20
player value 17 | dealer value 17
player value 20 | dealer value 20
player value 21 | dealer value 24
player value 20 | dealer value 25
player value 18 | dealer value 18
player value 14 | dealer value 20
player value 21 | dealer value 18
player value 18 | dealer value 19
player value 17 | dealer value 18
player value 19 | dealer value 20
pl

player value 21 | dealer value 25
player value 16 | dealer value 17
player value 14 | dealer value 18
player value 22 | dealer value 21
player value 18 | dealer value 17
player value 17 | dealer value 18
player value 19 | dealer value 19
player value 24 | dealer value 20
player value 25 | dealer value 26
player value 12 | dealer value 20
player value 22 | dealer value 26
player value 20 | dealer value 17
player value 30 | dealer value 23
player value 15 | dealer value 26
player value 21 | dealer value 21
player value 17 | dealer value 20
player value 19 | dealer value 25
player value 18 | dealer value 17
player value 22 | dealer value 20
player value 18 | dealer value 24
player value 18 | dealer value 21
player value 30 | dealer value 18
player value 18 | dealer value 20
player value 20 | dealer value 23
player value 16 | dealer value 24
player value 21 | dealer value 18
player value 20 | dealer value 25
player value 20 | dealer value 19
player value 21 | dealer value 21
player value 1

player value 12 | dealer value 19
player value 16 | dealer value 26
player value 25 | dealer value 22
player value 25 | dealer value 18
player value 20 | dealer value 22
player value 21 | dealer value 21
player value 17 | dealer value 20
player value 15 | dealer value 19
player value 20 | dealer value 20
player value 29 | dealer value 21
player value 19 | dealer value 20
player value 16 | dealer value 17
player value 18 | dealer value 22
player value 13 | dealer value 20
player value 20 | dealer value 23
player value 13 | dealer value 20
player value 19 | dealer value 17
player value 19 | dealer value 25
player value 18 | dealer value 21
player value 16 | dealer value 17
player value 26 | dealer value 24
player value 14 | dealer value 24
player value 15 | dealer value 22
player value 28 | dealer value 20
player value 13 | dealer value 17
player value 23 | dealer value 23
player value 20 | dealer value 26
player value 24 | dealer value 18
player value 20 | dealer value 25
player value 2

player value 24 | dealer value 23
player value 19 | dealer value 21
player value 13 | dealer value 24
player value 19 | dealer value 25
player value 14 | dealer value 26
player value 14 | dealer value 25
player value 20 | dealer value 17
player value 22 | dealer value 19
player value 14 | dealer value 17
player value 27 | dealer value 20
player value 20 | dealer value 17
player value 20 | dealer value 20
player value 15 | dealer value 20
player value 21 | dealer value 17
player value 21 | dealer value 20
player value 25 | dealer value 20
player value 18 | dealer value 22
player value 14 | dealer value 19
player value 28 | dealer value 26
player value 16 | dealer value 22
player value 24 | dealer value 23
player value 21 | dealer value 23
player value 20 | dealer value 17
player value 20 | dealer value 20
round 32000
player value 16 | dealer value 26
player value 20 | dealer value 20
player value 20 | dealer value 20
player value 21 | dealer value 25
player value 22 | dealer value 25
pl

player value 24 | dealer value 17
player value 21 | dealer value 26
player value 14 | dealer value 17
player value 21 | dealer value 20
player value 17 | dealer value 20
player value 21 | dealer value 23
player value 20 | dealer value 23
player value 21 | dealer value 19
player value 18 | dealer value 18
player value 19 | dealer value 21
player value 20 | dealer value 20
player value 20 | dealer value 25
player value 17 | dealer value 26
player value 20 | dealer value 22
player value 21 | dealer value 17
player value 23 | dealer value 18
player value 16 | dealer value 18
player value 18 | dealer value 21
player value 19 | dealer value 22
player value 21 | dealer value 18
player value 29 | dealer value 17
player value 23 | dealer value 22
player value 22 | dealer value 22
player value 20 | dealer value 21
player value 20 | dealer value 21
player value 16 | dealer value 24
player value 19 | dealer value 19
player value 16 | dealer value 24
player value 28 | dealer value 18
player value 2

player value 21 | dealer value 25
player value 20 | dealer value 18
player value 20 | dealer value 22
player value 14 | dealer value 20
player value 18 | dealer value 17
player value 20 | dealer value 26
player value 23 | dealer value 17
player value 20 | dealer value 21
player value 18 | dealer value 18
player value 19 | dealer value 17
player value 19 | dealer value 24
player value 25 | dealer value 17
player value 17 | dealer value 23
player value 18 | dealer value 17
player value 18 | dealer value 19
player value 19 | dealer value 17
player value 27 | dealer value 17
player value 16 | dealer value 20
player value 21 | dealer value 21
player value 25 | dealer value 18
player value 17 | dealer value 23
player value 21 | dealer value 21
player value 21 | dealer value 23
player value 14 | dealer value 25
player value 19 | dealer value 26
player value 14 | dealer value 18
player value 23 | dealer value 17
player value 21 | dealer value 21
player value 18 | dealer value 21
player value 1

player value 22 | dealer value 26
player value 18 | dealer value 17
player value 15 | dealer value 20
player value 24 | dealer value 25
player value 23 | dealer value 18
player value 20 | dealer value 20
player value 21 | dealer value 17
player value 20 | dealer value 20
player value 13 | dealer value 18
player value 20 | dealer value 19
player value 19 | dealer value 24
player value 25 | dealer value 19
player value 22 | dealer value 18
player value 22 | dealer value 24
player value 21 | dealer value 20
player value 20 | dealer value 20
player value 13 | dealer value 17
player value 20 | dealer value 17
player value 19 | dealer value 19
player value 21 | dealer value 18
player value 15 | dealer value 25
player value 24 | dealer value 17
player value 17 | dealer value 19
player value 13 | dealer value 25
player value 21 | dealer value 23
player value 26 | dealer value 25
player value 13 | dealer value 18
player value 16 | dealer value 18
player value 15 | dealer value 22
player value 2

player value 28 | dealer value 26
player value 17 | dealer value 26
player value 25 | dealer value 20
player value 18 | dealer value 22
player value 20 | dealer value 19
player value 21 | dealer value 20
player value 22 | dealer value 22
player value 21 | dealer value 22
player value 20 | dealer value 19
player value 18 | dealer value 23
player value 18 | dealer value 20
player value 22 | dealer value 21
player value 23 | dealer value 21
player value 21 | dealer value 20
player value 26 | dealer value 22
player value 20 | dealer value 19
player value 18 | dealer value 18
player value 18 | dealer value 18
player value 18 | dealer value 20
player value 19 | dealer value 21
player value 22 | dealer value 19
player value 26 | dealer value 18
player value 12 | dealer value 19
player value 17 | dealer value 24
player value 13 | dealer value 21
player value 18 | dealer value 25
player value 24 | dealer value 18
player value 18 | dealer value 25
player value 19 | dealer value 23
player value 2

player value 16 | dealer value 25
player value 25 | dealer value 22
player value 13 | dealer value 20
player value 19 | dealer value 20
player value 20 | dealer value 20
player value 20 | dealer value 25
player value 22 | dealer value 21
player value 23 | dealer value 22
player value 19 | dealer value 26
player value 17 | dealer value 18
player value 26 | dealer value 17
player value 21 | dealer value 17
player value 23 | dealer value 24
player value 20 | dealer value 25
player value 22 | dealer value 21
player value 22 | dealer value 19
player value 19 | dealer value 20
player value 25 | dealer value 20
player value 22 | dealer value 24
player value 21 | dealer value 18
player value 19 | dealer value 20
player value 20 | dealer value 19
player value 23 | dealer value 18
player value 19 | dealer value 25
player value 27 | dealer value 20
player value 17 | dealer value 20
player value 20 | dealer value 18
player value 21 | dealer value 22
player value 20 | dealer value 21
player value 1

player value 25 | dealer value 24
player value 20 | dealer value 23
player value 20 | dealer value 17
player value 27 | dealer value 19
player value 20 | dealer value 17
player value 24 | dealer value 20
player value 26 | dealer value 25
player value 18 | dealer value 20
player value 20 | dealer value 24
player value 19 | dealer value 19
player value 21 | dealer value 17
player value 25 | dealer value 21
player value 23 | dealer value 17
player value 24 | dealer value 20
player value 26 | dealer value 25
player value 28 | dealer value 24
player value 19 | dealer value 17
player value 20 | dealer value 22
player value 18 | dealer value 20
player value 18 | dealer value 24
player value 18 | dealer value 20
player value 29 | dealer value 22
player value 14 | dealer value 24
player value 18 | dealer value 17
player value 13 | dealer value 24
player value 31 | dealer value 20
player value 17 | dealer value 17
player value 20 | dealer value 21
player value 30 | dealer value 18
player value 2

player value 21 | dealer value 19
player value 12 | dealer value 18
player value 20 | dealer value 25
player value 18 | dealer value 19
player value 28 | dealer value 19
player value 20 | dealer value 20
player value 18 | dealer value 22
player value 27 | dealer value 23
player value 20 | dealer value 17
player value 20 | dealer value 21
player value 16 | dealer value 20
player value 22 | dealer value 18
player value 26 | dealer value 25
player value 15 | dealer value 25
player value 19 | dealer value 17
player value 26 | dealer value 20
player value 19 | dealer value 17
player value 20 | dealer value 20
player value 25 | dealer value 19
player value 18 | dealer value 18
player value 24 | dealer value 23
player value 18 | dealer value 17
player value 15 | dealer value 20
player value 20 | dealer value 20
player value 19 | dealer value 18
player value 19 | dealer value 20
player value 17 | dealer value 24
player value 13 | dealer value 17
player value 15 | dealer value 19
player value 2

player value 19 | dealer value 19
player value 20 | dealer value 19
player value 25 | dealer value 18
player value 22 | dealer value 17
player value 20 | dealer value 18
player value 15 | dealer value 22
player value 26 | dealer value 21
player value 19 | dealer value 21
player value 15 | dealer value 21
player value 16 | dealer value 19
player value 25 | dealer value 23
player value 26 | dealer value 26
player value 18 | dealer value 25
player value 14 | dealer value 21
player value 21 | dealer value 19
player value 12 | dealer value 17
player value 23 | dealer value 20
player value 24 | dealer value 24
player value 12 | dealer value 25
player value 13 | dealer value 22
player value 21 | dealer value 25
player value 14 | dealer value 19
player value 27 | dealer value 19
player value 25 | dealer value 20
player value 13 | dealer value 21
player value 20 | dealer value 25
player value 20 | dealer value 26
player value 23 | dealer value 18
player value 18 | dealer value 18
player value 2

player value 23 | dealer value 17
player value 16 | dealer value 21
player value 25 | dealer value 22
player value 13 | dealer value 18
player value 19 | dealer value 23
player value 23 | dealer value 21
player value 20 | dealer value 26
player value 15 | dealer value 19
player value 21 | dealer value 19
player value 23 | dealer value 18
player value 19 | dealer value 26
player value 15 | dealer value 22
player value 27 | dealer value 23
player value 27 | dealer value 19
player value 21 | dealer value 21
player value 23 | dealer value 18
player value 17 | dealer value 21
player value 17 | dealer value 26
player value 16 | dealer value 18
player value 18 | dealer value 21
player value 16 | dealer value 18
player value 30 | dealer value 18
player value 12 | dealer value 20
player value 12 | dealer value 17
player value 24 | dealer value 22
player value 25 | dealer value 20
player value 22 | dealer value 21
player value 21 | dealer value 19
player value 21 | dealer value 26
player value 1

player value 18 | dealer value 25
player value 20 | dealer value 20
player value 15 | dealer value 17
player value 19 | dealer value 17
player value 20 | dealer value 24
player value 14 | dealer value 19
player value 21 | dealer value 23
player value 13 | dealer value 26
player value 24 | dealer value 20
player value 22 | dealer value 18
player value 19 | dealer value 19
player value 22 | dealer value 21
player value 13 | dealer value 23
player value 15 | dealer value 19
player value 21 | dealer value 20
player value 16 | dealer value 18
player value 20 | dealer value 19
player value 23 | dealer value 19
player value 17 | dealer value 21
player value 18 | dealer value 20
player value 21 | dealer value 23
player value 21 | dealer value 19
player value 23 | dealer value 23
player value 20 | dealer value 20
player value 20 | dealer value 19
player value 23 | dealer value 17
player value 22 | dealer value 25
player value 22 | dealer value 18
player value 20 | dealer value 24
player value 1

player value 19 | dealer value 17
player value 15 | dealer value 18
player value 14 | dealer value 20
player value 12 | dealer value 19
player value 17 | dealer value 23
player value 21 | dealer value 20
player value 16 | dealer value 26
player value 23 | dealer value 23
player value 22 | dealer value 17
player value 18 | dealer value 21
player value 17 | dealer value 19
player value 19 | dealer value 19
player value 18 | dealer value 17
player value 24 | dealer value 24
player value 22 | dealer value 23
player value 20 | dealer value 23
player value 27 | dealer value 17
player value 22 | dealer value 18
player value 19 | dealer value 20
player value 18 | dealer value 21
player value 12 | dealer value 22
player value 17 | dealer value 23
player value 27 | dealer value 20
player value 23 | dealer value 20
player value 21 | dealer value 20
player value 19 | dealer value 21
player value 24 | dealer value 19
player value 19 | dealer value 19
player value 20 | dealer value 18
player value 1

player value 20 | dealer value 22
player value 16 | dealer value 20
player value 27 | dealer value 23
player value 19 | dealer value 20
player value 12 | dealer value 23
player value 27 | dealer value 20
player value 19 | dealer value 20
player value 18 | dealer value 23
player value 21 | dealer value 23
player value 23 | dealer value 21
player value 14 | dealer value 21
player value 14 | dealer value 19
player value 21 | dealer value 25
player value 23 | dealer value 19
player value 20 | dealer value 25
player value 25 | dealer value 19
player value 26 | dealer value 24
player value 21 | dealer value 17
player value 19 | dealer value 20
player value 27 | dealer value 25
player value 23 | dealer value 20
player value 25 | dealer value 24
player value 19 | dealer value 17
player value 21 | dealer value 22
player value 15 | dealer value 19
player value 18 | dealer value 19
player value 24 | dealer value 18
player value 13 | dealer value 22
player value 17 | dealer value 21
player value 1

player value 17 | dealer value 25
player value 21 | dealer value 17
player value 19 | dealer value 24
player value 25 | dealer value 19
player value 24 | dealer value 20
player value 23 | dealer value 23
player value 17 | dealer value 20
player value 23 | dealer value 21
player value 17 | dealer value 21
player value 14 | dealer value 21
player value 18 | dealer value 25
player value 25 | dealer value 18
player value 16 | dealer value 22
player value 18 | dealer value 20
player value 17 | dealer value 20
player value 15 | dealer value 20
player value 18 | dealer value 23
player value 17 | dealer value 19
player value 25 | dealer value 22
player value 24 | dealer value 26
player value 20 | dealer value 19
player value 20 | dealer value 23
player value 26 | dealer value 26
player value 19 | dealer value 21
player value 18 | dealer value 22
player value 26 | dealer value 20
player value 26 | dealer value 22
player value 22 | dealer value 18
player value 26 | dealer value 17
player value 2

player value 19 | dealer value 18
player value 24 | dealer value 22
player value 21 | dealer value 21
player value 21 | dealer value 19
player value 20 | dealer value 21
player value 20 | dealer value 19
player value 12 | dealer value 21
player value 20 | dealer value 18
player value 21 | dealer value 24
player value 16 | dealer value 19
player value 18 | dealer value 22
player value 23 | dealer value 26
player value 22 | dealer value 18
player value 18 | dealer value 22
player value 22 | dealer value 21
player value 23 | dealer value 21
player value 19 | dealer value 17
player value 15 | dealer value 20
player value 15 | dealer value 19
player value 18 | dealer value 20
player value 18 | dealer value 20
player value 20 | dealer value 17
player value 20 | dealer value 21
player value 27 | dealer value 18
player value 16 | dealer value 23
player value 14 | dealer value 21
player value 21 | dealer value 18
player value 21 | dealer value 24
player value 18 | dealer value 23
player value 2

In [120]:
b.savePolicy()

In [122]:
a = b.playWithDealer(rounds=1000)
a

array([444., 162., 394.])

In [123]:
b.player_Q_Values

{(12, 1, True): {1: -0.3131827294507537, 0: -0.40951000000000004},
 (12, 1, False): {1: -0.26146825003634905, 0: -0.3784314005059214},
 (12, 2, True): {1: -0.25777103690130276, 0: -0.21526606900000003},
 (12, 2, False): {1: -0.2081312988752082, 0: -0.40814001661680294},
 (12, 3, True): {1: -0.08571918534892821, 0: -0.008776440100000032},
 (12, 3, False): {1: -0.23240315065384048, 0: -0.31942181338655146},
 (12, 4, True): {1: -0.036448741211515374, 0: 0.03492382391000001},
 (12, 4, False): {1: -0.20958709433004855, 0: -0.45585925821267553},
 (12, 5, True): {1: -0.2066741786047393, 0: -0.3013301179},
 (12, 5, False): {1: -0.1654589254993991, 0: -0.009656454559446692},
 (12, 6, True): {1: -0.22434132315013167, 0: 0.13279858815418996},
 (12, 6, False): {1: -0.1970939379485161, 0: -0.2095287458521104},
 (12, 7, True): {1: -0.0902903266551243, 0: -0.3597031},
 (12, 7, False): {1: -0.2938421683498162, 0: -0.32601905266908704},
 (12, 8, True): {1: -0.246269933151668, 0: -0.27829000000000004},
