### MC Simulation BlackJack State-Value
---
- dealer policy: HIT17
- player policy: sticks on 20 or 21, otherwise hit
---
**States:** the player's current sum (12–21), the dealer's one showing card (ace–10), and whether the player holds a usable ace

In [6]:
import numpy as np

In [80]:
class BlackJackMC(object):
    """Monte Carlo simulation of Blackjack that accumulates credit per state.

    A player state is the tuple ``(player_sum, dealer_show_card, usable_ace)``.
    Every state the player visits during a game receives +1 when the player
    wins that game and -1 when the player loses; draws leave it unchanged.

    The dealer sticks on 17 or more, the player sticks on 20 or more.
    """

    # Totals at (or above) which each side stops drawing cards.
    _DEALER_STICK_AT = 17
    _PLAYER_STICK_AT = 20

    def __init__(self):
        # state -> accumulated credit over all games played so far
        self.player_state_value = {}
        # states visited by the player during the most recent game
        self.player_states = []

    # give card
    @staticmethod
    def giveCard():
        """Draw one card value uniformly from the 13 ranks.

        Returns:
            A plain ``int`` in 1..10, where 1 stands for an ace and 10 is
            four times as likely as any other value (10, J, Q, K).
        """
        # 1 stands for ace
        c_list = list(range(1, 11)) + [10, 10, 10]
        # Convert to a built-in int so state tuples do not carry numpy scalars.
        return int(np.random.choice(c_list))

    def _policy_step(self, current_value, usable_ace, stick_at):
        """Take one decision step for a "stick at ``stick_at``" policy.

        Returns:
            ``(new_value, usable_ace, is_end)``. A bust produced by a hit is
            only reported as ``is_end=True`` on the following call.
        """
        if current_value > 21:
            if not usable_ace:
                # Bust with nothing left to soften the hand.
                return current_value, usable_ace, True
            # Re-count the ace as 1; once spent it is no longer usable.
            current_value -= 10
            usable_ace = False
        if current_value >= stick_at:
            return current_value, usable_ace, True
        card = self.giveCard()
        if card == 1 and current_value <= 10:
            # An ace counts as 11 while that cannot bust the hand.
            return current_value + 11, True, False
        return current_value + card, usable_ace, False

    def dealerPolicy(self, current_value, usable_ace, is_end):
        """Advance the dealer's hand by one step (stick on 17 or more).

        Args:
            current_value: Current card sum of the dealer.
            usable_ace: Whether the sum contains an ace counted as 11.
            is_end: Unused; kept so existing callers keep working.

        Returns:
            ``(new_value, usable_ace, is_end)``.
        """
        return self._policy_step(current_value, usable_ace, self._DEALER_STICK_AT)

    def playerPolicy(self, current_value, usable_ace, is_end):
        """Advance the player's hand by one step (stick on 20 or 21).

        Args:
            current_value: Current card sum of the player.
            usable_ace: Whether the sum contains an ace counted as 11.
            is_end: Unused; kept so existing callers keep working.

        Returns:
            ``(new_value, usable_ace, is_end)``.
        """
        return self._policy_step(current_value, usable_ace, self._PLAYER_STICK_AT)

    def _giveCredit(self, player_value, dealer_value, is_end=True):
        """Credit every state in ``self.player_states`` with the game result.

        Args:
            player_value: Final card sum of the player.
            dealer_value: Final card sum of the dealer.
            is_end: Nothing is credited unless this is true.
        """
        if not is_end:
            return

        player_bust = player_value > 21
        dealer_bust = dealer_value > 21
        if player_bust:
            # Both sides busting is treated as a draw.
            reward = 0 if dealer_bust else -1
        elif dealer_bust or player_value > dealer_value:
            reward = 1
        elif player_value < dealer_value:
            reward = -1
        else:
            # Equal sums are a draw, not a player win.
            reward = 0

        if reward == 0:
            return
        for s in self.player_states:
            # .get() tolerates states that were never initialised.
            self.player_state_value[s] = self.player_state_value.get(s, 0) + reward

    def play(self, rounds=1000):
        """Play a single game and update ``player_state_value``.

        Prints the final card sums of the player and the dealer.

        Args:
            rounds: Currently unused; exactly one game is played per call.
                Kept for backward compatibility with existing callers.
        """
        # Start from a clean slate so earlier games are not credited again.
        self.player_states = []

        # give dealer 2 cards and show 1
        # NOTE: an ace among these two cards is counted as 1, never as 11.
        show_card = self.giveCard()
        dealer_value = show_card + self.giveCard()

        # player's turn
        player_value = 0
        usable_ace, is_end = False, False
        while True:
            player_value, usable_ace, is_end = self.playerPolicy(player_value, usable_ace, is_end)
            if is_end:
                break
            # only sums in 12..21 are recorded as states
            if 12 <= player_value <= 21:
                self.player_states.append((player_value, show_card, usable_ace))
        print("player card sum", player_value)

        # dealer's turn
        usable_ace, is_end = False, False
        while not is_end:
            dealer_value, usable_ace, is_end = self.dealerPolicy(dealer_value, usable_ace, is_end)
        print("dealer card sum", dealer_value)

        # judge winner
        # make sure every visited state has an entry, even after a draw
        for s in self.player_states:
            self.player_state_value.setdefault(s, 0)

        self._giveCredit(player_value, dealer_value)

In [83]:
# Create a fresh simulator and run play() once; play() prints the final
# card sums of the player and the dealer.
b = BlackJackMC()
b.play()

player card sum 25
dealer card sum 18


In [84]:
# Show the recorded player states as
# (player sum, dealer's showing card, usable ace) tuples.
b.player_states

[(12, 10, False), (16, 10, False)]

In [76]:
# Show the credit accumulated per state.
# NOTE(review): this cell's execution counter (76) is lower than the cells
# above, so the output below presumably comes from an earlier run - re-run to confirm.
b.player_state_value

{(15, 4, False): 1, (20, 4, False): 2}