## 计算21点游戏 （Black Jack）中玩家赢的概率。输的概率，以及获得平局的概率

有一名庄家（dealer）和一名玩家（player）参与21点游戏，假设指派的数量是是无尽的，或者抽出的纸牌被重新放回，保证装甲和玩家不可通过记录已发的牌面来猜测本局21点游戏中可能的牌面。

游戏开始时，庄稼给自己和玩家分别发两张牌，玩家的两张牌均为暗牌（正面朝下），庄家的两张牌中有一张暗牌（正面朝下），有一张明牌（正面朝上）。在21点游戏中，牌面对应的点数如下： 

牌面|2|3|4|5|6|7|8|9|10| J| Q| K|A|
:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:
点数|2|3|4|5|6|7|8|9|10|10|10|10|11或1|

庄家和玩家的目标都是使自己手上的牌面总点数（score）尽量接近于21，但不超过21。如果输了，奖励为-1；如果赢了，奖励为+1；如果平局，奖励为0。

梳理一下21点游戏的整个流程：

1） 玩家获得两张牌，判断是否是黑杰克；判断是否爆炸；计算手上牌面总点数，根据自身策略，判断是否要牌；计算玩家的牌面总点数。

2） 庄家获得两张牌，判断是否是黑杰克；判断是否爆炸；计算手上牌面总点数，根据资深策略，判断是否要牌；计算庄家的牌面总点数。

In [1]:
import numpy as np
import random

def is_burst(score):
    burst_flag = False
    if score > 21:
        burst_flag = True
    return burst_flag

def is_usable_a(hand): # hand为手上牌面列表
    return 1 in hand and sum(hand) + 10 <= 21

def is_nature(hand):
    nature = False
    if is_usable_a(hand) and sum(hand) + 10 == 21:
        nature = True
    return nature

def rule(hand):  
    nature = False
    if is_usable_a(hand):
        hand_point = sum(hand) + 10
    else:
        hand_point = sum(hand)
    burst_flag = is_burst(hand_point)
    if burst_flag:
        hand_point = 0
    else:
        if is_usable_a(hand):
            nature = is_nature(hand)
            if nature:
                hand_point = 21
    return burst_flag, nature, hand_point

def dealing(n):
    # A, 2~10, J/Q/K
    card_point = list(range(1, 11)) + [10, 10, 10]
    return random.sample(card_point, n)

def policy(n):
    hand = dealing(2)
    hand_point = 0
    nature = False
    (burst_flag, nature, hand_point) = rule(hand)
    while hand_point < n:
        if burst_flag:
            break
        hand = hand + dealing(1)
        (burst_flag, nature, hand_point) = rule(hand)
    print(hand)
    return nature, hand_point

def play():
    reward = 0
    (player_nature, player_hand_point) = policy(20)
    print(f'player nature is {player_nature}')
    print(f'player score is {player_hand_point}')
    (dealer_nature, dealer_hand_point) = policy(17)
    print(f'dealer nature is {dealer_nature}')
    print(f'dealer score is {dealer_hand_point}')
    if player_nature and not dealer_nature:
        reward = 1
    elif not player_nature and dealer_nature:
        reward = -1
    elif player_nature and dealer_nature:
        reward = 0
    else:
        if player_hand_point > dealer_hand_point:
            reward = 1
        elif player_hand_point < dealer_hand_point:
            reward = -1
        else:
            reward = 0
    return reward

if __name__ == "__main__":
    play_times = 10
    returnList = []
    winNum = 0
    loseNum = 0
    drawNum = 0
    for i in range(play_times):
        reward = play()
        print(f"play_time={i + 1},reward={reward}")
        print(' ')
        returnList.append(reward)
        if reward == 1:
            winNum += 1
        elif reward == -1:
            loseNum += 1
        else:
            drawNum += 1
    print(f'win rate is {winNum / play_times}')
    print(f'lose rate is {loseNum / play_times}')
    print(f'draw rate is {drawNum / play_times}')

[10, 3, 10]
player nature is False
player score is 0
[4, 10, 3]
dealer nature is False
dealer score is 17
play_time=1,reward=-1
 
[10, 1]
player nature is True
player score is 21
[5, 4, 10]
dealer nature is False
dealer score is 19
play_time=2,reward=1
 
[9, 1]
player nature is False
player score is 20
[7, 9, 10]
dealer nature is False
dealer score is 0
play_time=3,reward=1
 
[6, 10, 6]
player nature is False
player score is 0
[10, 4, 2, 10]
dealer nature is False
dealer score is 0
play_time=4,reward=0
 
[9, 6, 10]
player nature is False
player score is 0
[10, 6, 10]
dealer nature is False
dealer score is 0
play_time=5,reward=0
 
[9, 6, 10]
player nature is False
player score is 0
[7, 9, 7]
dealer nature is False
dealer score is 0
play_time=6,reward=0
 
[3, 10, 2, 4, 7]
player nature is False
player score is 0
[8, 5, 10]
dealer nature is False
dealer score is 0
play_time=7,reward=0
 
[5, 6, 6, 1, 10]
player nature is False
player score is 0
[10, 7]
dealer nature is False
dealer score i

### 本例中21点游戏的规则与上一题相同，假定一名庄家和一名玩家一起玩了N局21点游戏。 

编程实现N局21点游戏的交互，假设N=10，给出每局的模拟交互序列。

In [2]:
import numpy as np
import random

def is_burst(score):
    burst_flag = False
    if score > 21:
        burst_flag = True
    return burst_flag

def is_usable_a(hand): # hand为手上牌面列表
    if 1 in hand and sum(hand) + 10 <= 21:
        return True
    else:
        return False

def is_nature(hand):
    nature = False
    if is_usable_a(hand) and sum(hand) + 10 == 21:
        nature = True
    return nature

def dealing(n):
    # A, 2~10, J/Q/K
    card_point = list(range(1, 11)) + [10, 10, 10]
    return random.sample(card_point, n)

def rule(hand):  
    nature = False
    a_flag = is_usable_a(hand)
    if a_flag:
        hand_point = sum(hand) + 10
    else:
        hand_point = sum(hand)
    burst_flag = is_burst(hand_point)
    if burst_flag:
        hand_point = 0
    else:
        if a_flag:
            nature = is_nature(hand)
            if nature:
                hand_point = 21
    return burst_flag, a_flag, nature, hand_point

def reset():
    hand = dealing(2)
    (burst_flag, a_flag, nature, hand_point) = rule(hand)
    while hand_point < 12:
        hand = hand + dealing(1)
        (burst_flag, a_flag, nature, hand_point) = rule(hand)
        if burst_flag:
            break
    return hand, burst_flag, a_flag, nature, hand_point
      
def action(hand_point, burst_flag):
    # if player point < 20 and is not burst, return 1, action is hit 
    # else, return 0, action is stop
    if hand_point < 20 and not burst_flag:
        return 1
    else:
        return 0

def cmpScore(player_action, player_nature, dealer_nature, player_hand_point, dealer_hand_point):
    #reward = 0
    if player_action:
        reward = 0
    else:
        if player_nature and not dealer_nature:
            reward = 1
        elif not player_nature and dealer_nature:
             reward = -1
        elif player_nature and dealer_nature:
            reward = 0
        else:
            if player_hand_point > dealer_hand_point:
                reward = 1
            elif player_hand_point < dealer_hand_point:
                reward = -1
            else:
                reward = 0
    return reward

def episode():
    dealer_hand, dealer_burst_flag, dealer_a_flag, dealer_nature, dealer_hand_point = reset()
    player_hand, player_burst_flag, player_a_flag, player_nature, player_hand_point = reset()

    while dealer_hand_point < 17 and not dealer_burst_flag:
        dealer_hand = dealer_hand + dealing(1)
        (dealer_burst_flag, dealer_a_flag, dealer_nature, dealer_hand_point) = rule(dealer_hand)
        
    state = (dealer_hand[0], player_hand_point, is_usable_a(player_hand))
    player_action = action(player_hand_point, player_burst_flag)
    reward = cmpScore(player_action, player_nature, dealer_nature, player_hand_point, dealer_hand_point)
    epiRecord = []
    
    if not player_action:
        epiRecord = epiRecord + [state, player_action, reward]
    else:
        while player_action:
            epiRecord = epiRecord + [state, player_action]
            player_hand = player_hand + dealing(1)
            (player_burst_flag, player_a_flag, player_nature, player_hand_point) = rule(player_hand)
            state = (dealer_hand[0], player_hand_point, player_a_flag)
            player_action = action(player_hand_point, player_burst_flag)
            reward = cmpScore(player_action, player_nature, dealer_nature, player_hand_point, dealer_hand_point)
            epiRecord = epiRecord + [reward]
    
    print(f'player_hand = {player_hand}')
    print(f'dealer_hand = {dealer_hand}')
    print(f'episode = {epiRecord}')  
    
if __name__ == "__main__":
    play_times = 10
    
    for i in range(play_times):
        episode()

player_hand = [5, 10, 10]
dealer_hand = [10, 4, 8]
episode = [(10, 15, False), 1, 0]
player_hand = [10, 10]
dealer_hand = [2, 4, 2, 2, 5, 10]
episode = [(2, 20, False), 0, 1]
player_hand = [9, 1]
dealer_hand = [7, 8, 6]
episode = [(7, 20, True), 0, -1]
player_hand = [7, 10, 4]
dealer_hand = [3, 10, 6]
episode = [(3, 17, False), 1, 1]
player_hand = [6, 2, 6, 5, 4]
dealer_hand = [10, 10]
episode = [(10, 14, False), 1, 0, (10, 19, False), 1, -1]
player_hand = [7, 4, 8, 10]
dealer_hand = [3, 7, 8]
episode = [(3, 19, False), 1, -1]
player_hand = [3, 1, 6]
dealer_hand = [10, 4, 10]
episode = [(10, 14, True), 1, 1]
player_hand = [10, 3, 10]
dealer_hand = [6, 10, 2]
episode = [(6, 13, False), 1, -1]
player_hand = [2, 10, 7, 9]
dealer_hand = [2, 10, 6]
episode = [(2, 12, False), 1, 0, (2, 19, False), 1, -1]
player_hand = [10, 10]
dealer_hand = [5, 8, 7]
episode = [(5, 20, False), 0, 0]
