In [23]:
import random 
import matplotlib.pyplot as plt

class Deck:
    def __init__(self):
        self.cards = [Card(s, v) for s in ["Spades" , "Clubs" , "Hearts ", 
        "Diamonds"] for v in ["A" ,"2" ,"3" ,"4" ,"5" ,"6" 
        ,"7" ,"8" ,"9" ,"10" , "J" , "Q" , "K"] ]
        
    def shuffle(self):
        if len(self.cards) > 1:
            random.shuffle(self.cards)
    
    def deal(self):
        if len(self.cards) > 1:
            return self.cards.pop(0)
    

class Card: 
    def __init__(self, suit , value): 
        self.suit = suit 
        self.value = value 
    
    def __repr__(self):
        return " of " .join((self.value , self.suit))
    
class Hand:
    def __init__(self , dealer = False):
        self.dealer = dealer 
        self.cards = []
        self.value = 0 
        self.has_ace = False

    def new_card(self, card):
        self.cards.append(card)

    def calculate_card_values(self): #changed this
        self.value = 0 
        self.has_ace = False
        for card in self.cards:
            if card.value.isnumeric():
                self.value += int(card.value)
            else:
                if card.value == "A":
                    self.has_ace = True
                    self.value += 11
                    if self.value > 21:
                        self.value -= 10 
                        self.has_ace = False
                else:
                    self.value += 10
        return self.value
    
#     def has_ace(self):
#         for card in self.cards:
#             if card.value == "A":
#                 return True
#         return False
            
    def show_cards(self, first_state):
        if self.dealer and first_state: 
            print("Hidden")
            print(self.cards[1])
        else:
            for card in self.cards: 
                print(card)
            print("Value:" , self.calculate_card_values())
    
    def return_dealer_card(self):
        card_val = self.cards[1].value
        if card_val.isnumeric():
            return card_val
        else:
            if card_val == "A":
                return 11
            else:
                return 10
            
class Agent:
    def __init__(self):
        pass
    
    def check(self, player_sum, dealer_card, player_has_ace):
        self.player_sum = player_sum
        self.dealer_card = dealer_card
        self.player_has_ace = player_has_ace
        
        if self.player_sum > 21 and self.player_has_ace == True:
            self.player_sum = player_sum - 10
            self.player_has_ace = False
        
        return self.player_sum, self.player_has_ace
        
        
class Game: 
    
    def __init__(self):
        self.reward = 0
        self.num_episodes = 0
    
    def start_game(self):
        
        agent = Agent()
        
        game_ongoing = True

        print("Game Starting \n")
        
        self.deck = Deck()   
        print(self.deck.cards)
        self.deck.shuffle()
        
        self.player_hand = Hand()
        self.dealer_hand = Hand(dealer=True)
        
        game_actions = []

        for i in range(2):
            card = self.deck.deal() 
            self.player_hand.new_card(card)
            card = self.deck.deal()
            self.dealer_hand.new_card(card)

        while game_ongoing == True: 

            print("\nYour hand is:")
            self.player_hand.show_cards()
            print()
            print("Dealer's hand is: ")
            self.dealer_hand.show_cards()

            player_blackjack, dealer_blackjack = self.blackjack_check()

            if player_blackjack or dealer_blackjack:
                game_ongoing = False 
                outcome = self.blackjack_display_results(player_blackjack , dealer_blackjack)
                return outcome, game_actions

#             print("\nPlease choose: ")
#             print("Enter 1 to Hit. ")
#             print("Enter 2 to Stand.")
#             choice = int(input())
#             print("\nYour choice: ", choice)
#             print()
            
#             while (choice != 1) and choice != 2:
#                 choice = int(input("\nPlease enter 1 to Hit or 2 to Stand: "))
            
            player_hand_value = self.player_hand.calculate_card_values()
            dealer_hand_value = self.dealer_hand.calculate_card_values()
            
            agent = Agent()

            choice = reinforment_learning_algorithm(agent, self.player_hand, player_hand_value, self.dealer_hand.return_dealer_card(), self.player_hand.has_ace(), self.deck.cards)
            game_actions.append([(player_hand_value, self.player_hand.has_ace(), dealer_hand_value), choice])
            
            if (choice == 1):
                #player chose to hit
                self.player_hand.new_card(self.deck.deal())
                self.player_hand.show_cards()
                player_hand_value = self.player_hand.calculate_card_values()
                if (player_hand_value > 21):
                    print("\nPlayer has lost !")
                    game_ongoing = False
                    return -1, game_actions

            elif (choice == 2):
                #player chose to stand
                
                # Dealer Policy
                if dealer_hand_value < 17:
                    self.dealer_hand.new_card(self.deck.deal())
                    self.dealer_hand.show_cards()
                    dealer_hand_value = self.dealer_hand.calculate_card_values()
                    if (dealer_hand_value > 21):
                        print("\nDealer has lost !")
                        game_ongoing = False
                        return 1, game_actions

                print("\nFinal Results:")
                print("Player's hand:" , player_hand_value)
                print("Dealer's hand:" , dealer_hand_value)
                
                if player_hand_value < 21 and dealer_hand_value < 21:
                    if (player_hand_value > dealer_hand_value):
                        print("\nPlayer Wins!")
                        return 1, game_actions
                    elif (player_hand_value == dealer_hand_value):
                        print("\nTie!")
                        return 0, game_actions
                    else: 
                        print("\nDealer Wins!")
                        return -1, game_actions
                elif player_hand_value >= 21 and dealer_hand_value < 21:
                    return -1, game_actions
                elif dealer_hand_value >= 21 and player_hand_value < 21:
                    return 1, game_actions
                else:
                    return 0, game_actions
            
                game_ongoing = False 
        
        first_state = False


    def blackjack_check(self):
        player = False 
        dealer = False
        
        if self.player_hand.calculate_card_values() == 21: 
            player = True
        if self.dealer_hand.calculate_card_values() == 21:
            dealer = True 

        return player, dealer 
    
    def blackjack_display_results(self , player_blackjack , dealer_blackjack):
        if player_blackjack and dealer_blackjack:
            print("Both players have blackjack! Draw!")
            return 0

        elif player_blackjack: 
            print("Player has blackjack! Player wins!")
            return 1
        
        elif dealer_blackjack:
            print("Dealer has blackjack! Dealer wins!")
            return -1

def reinforment_learning_algorithm(agent, player_cards, player_sum, dealer_sum, player_has_ace, deck_cards):
    has_ace = player_has_ace
    agent.check(player_sum, dealer_sum, player_has_ace)
    
    hit_num = 0
    stand_num = 0
    hit_value = 0
    stand_value = 0

    if has_ace == player_has_ace:
        converted_ace = False
    else:
        converted_ace = True

    possible_outcomes = []

    if player_sum < 12:
        #Hit when sum is less than 12 
        hit_num = hit_num + 1
        choice = 1
    elif (player_sum == 21):
        stand_num = stand_num + 1
        # Stand if sum is equal to 21
        choice = 2
    elif(player_sum >= 12 and player_sum < 21):
        for possible_card in deck_cards:
            possible_cards = possible_card.value
            
            if '2' in possible_cards:
                possible_value = 2
            elif '3' in possible_cards:
                possible_value = 3
            elif '4' in possible_cards:
                possible_value = 4
            elif '5' in possible_cards:
                possible_value = 5
            elif '6' in possible_cards:
                possible_value = 6
            elif '7' in possible_cards:
                possible_value = 7
            elif '8' in possible_cards:
                possible_value = 8
            elif '9' in possible_cards:
                possible_value = 9
            elif '10' in possible_cards:
                possible_value = 10
            elif 'J' in possible_cards:
                possible_value = 10
            elif 'Q' in possible_cards:
                possible_value = 10
            elif 'K' in possible_cards:
                possible_value = 10
            elif 'A' in possible_cards:
                possible_value = 11

            player_total = player_sum + possible_value

            if player_total < 21:
                choice = 1 #to hit
                hit_num = hit_num + 1
                if hit_num == 1:
                    hit_value = player_total
                else:
                    hit_value = (hit_value + player_total)/2
            elif player_total >= 21 and possible_value == 11 and converted_ace == False and player_total-10 < 21:
                player_total = player_total - 10
                choice = 1 #to hit
                hit_num = hit_num + 1
                if hit_num == 1:
                    hit_value = player_total
                else:
                    hit_value = (hit_value + player_total)/2
            else:
                player_total = player_sum
                choice = 2 #to stand
                stand_num = stand_num + 1
                if stand_num == 1:
                    stand_value = player_total
                else:
                    stand_value = round((stand_value + player_total)/2)

            possible_outcomes.append([player_sum, possible_card, player_total, choice])

    print("Number of hits on action: ", hit_num)
    print("Number of stands on action: ", stand_num,"\n")
    print("Estimated hit value: ", round(hit_value),"\n")
    print("Estimated stand value: ", stand_value,"\n")
    #print(possible_outcomes, "\n")   

    if hit_num > stand_num:
        print("Hit")
        return 1 #to hit
    elif hit_num < stand_num:
        print("Stand")
        return 2 #to stand
    else:
        if hit_value > stand_value:
            print("Hit")
            return 1 # to hit
        else:
            print("Stand")
            return 2 #if equal then best option is to stand
    
    
if __name__ == "__main__":
    g = Game()
    num_episodes = 1
#     R_est = []
#     N_est = []
#     Q_est = []
    
    # q(s,a)
    N = []
    Q = []
    found = False
    
    for n in range(num_episodes):
        outcome, game_actions = g.start_game()
        #print("Outcome: ",outcome) 
        
        print(len(game_actions))
        #print(game_actions)
        
#         for i in range(len(game_actions)):
#             #print(game_actions[i])
#             if len(N_est) == 0:
#                 N_est.append([game_actions[i], 1])
#             if len(R_est) == 0:
#                 R_est.append([game_actions[i], outcome])                
#             if len(Q_est) == 0:
#                 Q_est.append([game_actions[i], (outcome/1)])
#             else:
#                 for j in range(len(N_est)):
#                     for k in range(len(Q_est)):
#                         for r in range(len(R_est)):
#                             if game_actions[i] == N_est[j][0] and game_actions[i] == Q_est[k][0] and game_actions[i] == R_est[r][0]:  
#                                 found = True
#                                 N_est[j][1] = N_est[j][1] + 1
#                                 R_est[r][1] = R_est[r][1] + outcome
#                                 Q_est[k][1] = R_est[r][1]/N_est[j][1]
#                             if found == False:
#                                 N_est.append([game_actions[i], 1])
#                                 R_est.append([game_actions[i], outcome]) 
#                                 Q_est.append([game_actions[i], (outcome/1)])
#                 found = False
        
        # q(s,a)
        for i in range(len(game_actions)):
            #print(game_actions[i])
            if len(N) == 0:
                N.append([game_actions[i][0], game_actions[i][1], 1])            
            if len(Q) == 0:
                Q.append([game_actions[i][0], game_actions[i][1], 0 + ((1/1)*(outcome - 0))])
            else:
                for j in range(len(N)):
                    for k in range(len(Q)):
                        if game_actions[i][0] == N[j][0] and game_actions[i][1] == N[j][1] and game_actions[i][0] == Q[k][0] and game_actions[i][1] == Q[k][1]:  
                            found = True
                            N[j][2] = N[j][2] + 1
                            Q[k][2] = Q[k][2] + ((1/N[j][2])*(outcome - Q[k][2]))
                        if found == False:
                            N.append([game_actions[i][0], game_actions[i][1], 1])
                            Q.append([game_actions[i][0], game_actions[i][1], 0 + ((1/1)*(outcome - 0))])
                found = False
                
#         if len(Q) > 0:
#             print("A")
#             print(Q[0][0])
#             print(Q[0][1])
    
    #[(player_hand_value, self.player_hand.has_ace(), dealer_hand_value), choice]
#     print("N_est: ",N_est)
#     print("Q_est: ",Q_est)
#     print("R_est: ",R_est)

                
    print("N: ",N)
    print("Q: ",Q) 

        

        #g.num_episodes = g.num_episodes + 1
        
    # Q Learning training
    b = BlackJack_QLearning()
    b.play(10000)
    print("Done training")

    # save policy
    b.savePolicy()

    # play
    result = b.playWithDealer(rounds=1000)
    print(result)
    
    # printing graphs
    plot_oneline()

Game Starting 

[A of Spades, 2 of Spades, 3 of Spades, 4 of Spades, 5 of Spades, 6 of Spades, 7 of Spades, 8 of Spades, 9 of Spades, 10 of Spades, J of Spades, Q of Spades, K of Spades, A of Clubs, 2 of Clubs, 3 of Clubs, 4 of Clubs, 5 of Clubs, 6 of Clubs, 7 of Clubs, 8 of Clubs, 9 of Clubs, 10 of Clubs, J of Clubs, Q of Clubs, K of Clubs, A of Hearts , 2 of Hearts , 3 of Hearts , 4 of Hearts , 5 of Hearts , 6 of Hearts , 7 of Hearts , 8 of Hearts , 9 of Hearts , 10 of Hearts , J of Hearts , Q of Hearts , K of Hearts , A of Diamonds, 2 of Diamonds, 3 of Diamonds, 4 of Diamonds, 5 of Diamonds, 6 of Diamonds, 7 of Diamonds, 8 of Diamonds, 9 of Diamonds, 10 of Diamonds, J of Diamonds, Q of Diamonds, K of Diamonds]

Your hand is:
7 of Spades
7 of Diamonds
Value: 14

Dealer's hand is: 
Hidden
9 of Spades
Number of hits on action:  23
Number of stands on action:  25 

Estimated hit value:  18 

Estimated stand value:  14 

Stand

Final Results:
Player's hand: 14
Dealer's hand: 20

Dealer W

TypeError: giveCard() takes 0 positional arguments but 1 was given

In [39]:
a = ["8 of spades"]
b = ['8 of Hearts']

if '8' in a:
    print("t")
else:
    print("f")

f


In [11]:
Cards = ['A of Spades', '2 of Spades', '3 of Spades', '4 of Spades' ]

card = 'A'
for c in Cards:
    if '2' in c:
        print(c)



2 of Spades


In [3]:
cards = ['A of Spades', '2 of Spades', '3 of Spades', '4 of Spades', '5 of Spades', '6 of Spades', '7 of Spades',
'8 of Spades', '9 of Spades', '10 of Spades', 'J of Spades', 'Q of Spades', 'K of Spades', 'A of Clubs', 
'2 of Clubs', '3 of Clubs', '4 of Clubs', '5 of Clubs', '6 of Clubs', '7 of Clubs', '8 of Clubs', '9 of Clubs', 
'10 of Clubs', 'J of Clubs', 'Q of Clubs', 'K of Clubs', 'A of Hearts' , '2 of Hearts' , '3 of Hearts' , 
'4 of Hearts' , '5 of Hearts' , '6 of Hearts' , '7 of Hearts' , '8 of Hearts' , '9 of Hearts' , '10 of Hearts' , 
'J of Hearts' , 'Q of Hearts' , 'K of Hearts' , 'A of Diamonds', '2 of Diamonds', '3 of Diamonds', '4 of Diamonds', 
'5 of Diamonds', '6 of Diamonds', '7 of Diamonds', '8 of Diamonds', '9 of Diamonds', '10 of Diamonds', 'J of Diamonds', 
'Q of Diamonds', 'K of Diamonds']

for i in cards:
    print(i)

A of Spades
2 of Spades
3 of Spades
4 of Spades
5 of Spades
6 of Spades
7 of Spades
8 of Spades
9 of Spades
10 of Spades
J of Spades
Q of Spades
K of Spades
A of Clubs
2 of Clubs
3 of Clubs
4 of Clubs
5 of Clubs
6 of Clubs
7 of Clubs
8 of Clubs
9 of Clubs
10 of Clubs
J of Clubs
Q of Clubs
K of Clubs
A of Hearts
2 of Hearts
3 of Hearts
4 of Hearts
5 of Hearts
6 of Hearts
7 of Hearts
8 of Hearts
9 of Hearts
10 of Hearts
J of Hearts
Q of Hearts
K of Hearts
A of Diamonds
2 of Diamonds
3 of Diamonds
4 of Diamonds
5 of Diamonds
6 of Diamonds
7 of Diamonds
8 of Diamonds
9 of Diamonds
10 of Diamonds
J of Diamonds
Q of Diamonds
K of Diamonds


In [1]:
for i in range(1):
    print(i)

0


In [6]:
round(2.3)

2

In [None]:
def monte_carlo(num_episodes):
        
        
        
        
    def update_states(self, visited):
        reward = self.env.get_reward()
        changed = set()
        for state, action in visited:
            if (state, action) not in changed:
                changed.add((state, action))
                average, count = self.q[(state, action)]
                self.q[(state, action)] = (
                    (average * count + reward) / (count + 1),
                    count + 1
                )

                if self.q[(state, 0)][0] > self.q[(state, 1)][0]:
                    self.pi[state] = 0
                else:
                    self.pi[state] = 1

In [None]:
# Monte Carlo Sample with On-Policy
def monte_carlo_on_policy(num_episodes):
    states_usable_ace = np.zeros((10, 10))
    # initialze counts to 1 to avoid 0 being divided
    states_usable_ace_count = np.ones((10, 10))
    states_no_usable_ace = np.zeros((10, 10))
    # initialze counts to 1 to avoid 0 being divided
    states_no_usable_ace_count = np.ones((10, 10))
    for i in tqdm(range(0, episodes)):
        _, reward, player_trajectory = play(target_policy_player)
        for (usable_ace, player_sum, dealer_card), _ in player_trajectory:
            player_sum -= 12
            dealer_card -= 1
            if usable_ace:
                states_usable_ace_count[player_sum, dealer_card] += 1
                states_usable_ace[player_sum, dealer_card] += reward
            else:
                states_no_usable_ace_count[player_sum, dealer_card] += 1
                states_no_usable_ace[player_sum, dealer_card] += reward
    return states_usable_ace / states_usable_ace_count, states_no_usable_ace / states_no_usable_ace_count

# Monte Carlo with Exploring Starts
def monte_carlo_es(episodes):
    # (playerSum, dealerCard, usableAce, action)
    state_action_values = np.zeros((10, 10, 2, 2))
    # initialze counts to 1 to avoid division by 0
    state_action_pair_count = np.ones((10, 10, 2, 2))

    # behavior policy is greedy
    def behavior_policy(usable_ace, player_sum, dealer_card):
        usable_ace = int(usable_ace)
        player_sum -= 12
        dealer_card -= 1
        # get argmax of the average returns(s, a)
        values_ = state_action_values[player_sum, dealer_card, usable_ace, :] / \
                  state_action_pair_count[player_sum, dealer_card, usable_ace, :]
        return np.random.choice([action_ for action_, value_ in enumerate(values_) if value_ == np.max(values_)])

    # play for several episodes
    for episode in tqdm(range(episodes)):
        # for each episode, use a randomly initialized state and action
        initial_state = [bool(np.random.choice([0, 1])),
                       np.random.choice(range(12, 22)),
                       np.random.choice(range(1, 11))]
        initial_action = np.random.choice(ACTIONS)
        current_policy = behavior_policy if episode else target_policy_player
        _, reward, trajectory = play(current_policy, initial_state, initial_action)
        first_visit_check = set()
        for (usable_ace, player_sum, dealer_card), action in trajectory:
            usable_ace = int(usable_ace)
            player_sum -= 12
            dealer_card -= 1
            state_action = (usable_ace, player_sum, dealer_card, action)
            if state_action in first_visit_check:
                continue
            first_visit_check.add(state_action)
            # update values of state-action pairs
            state_action_values[player_sum, dealer_card, usable_ace, action] += reward
            state_action_pair_count[player_sum, dealer_card, usable_ace, action] += 1

    return state_action_values / state_action_pair_count

In [7]:
import random
print(random.random())

TypeError: 'module' object is not callable

In [22]:
# Q-learning

class BlackJack_QLearning: 
    
    def __init__(self, lr=0.1, exp_rate=0.3):
            self.player_Q_Values = {}

            for i in range(12,22):
                for j in range(1,11):
                    for k in [True, False]:
                        self.player_Q_Values[(i,j,k)] = {}
                        for a in [1,0]:
                            if(i==21) and (a==0):
                                self.player_Q_Values[(i,j,k)][a] = 1
                            else:
                                self.player_Q_Values[(i,j,k)][a] = 0

            self.state_action = []
            self.state = (0, 0, False) 
            self.actions = [1, 0] #HIT, STAND
            self.final = False;
            self.lr = lr
            self.exp_rate = exp_rate

    def giveCard():
        c_list = list(range(1,11)) + [10, 10, 10]
        return np.random.choice(c_list)

    def dealerPolicy(self, value, usable_ace, is_end):
            if value > 21:
                if usable_ace:
                    value -= 10
                    usable_ace = False
                else:
                    return value, usable_ace, True

            if value >= 17:
                return value, usable_ace, True
            else:
                card = self.giveCard()
                if card == 1:
                    if value <= 10:
                        return value+11, True, False
                    return value+1, usable_ace, False
                else:
                    return value+card, usable_ace, False     

    def chooseAction(self):
            value = self.state[0]
            if value <= 11:
                return 1

            if np.random.uniform(0, 1) <= self.exp_rate:
                action = np.random.choice(self.actions)
            else:
                # greedy action
                v = -999
                action = 0
                for a in self.player_Q_Values[self.state]:
                    if self.player_Q_Values[self.state][a] > v:
                        action = a
                        v = self.player_Q_Values[self.state][a]
            return action    

    def playerNextState(self, action):
            value = self.state[0]
            show_card = self.state[1]
            usable_ace = self.state[2]

            if action:
                card = self.giveCard()
                if card == 1:
                    if value <= 10:
                        value += 11
                        usable_ace = True
                    else:
                        value += 1
                else:
                    value += card
            else:
                self.end = True
                return (value, show_card, usable_ace)

            if current_value > 21:
                if usable_ace:
                    value -= 10
                    usable_ace = False
                else:
                    self.end = True
                    return (value, show_card, usable_ace)

            return (value, show_card, usable_ace)    

    def winner(self, player_value, dealer_value):
            winner = 0
            if player_value > 21:
                if dealer_value > 21:
                    winner = 0
                else:
                    winner = -1
            else:
                if dealer_value > 21:
                    winner = 1
                else:
                    if player_value < dealer_value:
                        winner = -1
                    elif player_value > dealer_value:
                        winner = 1
                    else:
                        winner = 0
            return winner

    def _giveCredit(self, player_hand_value, dealer_hand_value):
            reward = self.winner(player_hand_value, dealer_hand_value)
            for s in reversed(self.player_state_action):
                state, action = s[0], s[1]
                reward = self.player_Q_Values[state][action] + self.lr*(reward - self.player_Q_Values[state][action])
                self.player_Q_Values[state][action] = round(reward, 3)    

    def reset(self):
        self.player_state_action = []
        self.state = (0, 0, False)  
        self.end = False            

    def deal_2Cards(self, show=False):
            value, usable_ace = 0, False
            cards = [self.giveCard(), self.giveCard()]
            if 1 in cards:
                value = sum(cards) + 10
                usable_ace = True
            else:
                value = sum(cards)
                usable_ace = False

            if show:
                return value, usable_ace, cards[0]
            else:
                return value, usable_ace
            
    def play(self, rounds=1000):
            for i in range(rounds):
                if i % 1000 == 0:
                    print("round", i)

                dealer_hand_value, d_usable_ace, show_card = self.deal_2Cards(show=True)
                player_hand_value, p_usable_ace = self.deal_2Cards(show=False)

                self.state = (player_hand_value, show_card, p_usable_ace)
                print("init", self.state)

                if player_hand_value == 21 or dealer_hand_value == 21:
                    next
                else:
                    while True:
                        action = self.chooseAction()  
                        if self.state[0] >= 12:
                            state_action_pair = [self.state, action]
                            self.player_state_action.append(state_action_pair)
                        self.state = self.playerNextState(action) #updating next state
                        if self.end:
                            break

                    is_end = False
                    while not is_end:
                        dealer_hand_value, d_usable_ace, is_end = self.dealerPolicy(dealer_value, d_usable_ace, is_end)

                    # judge winner, give reward and update Q value
                    player_hand_value = self.state[0]
                    print("player value {} | dealer value {}".format(player_hand_value, dealer_hand_value))
                    self._giveCredit(player_hand_value, dealer_hand_value)

                self.reset()   

    def savePolicy(self, file="policy"):
            fw = open(file, 'wb')
            pickle.dump(self.player_Q_Values, fw)
            fw.close()

    def loadPolicy(self, file="policy"):
        fr = open(file, 'rb')
        self.player_Q_Values = pickle.load(fr)
        fr.close()   

    # trained robot playing against dealer
    def playWithDealer(self, rounds=1000):
        self.reset()
        self.loadPolicy()
        self.exp_rate = 0

        result = np.zeros(3)  # player [win, draw, lose]
        for _ in range(rounds):
            dealer_hand_value, d_usable_ace, show_card = self.deal_2Cards(show=True)
            player_hand_value, p_usable_ace = self.deal_2Cards(show=False)

            self.state = (player_hand_value, show_card, p_usable_ace)

            if player_hand_value == 21 or dealer_hand_value == 21:
                if player_hand_value == dealer_hand_value:
                    result[1] += 1
                elif player_hand_value > dealer_hand_value:
                    result[0] += 1
                else:
                    result[2] += 1
            else:
                # player
                while True:
                    action = self.chooseAction()
                    # update next state
                    self.state = self.playerNextState(action)
                    if self.end:
                        break

                # dealer
                is_end = False
                while not is_end:
                    dealer_hand_value, d_usable_ace, is_end = self.dealerPolicy(dealer_hand_value, d_usable_ace, is_end)

                # judge
                player_hand_value = self.state[0]
                # print("player value {} | dealer value {}".format(player_value, dealer_value))
                w = self.winner(player_hand_value, dealer_hand_value)
                if w == 1:
                    result[0] += 1
                elif w == 0:
                    result[1] += 1
                else:
                    result[2] += 1
            self.reset()
        return                 
    
    def plot_oneline():
        episodes = 1000
        
        x = np.linspace(result[0], result[1], result[2])
        y = np.exp(episodes)
        
        fig, ax = plt.subplots()
        ax.plot(x, y)
        ax.hlines(y=0.2, xmin=4, xmax=20, linewidth=2, color='r')
        
        plt.show()