In [100]:
import random 
import math
#from collection import defaultdict

class Deck:
    def __init__(self):
        self.cards = [Card(s, v) for s in ["Spades" , "Clubs" , "Hearts ", 
        "Diamonds"] for v in ["A" ,"2" ,"3" ,"4" ,"5" ,"6" 
        ,"7" ,"8" ,"9" ,"10" , "J" , "Q" , "K"] ]
        
    def shuffle(self):
        if len(self.cards) > 1:
            random.shuffle(self.cards)
    
    def deal(self):
        if len(self.cards) > 1:
            return self.cards.pop(0)
    

class Card: 
    def __init__(self, suit , value): 
        self.suit = suit 
        self.value = value 
    
    def __repr__(self):
        return " of " .join((self.value , self.suit))
    
class Hand:
    def __init__(self , dealer = False):
        self.dealer = dealer 
        self.cards = []
        self.value = 0 
        self.has_ace = False

    def new_card(self, card):
        self.cards.append(card)

    def calculate_card_values(self):
        self.value = 0 
        self.has_ace = False
        for card in self.cards:
            if card.value.isnumeric():
                self.value += int(card.value)
            else:
                if card.value == "A":
                    self.has_ace = True
                    self.value += 11
                    if self.value > 21:
                        self.value -= 10 
                        self.has_ace = False
                else:
                    self.value += 10
        return self.value

    def show_cards(self, first_state):
        if self.dealer and first_state: 
            print("Hidden")
            print(self.cards[1])
        else:
            for card in self.cards: 
                print(card)
            print("Value:" , self.calculate_card_values())
    
    def return_dealer_card(self):
        card_val = self.cards[1].value
        if card_val.isnumeric():
            return card_val
        else:
            if card_val == "A":
                return 11
            else:
                return 10
            
class Agent:
    def __init__(self):
        pass
    
    def check(self, player_sum, dealer_card, player_has_ace):
        self.player_sum = player_sum
        self.dealer_card = dealer_card
        self.player_has_ace = player_has_ace
        
        if self.player_sum > 21 and self.player_has_ace == True:
            self.player_sum = player_sum - 10
            self.player_has_ace = False
        
        return self.player_sum, self.player_has_ace
        
        
class Game: 
    
    def __init__(self):
        self.reward = 0
        #self.num_episodes = 0
        #creating agent for each enivornment
        self.agent = Agent() 
    
    def start_game(self, num_of_episodes, Q, exploring_starts, e_config = None):
        
        agent = self.agent
        
        game_ongoing = True
        first_state = True

        print("Game Starting \n")
        
        self.deck = Deck()
        self.deck.shuffle()
        
        self.player_hand = Hand()
        self.dealer_hand = Hand(dealer=True)
        
        game_actions = []

        for i in range(2):
            card = self.deck.deal() 
            self.player_hand.new_card(card)
            card = self.deck.deal()
            self.dealer_hand.new_card(card)

        while game_ongoing == True: 
            
            print("\nYour hand is:")
            self.player_hand.show_cards(first_state)
            print()
            print("Dealer's hand is: ")
            self.dealer_hand.show_cards(first_state)
            
            player_blackjack, dealer_blackjack = self.blackjack_check()

            if player_blackjack or dealer_blackjack:
                game_ongoing = False 
                outcome = self.blackjack_display_results(player_blackjack , dealer_blackjack)
                return outcome, game_actions
            
            
            player_hand_value = self.player_hand.calculate_card_values()
            dealer_hand_value = self.dealer_hand.calculate_card_values()
            
            
            #dealer card is only hidden before player plays first round
            if first_state == True:
                dealer_sum = self.dealer_hand.return_dealer_card()
            else:
                dealer_sum = dealer_hand_value
            
           
            player_hand_value, player_has_ace = agent.check(player_hand_value, dealer_sum, self.player_hand.has_ace)
            
            if player_hand_value < 12:
                choice = 1
            elif player_hand_value > 20:
                choice = 2
            else:
                #monte carlo
                if exploring_starts == True and first_state == True:
                    #random action (explore)
                    choice = random.randint(1,2) #either choose hit or stand

                else:
                    #action is selected randomly with probability 𝜖 and  greedily with probability 1 − 𝜖
                    
                    if exploring_starts == False and e_config == 2:
                        #for not exploring starts configuration 2 (epsilon = 𝑒^(−𝑘/1000) )
                        epsilon = pow(math.e, ((0-num_of_episodes)/1000))
                    elif exploring_starts == False and e_config == 3:
                        #for not exploring starts configuration 2 (epsilon = 𝑒^(−𝑘/10000) )
                        epsilon = pow(math.e, ((0-num_of_episodes)/10000))
                    else:
                        #for exploring starts or not exploring starts configuration 1 (epsilon = 1/k)
                        epsilon = 1/num_of_episodes
                    
                    random_num = random.random() #generating random number between 0 and 1
                    
                    #if random number < epsilon, random action will be chosen
                    if random_num < epsilon:
                        #random action (explore)
                        choice = random.randint(1,2) #either choose hit or stand
                    else:
                        #greedy
                        for k, v in Q.items():
                            if k == (player_hand_value, player_has_ace, dealer_hand_value):
                                #choosing the greatest q value
                                if v[1] > v[2]:
                                    #hit has greater q value
                                    choice = 1
                                elif v[1] < v[2]:
                                    #stand has greater q value
                                    choice = 2
                                else:
                                    #if both actions have equal value then choose random action
                                    choice = random.randint(1,2)
                
                #choice = reinforment_learning_algorithm(agent, self.player_hand, player_hand_value, self.dealer_hand.return_dealer_card(), self.player_hand.has_ace, self.deck.cards)
           
                #game actions only stored in Q table if between 12 and 20 
                game_actions.append([(player_hand_value, player_has_ace, dealer_hand_value), choice])
            
            if (choice == 1):
                print("\nPlayer Hit")
                #player chose to hit
                self.player_hand.new_card(self.deck.deal())
                self.player_hand.show_cards(first_state)
                player_hand_value = self.player_hand.calculate_card_values()
                if (player_hand_value > 21):
                    print("\nPlayer has lost !")
                    game_ongoing = False
                    return -1, game_actions

            elif (choice == 2):
                #player chose to stand
                print("\nPlayer Stand")
                
                first_state = False
                
                # Dealer Policy
                if dealer_hand_value < 17:
                    print("\nDealer Hit")
                    self.dealer_hand.new_card(self.deck.deal())
                    self.dealer_hand.show_cards(first_state)
                    dealer_hand_value = self.dealer_hand.calculate_card_values()
                    if (dealer_hand_value > 21):
                        print("\nDealer has lost !")
                        game_ongoing = False
                        return 1, game_actions

                print("\nFinal Results:")
                print("Player's hand:" , player_hand_value)
                print("Dealer's hand:" , dealer_hand_value)
                
                if player_hand_value < 21 and dealer_hand_value < 21:
                    if (player_hand_value > dealer_hand_value):
                        print("\nPlayer Wins!")
                        return 1, game_actions
                    elif (player_hand_value == dealer_hand_value):
                        print("\nTie!")
                        return 0, game_actions
                    else: 
                        print("\nDealer Wins!")
                        return -1, game_actions
                elif player_hand_value >= 21 and dealer_hand_value < 21:
                    print("\nDealer Wins!")
                    return -1, game_actions
                elif dealer_hand_value >= 21 and player_hand_value < 21:
                    print("\nPlayer Wins!")
                    return 1, game_actions
                else:
                    print("\nTie!")
                    return 0, game_actions
            
                game_ongoing = False 
        
        first_state = False


    def blackjack_check(self):
        player = False 
        dealer = False
        
        if self.player_hand.calculate_card_values() == 21: 
            player = True
        if self.dealer_hand.calculate_card_values() == 21:
            dealer = True 

        return player, dealer 
    
    def blackjack_display_results(self , player_blackjack , dealer_blackjack):
        if player_blackjack and dealer_blackjack:
            print("Both players have blackjack! Draw!")
            return 0

        elif player_blackjack: 
            print("Player has blackjack! Player wins!")
            return 1
        
        elif dealer_blackjack:
            print("Dealer has blackjack! Dealer wins!")
            return -1

def reinforment_learning_algorithm(agent, player_cards, player_sum, dealer_sum, player_has_ace, deck_cards):
    has_ace = player_has_ace
    agent.check(player_sum, dealer_sum, player_has_ace)
    
    hit_num = 0
    stand_num = 0
    hit_value = 0
    stand_value = 0

    if has_ace == player_has_ace:
        converted_ace = False
    else:
        converted_ace = True

    possible_outcomes = []

    if player_sum < 12:
        #Hit when sum is less than 12 
        hit_num = hit_num + 1
        choice = 1
    elif (player_sum == 21):
        stand_num = stand_num + 1
        # Stand if sum is equal to 21
        choice = 2
    elif(player_sum >= 12 and player_sum < 21):
        for possible_card in deck_cards:
            possible_cards = possible_card.value
            
            if '2' in possible_cards:
                possible_value = 2
            elif '3' in possible_cards:
                possible_value = 3
            elif '4' in possible_cards:
                possible_value = 4
            elif '5' in possible_cards:
                possible_value = 5
            elif '6' in possible_cards:
                possible_value = 6
            elif '7' in possible_cards:
                possible_value = 7
            elif '8' in possible_cards:
                possible_value = 8
            elif '9' in possible_cards:
                possible_value = 9
            elif '10' in possible_cards:
                possible_value = 10
            elif 'J' in possible_cards:
                possible_value = 10
            elif 'Q' in possible_cards:
                possible_value = 10
            elif 'K' in possible_cards:
                possible_value = 10
            elif 'A' in possible_cards:
                possible_value = 11

            player_total = player_sum + possible_value

            if player_total < 21:
                choice = 1 #to hit
                hit_num = hit_num + 1
                if hit_num == 1:
                    hit_value = player_total
                else:
                    hit_value = (hit_value + player_total)/2
            elif player_total >= 21 and possible_value == 11 and converted_ace == False and player_total-10 < 21:
                player_total = player_total - 10
                choice = 1 #to hit
                hit_num = hit_num + 1
                if hit_num == 1:
                    hit_value = player_total
                else:
                    hit_value = (hit_value + player_total)/2
            else:
                player_total = player_sum
                choice = 2 #to stand
                stand_num = stand_num + 1
                if stand_num == 1:
                    stand_value = player_total
                else:
                    stand_value = round((stand_value + player_total)/2)

            possible_outcomes.append([player_sum, possible_card, player_total, choice])

    print("Number of hits on action: ", hit_num)
    print("Number of stands on action: ", stand_num,"\n")
    print("Estimated hit value: ", round(hit_value),"\n")
    print("Estimated stand value: ", stand_value,"\n")
    #print(possible_outcomes, "\n")   

    if hit_num > stand_num:
        print("Hit")
        return 1 #to hit
    elif hit_num < stand_num:
        print("Stand")
        return 2 #to stand
    else:
        if hit_value > stand_value:
            print("Hit")
            return 1 # to hit
        else:
            print("Stand")
            return 2 #if equal then best option is to stand
    
    
if __name__ == "__main__":
    g = Game()
    num_episodes = 10
    
    win_count = 0
    loss_count = 0
    draw_count = 0
    win = []
    loss = []
    draw = []
    
    # q(s,a)    
    Q = {}
    N = {}
    
    found = False
    
    for i in range(12,21):
        for j in [True, False]:
            for k in range(1,22):
                Q[(i,j,k)] = {}
                N[(i,j,k)] = {}
                for a in [1,2]:
                    Q[(i,j,k)][a] = 0
                    N[(i,j,k)][a] = 0
    
    for n in range(num_episodes):
        outcome, game_actions = g.start_game((n+1), Q, exploring_starts = True, e_config = None)
        #outcome, game_actions = g.start_game((n+1), Q, exploring_starts = False, e_config = 1)
        #outcome, game_actions = g.start_game((n+1), Q, exploring_starts = False, e_config = 2)
        #outcome, game_actions = g.start_game((n+1), Q, exploring_starts = False, e_config = 3)
        
        print("Exited game")
        
        if outcome == 1:
            win_count = win_count+1
        elif outcome == 0:
            draw_count = draw_count+1
        elif outcome == -1:
            loss_count = loss_count+1
        
        for i in range(len(game_actions)):
            for k_q, v_q in Q.items():
                for k_n, v_n in N.items():
                    if k_q == game_actions[i][0] and k_n == game_actions[i][0]:
                        v_n[game_actions[i][1]] = v_n[game_actions[i][1]] + 1
                        v_q[game_actions[i][1]] = (v_q[game_actions[i][1]] + ((1/v_n[game_actions[i][1]]) * (outcome - v_q[game_actions[i][1]])))
        
        
        if ((num_episodes % 1000) == 0):
            win.append(win_count)
            loss.append(loss_count)
            draw.append(draw_count)
            
            win_count = 0
            loss_count = 0
            draw_count = 0
    print("Finished")
    print("N: ",N)
    print("Q: ",Q) 


        

        #g.num_episodes = g.num_episodes + 1

Game Starting 


Your hand is:
9 of Clubs
5 of Diamonds
Value: 14

Dealer's hand is: 
Hidden
Q of Hearts 

Player Hit
9 of Clubs
5 of Diamonds
7 of Hearts 
Value: 21

Your hand is:
9 of Clubs
5 of Diamonds
7 of Hearts 
Value: 21

Dealer's hand is: 
Hidden
Q of Hearts 
Player has blackjack! Player wins!
Exited game
Game Starting 


Your hand is:
5 of Diamonds
8 of Diamonds
Value: 13

Dealer's hand is: 
Hidden
9 of Spades

Player Stand

Dealer Hit
Hidden
9 of Spades

Dealer has lost !
Exited game
Game Starting 


Your hand is:
2 of Clubs
9 of Spades
Value: 11

Dealer's hand is: 
Hidden
3 of Hearts 

Player Hit
2 of Clubs
9 of Spades
6 of Spades
Value: 17

Your hand is:
2 of Clubs
9 of Spades
6 of Spades
Value: 17

Dealer's hand is: 
Hidden
3 of Hearts 

Player Stand

Dealer Hit
Hidden
3 of Hearts 

Final Results:
Player's hand: 17
Dealer's hand: 11

Player Wins!
Exited game
Game Starting 


Your hand is:
K of Diamonds
10 of Spades
Value: 20

Dealer's hand is: 
Hidden
Q of Hearts 
Dealer 