# Mastering the Card Game of Jaipur through Zero-knowledge Self-Play Reinforcement Learning and Action Masks

# Jaipur Board Game:

In [None]:
import random

import functools

import gymnasium
import numpy as np
from gymnasium import spaces

import pettingzoo
from pettingzoo import AECEnv
from pettingzoo.utils import agent_selector, wrappers

class Player():
    """Per-round state of one Jaipur player.

    Attributes:
        id: identifier passed to the constructor (0 or 1 in this file).
        cards: number of cards held per card type, camels included.
        herd: number of camels held.
        hand: number of goods cards held (camels are not counted).
        bonus: points collected so far from tokens.
        opp_points: what this player knows about the opponent's points.
        opp_camels: what this player knows about the opponent's herd size.
    """

    def __init__(self, p_id):
        self.id = p_id
        # Create the round state right away so a new player can be inspected
        # before the first round is set up, instead of raising AttributeError.
        self.reset()

    def reset(self):
        """Return the player to an empty hand, empty herd and zero points."""
        self.cards = {"Diamond": 0, "Gold": 0, "Silver": 0, "Silk": 0, "Spice": 0, "Leather": 0, "Camel": 0}
        self.herd = 0 #herd size
        self.hand = 0 #hand size
        self.bonus = 0
        self.opp_points = 0
        self.opp_camels = 0

class Card():
    """The Jaipur draw pile, stored as a list of card-type names."""

    def __init__(self):
        # 55 cards in total: 11 camels, 6 each of diamond/gold/silver,
        # 8 each of silk/spice and 10 leather.
        composition = (
            ("Camel", 11),
            ("Diamond", 6),
            ("Gold", 6),
            ("Silver", 6),
            ("Silk", 8),
            ("Spice", 8),
            ("Leather", 10),
        )
        deck = []
        for name, copies in composition:
            deck.extend([name] * copies)

        # The deck starts out sorted by name; shuffle() randomises it later.
        deck.sort()
        self.cards = deck

    def shuffle(self):
        """Shuffle the remaining cards in place (no-op for fewer than 2 cards)."""
        if len(self.cards) > 1:
            random.shuffle(self.cards)

    def deal(self):
        """Remove and return the top card, or None when the deck is empty."""
        if not self.cards:
            return None
        return self.cards.pop(0)

    def deal_camel(self):
        """Remove and return the first camel in the deck, or None if there is none."""
        if "Camel" not in self.cards:
            return None
        return self.cards.pop(self.cards.index("Camel"))

class Token():
    """Token supply for one round: goods piles, bonus piles and the camel token."""

    def __init__(self):
        # Goods token piles. Values are taken from the front of each list
        # when cards of that type are sold.
        goods_piles = (
            ("leather", [4, 3, 2, 1, 1, 1, 1, 1, 1]),
            ("spice", [5, 3, 3, 2, 2, 1, 1]),
            ("silk", [5, 3, 3, 2, 2, 1, 1]),
            ("silver", [5, 5, 5, 5, 5]),
            ("gold", [6, 6, 5, 5, 5]),
            ("diamond", [7, 7, 5, 5, 5]),
        )
        for pile_name, values in goods_piles:
            setattr(self, pile_name, values)

        # 18 bonus tokens, grouped by how many cards were sold at once.
        self.bonus_3 = [1, 1, 2, 2, 2, 3, 3]
        self.bonus_4 = [4, 4, 5, 5, 6, 6]
        self.bonus_5 = [8, 8, 9, 10, 10]

        # Bonus piles are drawn blind, so each one is shuffled.
        for pile in (self.bonus_3, self.bonus_4, self.bonus_5):
            random.shuffle(pile)

        # Points awarded by the single camel token.
        self.camel = 5

        # Names of the goods types whose token pile has run out.
        self.empty = []


class Jaipur():
    def __init__(self):
        """Create the two players and the trade-action lookup table.

        No round is set up here; round() must be called before playing.
        """
        self.p1, self.p2 = Player(0), Player(1)
        self.create_trade_dict()

    def create_trade_dict(self):
        # creating trade actions dictionary of 25,456 actions

        in_hand = ["Diamond", "Gold", "Silver", "Spice", "Silk", "Leather", "Camel"]
        marketplace_cards = ["Diamond", "Gold", "Silver", "Spice", "Silk", "Leather"]

        self.Q_trade = {}
        count = 13

        #trading 2 cards
        for i in range(len(in_hand)-1, -1, -1):
            for j in range(i, -1, -1):

                for a in range(len(marketplace_cards)-1, -1, -1):
                    for b in range(a, -1, -1):

                        if marketplace_cards[a] != in_hand[i] and marketplace_cards[a] != in_hand[j] and marketplace_cards[b] != in_hand[i] and marketplace_cards[b] != in_hand[j]:
                            self.Q_trade[count] = ((in_hand[i], in_hand[j], 'na', 'na', 'na'), (marketplace_cards[a], marketplace_cards[b], 'na', 'na', 'na')) #= 0
                            count = count + 1

        #trading 3 cards
        for i in range(len(in_hand)-1, -1, -1):
            for j in range(i, -1, -1):
                for k in range(j, -1, -1):

                    for a in range(len(marketplace_cards)-1, -1, -1):
                        for b in range(a, -1, -1):
                            for c in range(b, -1, -1):

                                if in_hand[i] != marketplace_cards[a] and in_hand[j] != marketplace_cards[a] and in_hand[k] != marketplace_cards[a] and in_hand[i] != marketplace_cards[b] and in_hand[j] != marketplace_cards[b] and in_hand[k] != marketplace_cards[b] and in_hand[i] != marketplace_cards[c] and in_hand[j] != marketplace_cards[c] and in_hand[k] != marketplace_cards[c]:
                                    self.Q_trade[count] = ((in_hand[i], in_hand[j], in_hand[k], 'na', 'na'), (marketplace_cards[a], marketplace_cards[b], marketplace_cards[c], 'na', 'na')) #= 0
                                    count = count + 1

        #trading 4 cards
        for i in range(len(in_hand)-1, -1, -1):
            for j in range(i, -1, -1):
                for k in range(j, -1, -1):
                    for l in range(k, -1, -1):

                        for a in range(len(marketplace_cards)-1, -1, -1):
                            for b in range(a, -1, -1):
                                for c in range(b, -1, -1):
                                    for d in range(c, -1, -1):

                                        if in_hand[i] != marketplace_cards[a] and in_hand[j] != marketplace_cards[a] and in_hand[k] != marketplace_cards[a] and in_hand[l] != marketplace_cards[a] and in_hand[i] != marketplace_cards[b] and in_hand[j] != marketplace_cards[b] and in_hand[k] != marketplace_cards[b] and in_hand[l] != marketplace_cards[b] and in_hand[i] != marketplace_cards[c] and in_hand[j] != marketplace_cards[c] and in_hand[k] != marketplace_cards[c] and in_hand[l] != marketplace_cards[c] and in_hand[i] != marketplace_cards[d] and in_hand[j] != marketplace_cards[d] and in_hand[k] != marketplace_cards[d] and in_hand[l] != marketplace_cards[d]:
                                            self.Q_trade[count] = ((in_hand[i], in_hand[j], in_hand[k], in_hand[l], 'na'), (marketplace_cards[a], marketplace_cards[b], marketplace_cards[c], marketplace_cards[d], 'na')) #= 0
                                            count = count + 1

        #trading 5 cards
        for i in range(len(in_hand)-1, -1, -1):
            for j in range(i, -1, -1):
                for k in range(j, -1, -1):
                    for l in range(k, -1, -1):
                        for m in range(l, -1, -1):

                            for a in range(len(marketplace_cards)-1, -1, -1):
                                for b in range(a, -1, -1):
                                    for c in range(b, -1, -1):
                                        for d in range(c, -1, -1):
                                            for e in range(d, -1, -1):

                                                if in_hand[i] != marketplace_cards[a] and in_hand[j] != marketplace_cards[a] and in_hand[k] != marketplace_cards[a] and in_hand[l] != marketplace_cards[a] and in_hand[m] != marketplace_cards[a] and in_hand[i] != marketplace_cards[b] and in_hand[j] != marketplace_cards[b] and in_hand[k] != marketplace_cards[b] and in_hand[l] != marketplace_cards[b] and in_hand[m] != marketplace_cards[b] and in_hand[i] != marketplace_cards[c] and in_hand[j] != marketplace_cards[c] and in_hand[k] != marketplace_cards[c] and in_hand[l] != marketplace_cards[c] and in_hand[m] != marketplace_cards[c] and in_hand[i] != marketplace_cards[d] and in_hand[j] != marketplace_cards[d] and in_hand[k] != marketplace_cards[d] and in_hand[l] != marketplace_cards[d] and in_hand[m] != marketplace_cards[d] and in_hand[i] != marketplace_cards[e] and in_hand[j] != marketplace_cards[e] and in_hand[k] != marketplace_cards[e] and in_hand[l] != marketplace_cards[e] and in_hand[m] != marketplace_cards[e]:
                                                    self.Q_trade[count] = ((in_hand[i], in_hand[j], in_hand[k], in_hand[l], in_hand[m]), (marketplace_cards[a], marketplace_cards[b], marketplace_cards[c], marketplace_cards[d], marketplace_cards[e])) #= 0
                                                    count = count + 1


    def round(self):
        """Set up a new round: fresh deck and tokens, marketplace, and both hands."""
        self.cards = Card()
        self.tokens = Token()

        for participant in (self.p1, self.p2):
            participant.reset()

        self.marketplace = []
        self.discard = []

        # The marketplace opens with three camels taken from the unshuffled deck...
        self.marketplace.extend(self.cards.deal_camel() for _ in range(3))

        # ...and two cards drawn after the deck has been shuffled.
        self.cards.shuffle()
        self.marketplace.extend(self.cards.deal() for _ in range(2))

        print("Marketplace:", self.marketplace)

        # Five cards to each player; camels go to the herd, goods to the hand.
        for participant in (self.p1, self.p2):
            for _ in range(5):
                drawn = self.cards.deal()
                if drawn not in participant.cards:
                    continue
                participant.cards[drawn] += 1
                if drawn == "Camel":
                    participant.herd += 1
                else:
                    participant.hand += 1

        print("Player 1 cards: ", self.p1.cards)
        print("Player 2 cards: ", self.p2.cards)

        # Each player is told the size of the other's herd.
        self.p1.opp_camels = self.p2.herd
        self.p2.opp_camels = self.p1.herd

    def final(self):
        player = ""

        #checking for which player had the most camels
        if self.p1.cards["Camel"] > self.p2.cards["Camel"]:
        #len(self.p1.herd) > len(self.p2.herd):
            #if p1 had more camels, p1 will take the camel token bonus
            self.p1.bonus = self.p1.bonus + self.tokens.camel
            #print("P1 has more camels")
            player = "player_1"
        elif self.p1.cards["Camel"] < self.p2.cards["Camel"]:
        #len(self.p1.herd) < len(self.p2.herd):
            #if p2 had more camels, p2 will take the camel token bonus
            self.p2.bonus = self.p2.bonus + self.tokens.camel
            #print("P2 has more camels")
            player = "player_2"
        #if both players had the same amount of camels, neither player will take camel token

        print("Player 1 has ", self.p1.bonus, " points.\nPlayer 2 has ", self.p2.bonus, " points.")
        return player


    def take_1_good(self, chosen, player):
        #checking that the player's hand is not 7 or greater
        if player.hand > 6:
            #displaying error message
            #print("ERROR! Unable to take card as hand size must never be more than 7 cards.")
            return

        #making sure that the chosen card is not a camel card
        if chosen != "Camel":
            #setting found to False
            found = False
            #looping to find chosen card to take
            for i in range(len(self.marketplace)):
                #if card is found
                if self.marketplace[i] == chosen:
                    #removing card from marketplace
                    card = self.marketplace.pop(i)
                    #adding card to player's cards
                    if card in player.cards:
                        player.cards[card] = player.cards[card] + 1
                        #increasing player hand amount
                        player.hand = player.hand + 1


                    #setting found to True
                    found = True

                    #getting the next card from the deck
                    new_card = self.cards.deal()

                    #if there was a card in the deck
                    if new_card != None:
                        #adding one card from deck to marketplace
                        self.marketplace.append(new_card)

                    #exiting out of the loop
                    break
            #displaying error message if the chosen card was not found in the marketplace
            if found == False:
                #print("\nERROR! The chosen card: ",chosen, " was not available in the marketplace.")
                return
        else:
            #print("\nERROR! The chosen card must be a goods card")
            return


    def take_goods(self, chosen, player, replacable):
        #checking that the player is replacing all cards taken
        if(len(chosen) != len(replacable)):
            #print("You must replace all the cards taken!")
            return

        #to keep track of how many camels the player is going to use to replace the cards
        camels = 0

        p_cards = player.cards.copy()

        #looping to check that all the cards are in the player's hand or herd
        for k in range(len(replacable)):
            #if the card is not found in the player's cards (hand/herd)
            if p_cards[replacable[k]] == 0:
            #if not (replacable[k] in player.hand or replacable[k] in player.herd):
                #displaying error message
                #print("ERROR! The card: ", replacable[k], " is not present in your hand/herd.")
                #exiting the function
                return
            #checking for Camel cards
            elif replacable[k] == "Camel":
                #increasing the count by 1 if a camel is found
                camels = camels + 1
            #decreasing the value of the card in temporary dictionary
            #to make sure that the player has enough cards
            p_cards[replacable[k]] = p_cards[replacable[k]] - 1


        #checking the new hand size
        if((player.hand-(len(chosen)-camels))+len(replacable)) > 7:
            #displaying error message if hand size will be greater than 7
            #print("ERROR! Unable to trade these cards as hand size must never be more than 7 cards.")
            #exiting the function
            return
        #print("Player hand: ", player.hand)

        for i in chosen:
            if i in replacable:
                #print("ERROR! Unable to trade cards of same card type.")
                return

        if "Camel" in chosen:
            #print("ERROR! The chosen cards must be goods cards not camel cards! ")
            return

        temp = self.marketplace.copy()
        found = False
        for i in chosen:
            for j in range(len(temp)-1, -1, -1):
                if i == temp[j]:
                    temp.pop(j)
                    found = True
                    break
            if found == False:
                #print("ERROR! The chosen cards were not available in the marketplace.")
                return

        player.hand = (player.hand-(len(chosen)-camels))+len(replacable)
        #print("Player hand: ", player.hand)

        #looping backwards to find the chosen cards to take
        for i in range(len(chosen)-1, -1, -1):
            #looping backwards in the marketplace to search for the chosen cards
            for j in range(len(self.marketplace)-1, -1, -1):
                #if card is found
                if self.marketplace[j] == chosen[i]:
                    #removing card from marketplace
                    card = self.marketplace.pop(j)
                    #adding card to player's hand
                    player.cards[card] = player.cards[card] + 1
                    #player.hand.append(card)

                    #removing card from chosen array
                    chosen.pop(i)

                    #adding the replacement card to the marketplace
                    self.marketplace.append(replacable[i])
                    #removing replacement card from player's cards
                    player.cards[replacable[i]] = player.cards[replacable[i]] - 1

                    #exiting out of the loop to move onto next chosen card
                    break


    def take_camels(self, player):
        #to keep track of how many camels were taken
        count = 0
        #looping backwards in the marketplace to search for camels
        for i in range(len(self.marketplace)-1, -1, -1):
            #if a camel card is found
            if self.marketplace[i] == "Camel":
                #removing camel card from marketplace
                card = self.marketplace.pop(i)
                #adding camel card to player's herd
                player.cards[card] = player.cards[card] + 1
                player.herd = player.herd + 1
                #player.herd.append(card)
                #increasing count
                count = count + 1

        #looping to add cards to marketplace according to how many camels were taken
        #after camels were taken
        for i in range(count):
            if len(self.marketplace) >= 5:
                #print("ERROR! Exceeding marketplace size.")
                return
            #getting the next card from the deck
            new_card = self.cards.deal()

            #if there was a card in the deck
            if new_card != None:
                #adding the card to the marketplace
                self.marketplace.append(new_card)

    def sell_goods(self, chosen, player):
        #checking that the player is selling at least 2 cards if of type silver, gold or diamond
        if len(chosen) < 2 and (chosen[0] == "Silver" or chosen[0] == "Gold" or chosen[0] == "Diamond"):
            #displaying error message if the player is trying to sell less than 2 cards
            #print("ERROR! Must sell at least 2 cards when selling type ", chosen[0], ".")
            #exiting function
            return

        #looping to check that all cards are of the same type
        for i in range(len(chosen)-1):
            if chosen[0] != chosen[i+1]:
                #displaying error message if a different type is found
                #print("ERROR! Cards must all be of the same type.")
                #exiting function
                return


        #checking that the player has all cards in his hand
        if player.cards[chosen[0]] < len(chosen):
            #displaying error message if player is trying to sell more cards than he owns
            #print("ERROR! You do not have ", len(chosen), " cards of type ", chosen[0], " in your hand.")
            return

        #to keep track of how many tokens were sold
        count = 0

        #looping through the chosen cards to sell
        for i in range(len(chosen)-1, -1, -1):
            #getting token amount
            amount = self.ret_token_amount(chosen[0])
            #print("token amount: ", amount)
            #if there aren't anymore tokens left
            #sell card without taking token
            if amount == 0:
                #removing card from player's hand
                self.sell_card(chosen[i], player)
                #increasing count by 1
                count = count + 1

            elif amount != 0:
                #removing card from player's hand
                self.sell_card(chosen[i],player)
                #taking a token and getting it's value
                tok = self.pop_token(chosen[0])
                #adding token value to player's bonus
                player.bonus = player.bonus + tok
                #increasing count by 1
                count = count + 1

                #checking if the token array is empty
                self.check_token(chosen[0])

        #player will know opponent's token points
        if player.id == 0:
            #if p1 sold cards
            self.p2.opp_points = player.bonus
        elif player.id == 1:
            #if p2 sold cardsret_token_amount
            self.p1.opp_points = player.bonus

        #player will receive their appropriate bonus token if they sell more than 3 cards
        bonus = self.pop_bonus_token(count)
        #adding bonus token value to player's bonus
        player.bonus = player.bonus + bonus


    def ret_token_amount(self, c_type):
        #returning the amount of tokens left of the card type
        if c_type == "Diamond":
            return len(self.tokens.diamond)
        elif c_type == "Gold":
            return len(self.tokens.gold)
        elif c_type == "Silver":
            return len(self.tokens.silver)
        elif c_type == "Silk":
            return len(self.tokens.silk)
        elif c_type == "Spice":
            return len(self.tokens.spice)
        elif c_type == "Leather":
            return len(self.tokens.leather)
        else:
            #print("ERROR! Token type not found.")
            return

    def pop_token(self, c_type):
        #returning the token value depending on the card type
        #token will be removed from the array
        if c_type == "Diamond":
            return self.tokens.diamond.pop(0)
        elif c_type == "Gold":
            return self.tokens.gold.pop(0)
        elif c_type == "Silver":
            return self.tokens.silver.pop(0)
        elif c_type == "Silk":
            return self.tokens.silk.pop(0)
        elif c_type == "Spice":
            return self.tokens.spice.pop(0)
        elif c_type == "Leather":
            return self.tokens.leather.pop(0)
        else:
            #print("ERROR! Token type not found.")
            return

    def pop_bonus_token(self, count):
        #returning the token value depending on how many cards were sold
        #token will be removed from the array
        if count == 3 and len(self.tokens.bonus_3) > 0:
            return self.tokens.bonus_3.pop(0)
        elif count == 4 and len(self.tokens.bonus_4) > 0:
            return self.tokens.bonus_4.pop(0)
        elif count >= 5 and len(self.tokens.bonus_5) > 0:
            return self.tokens.bonus_5.pop(0)
        else:
            return 0

    def sell_card(self, card, player):
        if card in player.cards:
            if player.cards[card] > 0:
                #removing the card from the players' cards
                player.cards[card] = player.cards[card] - 1
                player.hand = player.hand - 1
                #adding the card to the discard pile
                self.discard.append(card)
            else:
                print("ERROR! Card not found in player's hand.")
                return

    def check_token(self, c_type):
        #checking if there are any more tokens left of the card type
        #if an array is found to be empty, it will be appended to the empty tokens array
        if c_type == "Diamond":
            if(len(self.tokens.diamond) == 0):
                print("Finished tokens of type: ", c_type)
                self.tokens.empty.append(c_type)
        elif c_type == "Gold":
            if(len(self.tokens.gold) == 0):
                print("Finished tokens of type: ", c_type)
                self.tokens.empty.append(c_type)
        elif c_type == "Silver":
            if(len(self.tokens.silver) == 0):
                print("Finished tokens of type: ", c_type)
                self.tokens.empty.append(c_type)
        elif c_type == "Silk":
            if(len(self.tokens.silk) == 0):
                print("Finished tokens of type: ", c_type)
                self.tokens.empty.append(c_type)
        elif c_type == "Spice":
            if(len(self.tokens.spice) == 0):
                print("Finished tokens of type: ", c_type)
                self.tokens.empty.append(c_type)
        elif c_type == "Leather":
            if(len(self.tokens.leather) == 0):
                print("Finished tokens of type: ", c_type)
                self.tokens.empty.append(c_type)
        else:
            print("ERROR! Token type not found.")
            return

    def finished(self):
        #if at least three goods arrays are empty or marketplace has less than 5 cards, the round is finished
        if ((len(self.tokens.empty) >= 3) or (len(self.marketplace) < 5)):
            #print("finished",self.tokens.empty, len(self.marketplace), len(self.cards.cards))
            return True
        #otherwise the round continues
        else:
            return False

    def sell_cards(self, player, card_type):
        chosen = []
        if player.cards[card_type] > 0:
            #print("Amount: ", player.cards[card_type])
            for i in range(player.cards[card_type]):
                chosen.append(card_type)
        else:
            #print("ERROR! Must sell at least 1 card.")
            return []

        return chosen

    def sell_min2_cards(self, player, card_type):
        chosen = []
        if player.cards[card_type] > 1:
            #print("Amount: ", player.cards[card_type])
            for i in range(player.cards[card_type]):
                chosen.append(card_type)
        else:
            #print("ERROR! Must sell at least 2 cards when selling type ", card_type,".")
            return []

        return chosen

    def options(self, player, choice):
        #take diamond, take gold, take silver, take silk, take spice, take leather, take_goods, take_camels, sell_goods

        if choice == 0:
            print("Taking 1 Diamond")
#             card = self.choose_card()
#             self.take_1_good(card, player)
            self.take_1_good("Diamond", player)
        elif choice == 1:
            print("Taking 1 Gold")
            self.take_1_good("Gold", player)
        elif choice == 2:
            print("Taking 1 Silver")
            self.take_1_good("Silver", player)
        elif choice == 3:
            print("Taking 1 Silk")
            self.take_1_good("Silk", player)
        elif choice == 4:
            print("Taking 1 Spice")
            self.take_1_good("Spice", player)
        elif choice == 5:
            print("Taking 1 Leather")
            self.take_1_good("Leather", player)
        elif choice == 6:
            print("Taking All Camels")
            self.take_camels(player)
        elif choice >= 7 and choice <= 12:
            chosen = []
            if choice == 7:
                #print("Selling Diamond")
                chosen = self.sell_min2_cards(player, "Diamond")
            elif choice == 8:
                #print("Selling Gold")
                chosen = self.sell_min2_cards(player, "Gold")
            elif choice == 9:
                #print("Selling Silver")
                chosen = self.sell_min2_cards(player, "Silver")
            elif choice == 10:
                #print("Selling Silk")
                chosen = self.sell_cards(player, "Silk")
            elif choice == 11:
                #print("Selling Spice")
                chosen = self.sell_cards(player, "Spice")
            elif choice == 12:
                #print("Selling Leather")
                chosen = self.sell_cards(player, "Leather")

            if chosen != []:
                print("Selling:",chosen)
                self.sell_goods(chosen, player)

        elif choice > 12 and choice <= 25468:
            #trading goods

            replacable = []
            chosen = []
            for i in self.Q_trade[choice][0]: #replacable
                if i == "Camel":
                    replacable.append("Camel")
                elif i == "Leather":
                    replacable.append("Leather")
                elif i == "Spice":
                    replacable.append("Spice")
                elif i == "Silk":
                    replacable.append("Silk")
                elif i == "Silver":
                    replacable.append("Silver")
                elif i == "Gold":
                    replacable.append("Gold")
                elif i == "Diamond":
                    replacable.append("Diamond")

            for j in self.Q_trade[choice][1]: #chosen
                if j == "Leather":
                    chosen.append("Leather")
                elif j == "Spice":
                    chosen.append("Spice")
                elif j == "Silk":
                    chosen.append("Silk")
                elif j == "Silver":
                    chosen.append("Silver")
                elif j == "Gold":
                    chosen.append("Gold")
                elif j == "Diamond":
                    chosen.append("Diamond")

            if (len(replacable) == len(chosen)) and len(replacable) >= 2:
                print("Trading cards",chosen, " from the marketplace with ", replacable)
                self.take_goods(chosen, player, replacable)


        #updating player herd number
        player.herd = player.cards["Camel"]

        #player will know opponent's number of camels
        if player.id == 0:
            #if p1 is playing
            self.p2.opp_camels = player.herd
        elif player.id == 1:
            #if p2 is playing
            self.p1.opp_camels = player.herd


    def get_masked_options(self, agent):
        if agent == "player_1":
            player = self.p1
        elif agent == "player_2":
            player = self.p2
        else:
            print("Error! Incorrect agent entered.")
            return

        options = []
        for i in range(0, 25469):
            options.append(1) #all possible

        #take 1 goods
        #checking that player has less than 7 cards in his hand
        if player.hand <= 6:
            if "Diamond" not in self.marketplace:
                options[0] = 0

            if "Gold" not in self.marketplace:
                options[1] = 0

            if "Silver" not in self.marketplace:
                options[2] = 0

            if "Silk" not in self.marketplace:
                options[3] = 0

            if "Spice" not in self.marketplace:
                options[4] = 0

            if "Leather" not in self.marketplace:
                options[5] = 0

        else:
            #if player has 7 cards in hand, they cannot take another card
            for i in range(0,6):
                options[i] = 0


        mar_camel = 0
        #checking if there are Camel cards in the marketplace
        if "Camel" not in self.marketplace:
            options[6] = 0
        else:
          #to be used for trading cards check
            for i in self.marketplace:
                if i == "Camel":
                    mar_camel = mar_camel + 1

        #checking if the player can sell any cards
        if player.cards["Diamond"] < 2:
            options[7] = 0

        if player.cards["Gold"] < 2:
            options[8] = 0

        if player.cards["Silver"] < 2:
            options[9] = 0

        if player.cards["Silk"] < 1:
            options[10] = 0

        if player.cards["Spice"] < 1:
            options[11] = 0

        if player.cards["Leather"] < 1:
            options[12] = 0

        #if player doesn't have at least 2 cards or marketplace has 4 or more camels, all trading options are impossible
        if ((player.hand + player.herd) < 2) or mar_camel >= 4:
            #("All trading card options impossible")
            for i in range(13, 25469):
                options[i] = 0
        else:

            mar_leather = 0
            mar_spice = 0
            mar_silk = 0
            mar_silver = 0
            mar_gold = 0
            mar_diamond = 0

            for i in self.marketplace:
                if i == "Leather":
                    mar_leather = mar_leather + 1
                elif i == "Spice":
                    mar_spice = mar_spice + 1
                elif i == "Silk":
                    mar_silk = mar_silk + 1
                elif i == "Silver":
                    mar_silver = mar_silver + 1
                elif i == "Gold":
                    mar_gold = mar_gold + 1
                elif i == "Diamond":
                    mar_diamond = mar_diamond + 1

            #trading options
            for i in self.Q_trade:
                diamond = 0
                gold = 0
                silver = 0
                silk = 0
                spice = 0
                leather = 0
                camel = 0

                for j in self.Q_trade[i][0]: #in_hand options

                    if j == "Camel":
                        camel = camel + 1
                    elif j == "Leather":
                        leather = leather + 1
                    elif j == "Spice":
                        spice = spice + 1
                    elif j == "Silk":
                        silk = silk + 1
                    elif j == "Silver":
                        silver = silver + 1
                    elif j == "Gold":
                        gold = gold + 1
                    elif j == "Diamond":
                        diamond = diamond + 1

                if camel > 0 and (camel > player.cards["Camel"] or camel > (7-player.hand)):
                    options[i] = 0 #not possible

                elif leather > 0 and leather > player.cards["Leather"]:
                    options[i] = 0 #not possible

                elif spice > 0 and spice > player.cards["Spice"]:
                    options[i] = 0 #not possible

                elif silk > 0 and silk > player.cards["Silk"]:
                    options[i] = 0 #not possible

                elif silver > 0 and silver > player.cards["Silver"]:
                    options[i] = 0 #not possible

                elif gold > 0 and gold > player.cards["Gold"]:
                    options[i] = 0 #not possible

                elif diamond > 0 and diamond > player.cards["Diamond"]:
                    options[i] = 0 #not possible

                if options[i] == 1: #if option is still possible
                    diamond_mar = 0
                    gold_mar = 0
                    silver_mar = 0
                    silk_mar = 0
                    spice_mar = 0
                    leather_mar = 0

                    for k in self.Q_trade[i][1]: #looping through marketplace options
                        if k == "Leather":
                            leather_mar = leather_mar + 1
                        elif k == "Spice":
                            spice_mar = spice_mar + 1
                        elif k == "Silk":
                            silk_mar = silk_mar + 1
                        elif k == "Silver":
                            silver_mar = silver_mar + 1
                        elif k == "Gold":
                            gold_mar = gold_mar + 1
                        elif k == "Diamond":
                            diamond_mar = diamond_mar + 1

                    if leather_mar > 0 and leather_mar > mar_leather:
                        options[i] = 0 #not possible

                    elif spice_mar > 0 and spice_mar > mar_spice:
                        options[i] = 0 #not possible

                    elif silk_mar > 0 and silk_mar > mar_silk:
                        options[i] = 0 #not possible

                    elif silver_mar > 0 and silver_mar > mar_silver:
                        options[i] = 0 #not possible

                    elif gold_mar > 0 and gold_mar > mar_gold:
                        options[i] = 0 #not possible

                    elif diamond_mar > 0 and diamond_mar > mar_diamond:
                        options[i] = 0 #not possible

        masked = np.array(options, dtype = np.int8)

        return masked

# Module-level game instance. Constructing it builds the full trade-action
# table; no round is set up until round() is called on it.
j = Jaipur()

# Multi-Agent Reinforcement Learning PettingZoo Environment

In [None]:
def env(**kwargs):
    """Create the Jaipur environment wrapped in the standard PettingZoo wrappers.

    All keyword arguments are forwarded unchanged to ``raw_env``.

    Returns:
        The raw environment wrapped first in ``AssertOutOfBoundsWrapper``
        (error handling for discrete action spaces) and then in
        ``OrderEnforcingWrapper`` (helpful user errors; strongly recommended).
    """
    base_env = raw_env(**kwargs)
    bounds_checked = wrappers.AssertOutOfBoundsWrapper(base_env)
    return wrappers.OrderEnforcingWrapper(bounds_checked)

class raw_env(AECEnv):
    """Two-player Jaipur as a PettingZoo AEC environment with action masking.

    Every observation handed to an agent is a dict with two entries:

    - ``"observation"``: 25 integers laid out as
      ``[own goods x6, marketplace x7, own herd, own score, opponent herd,
      goods tokens left x6, bonus tokens left x3]``.
    - ``"action_mask"``: the 0/1 vector returned by
      ``Jaipur.get_masked_options`` over the 25469 discrete actions.
    """

    metadata = {"render_modes": ["human"], "name": "jaipur_v0"}

    # Size of the discrete action space (actions 0 to 25468).
    _NUM_ACTIONS = 25469

    # Goods in the order used for the hand part of the observation.
    _GOODS = ("Diamond", "Gold", "Silver", "Silk", "Spice", "Leather")

    # Upper bound of each observation entry, in observation order.
    _OBS_HIGH = (
        7, 7, 7, 7, 7, 7,        # goods in hand
        5, 5, 5, 5, 5, 5, 5,     # marketplace (six goods, then camels)
        11, 221, 11,             # own herd, own score, opponent herd
        5, 5, 5, 7, 7, 9,        # goods tokens left
        7, 6, 5,                 # bonus tokens left (3, 4 and 5 card sales)
    )

    def __init__(self, *args, **kwargs):
        """Define possible_agents, action_spaces and observation_spaces.

        These attributes should not be changed after initialization.
        Positional and keyword arguments are accepted but not used.
        """
        super().__init__()

        self.agents = ["player_" + str(r + 1) for r in range(2)]
        self.possible_agents = self.agents[:]

        self._agent_selector = agent_selector(self.agents)

        self.action_spaces = {
            agent: spaces.Discrete(self._NUM_ACTIONS) for agent in self.agents
        }
        self.observation_spaces = self._build_observation_spaces()

        self.j = Jaipur()

        self.rewards = {agent: 0 for agent in self.agents}
        self.terminations = {agent: False for agent in self.agents}
        self.truncations = {agent: False for agent in self.agents}
        self.infos = {agent: {} for agent in self.agents}

        self.agent_selection = None
        # Reward earned by the first agent on its own move; it is held back
        # until the second agent has moved (see step()).
        self.r_agent1 = 0

        self.num_steps = 0

    def _build_observation_spaces(self):
        """Return a fresh ``{agent: Dict space}`` mapping for the current agents."""
        low = np.zeros(len(self._OBS_HIGH), dtype=np.int16)
        high = np.array(self._OBS_HIGH, dtype=np.int16)
        return {
            agent: spaces.Dict({
                'observation': spaces.Box(low=low, high=high, dtype=np.int16),
                'action_mask': spaces.Box(low=0, high=1, shape=(self._NUM_ACTIONS,), dtype=np.int8),
            })
            for agent in self.agents
        }

    def _player(self, agent):
        """Return the Jaipur player object that belongs to *agent*.

        Raises:
            ValueError: if *agent* is neither "player_1" nor "player_2".
        """
        if agent == "player_1":
            return self.j.p1
        if agent == "player_2":
            return self.j.p2
        raise ValueError(f"Unknown agent: {agent!r}")

    def observation_space(self, agent):
        """Return the observation space of *agent*."""
        return self.observation_spaces[agent]

    def action_space(self, agent):
        """Return the action space of *agent*."""
        return self.action_spaces[agent]

    def get_observation(self, agent):
        """Build the 25-entry observation vector for *agent*.

        Returns:
            A ``np.int16`` array in the layout described in the class
            docstring (matching the declared observation space), or ``None``
            after printing an error when the player's cards are ``None``.

        Raises:
            ValueError: if *agent* is not one of the two players.
        """
        player = self._player(agent)
        p_cards = player.cards
        if p_cards is None:
            print("ERROR! Empty player cards")
            return

        # Count the marketplace cards per type; camels go last.
        market_counts = dict.fromkeys(self._GOODS + ("Camel",), 0)
        for card in self.j.marketplace:
            if card in market_counts:
                market_counts[card] += 1

        tokens = self.j.tokens
        obs = [p_cards[good] for good in self._GOODS]
        obs.extend(market_counts.values())
        obs.extend([p_cards["Camel"], player.bonus, player.opp_camels])
        obs.extend([
            len(tokens.diamond), len(tokens.gold), len(tokens.silver),
            len(tokens.silk), len(tokens.spice), len(tokens.leather),
        ])
        obs.extend([len(tokens.bonus_3), len(tokens.bonus_4), len(tokens.bonus_5)])

        return np.array(obs, dtype=np.int16)

    def observe(self, agent):
        """Return the observation dict (state vector plus action mask) of *agent*."""
        # observation of one agent is the previous state of the other
        observation = self.get_observation(agent)

        mask = self.j.get_masked_options(agent)

        return {"observation": observation, "action_mask": mask}

    def reset(self, seed=None, options=None, return_info=False):
        """Start a new game.

        Re-initializes agents, rewards, _cumulative_rewards, terminations,
        truncations, infos and agent_selection, and deals a new round so that
        step() and observe() can be called without issues.

        Args:
            seed: when given, seeds Python's global ``random`` module (the
                module the card deck shuffles with) before the round is dealt.
            options: accepted for API compatibility; unused.
            return_info: accepted for API compatibility; unused.
        """
        if seed is not None:
            random.seed(seed)

        self.agents = self.possible_agents[:]

        self._agent_selector = agent_selector(self.agents)
        self.agent_selection = self._agent_selector.reset()

        self.rewards = {agent: 0 for agent in self.agents}
        self._cumulative_rewards = {agent: 0 for agent in self.agents}
        self.terminations = {agent: False for agent in self.agents}
        self.truncations = {agent: False for agent in self.agents}
        self.infos = {agent: {} for agent in self.agents}
        self.num_steps = 0

        self.observation_spaces = self._build_observation_spaces()
        self.r_agent1 = 0

        self.j.round()
        print("\nNEW GAME:")

    def render(self):
        """Rendering is not implemented."""
        pass

    def close(self):
        """Nothing to release."""
        pass

    def step(self, action):
        """Play *action* for the agent in ``agent_selection``.

        Updates rewards, _cumulative_rewards, terminations, truncations and
        agent_selection (to the next agent). An agent's reward is the change
        of its own score caused by its move. Rewards are only handed out once
        the last agent of a turn cycle has moved, so the first agent's reward
        is held in ``r_agent1`` until then.
        """
        if (self.terminations[self.agent_selection] or self.truncations[self.agent_selection]):
            # handles stepping an agent which is already dead
            # accepts a None action for the one agent, and moves the agent_selection to
            # the next dead agent,  or if there are no more dead agents, to the next live agent
            self._was_dead_step(action)
            return

        current_agent = self.agent_selection

        observe = self.observe(current_agent)
        obs = observe['observation']

        # The move is still played when the mask forbids it; it is only reported.
        mask = observe["action_mask"]
        if mask[action] == 0:
            if self.num_steps == 0:
                print("invalid after reset")
            elif self.num_steps > 0:
                print("invalid after step ", self.num_steps)

        print(current_agent, "Cards: Diamond:",obs[0], ", Gold:",obs[1], ", Silver:",obs[2], ", Silk:",obs[3], ", Spice:",obs[4], ", Leather:",obs[5], ",Camel:",obs[13])
        print(current_agent, "Score:", obs[14], ", Opponent's Camels:", obs[15])
        print("Marketplace cards: Diamond:",obs[6], ", Gold:",obs[7], ", Silver:",obs[8], ", Silk:",obs[9], ", Spice:",obs[10], ", Leather:",obs[11], ",Camel:",obs[12])
        print("Tokens: Diamond:",obs[16], ", Gold:",obs[17], ", Silver:",obs[18], ", Silk:",obs[19], ", Spice:",obs[20], ", Leather:",obs[21])

        # Let the current player carry out the chosen action.
        self.j.options(self._player(current_agent), action)

        # Explicit comparison kept because finished() is defined outside this class.
        if self.j.finished() == True:
            # final() returns which player has the most camels
            player = self.j.final()
            self.terminations = {agent: True for agent in self.agents}
            # NOTE(review): the +5 is only applied for player_1, and it is
            # overwritten below (together with the rewards being cleared) when
            # the game ends on the first agent's own move - confirm intended.
            if player == "player_1":
                self.r_agent1 = self.r_agent1 + 5 #increasing camel reward for agent 1

        new_score = self._player(current_agent).bonus

        print("")
        # obs[14] is the score before the move; int() keeps the reward a plain int.
        reward = (new_score - int(obs[14]))

        if self._agent_selector.is_last():
            self.rewards[self.agents[0]], self.rewards[self.agents[1]] = (self.r_agent1, reward)
        else:
            self.r_agent1 = reward
            self._clear_rewards()

        self._cumulative_rewards[current_agent] = 0

        self.agent_selection = (
            self._agent_selector.next()
        )

        self._accumulate_rewards()

        self.num_steps = self.num_steps + 1

        # Cut the episode off after 1000 moves.
        if self.num_steps >= 1000:
            self.truncations = {agent: True for agent in self.agents}

# Action Mask Model and Registering MARL Environment:

In [None]:
from ray.rllib.env import PettingZooEnv
from ray.rllib.models import ModelCatalog
from ray.tune.registry import register_env
from ray.rllib.models.tf.fcnet import FullyConnectedNetwork
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.torch_utils import FLOAT_MIN

# Obtain the TensorFlow and PyTorch handles through RLlib's import helpers.
# Only `tf` is used by the action-masking model defined below.
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()

class ActionMaskModel(TFModelV2):
    """Fully connected TF model that masks out illegal discrete actions.

    The observation must be a Dict space with an "observation" entry (fed to
    the wrapped network) and an "action_mask" entry (1 = allowed, 0 = not).
    The outputs are assumed to be the logits of a single Categorical action
    distribution; more complex action spaces are not handled here.
    """

    def __init__(
        self, obs_space, action_space, num_outputs, model_config, name, **kwargs
    ):
        """Validate the observation space and build the wrapped network."""
        # RLlib may hand over a flattened space; the Dict lives in original_space.
        dict_space = getattr(obs_space, "original_space", obs_space)
        assert isinstance(dict_space, spaces.Dict)
        assert "action_mask" in dict_space.spaces
        assert "observation" in dict_space.spaces

        super().__init__(obs_space, action_space, num_outputs, model_config, name)

        # The wrapped network only ever sees the plain state vector.
        self.internal_model = FullyConnectedNetwork(
            dict_space["observation"],
            action_space,
            num_outputs,
            model_config,
            name + "_internal",
        )

    def forward(self, input_dict, state, seq_lens):
        """Return the masked logits together with the unchanged state."""
        obs = input_dict["obs"]
        allowed = obs["action_mask"]

        raw_logits, _ = self.internal_model({"obs": obs["observation"]})

        # log(1) = 0 leaves a logit untouched; log(0) = -inf is clipped to the
        # smallest float32 so that masked actions get a huge negative logit.
        penalty = tf.maximum(tf.math.log(allowed), tf.float32.min)

        return raw_logits + penalty, state

    def value_function(self):
        """Delegate the value estimate to the wrapped network."""
        return self.internal_model.value_function()

# Register the model under the name that the "custom_model" entries of the
# algorithm configurations below refer to.
ModelCatalog.register_custom_model("action_masking_model", ActionMaskModel)

def env_creator():
    """Return a new wrapped Jaipur environment built by ``env()``."""
    return env()

# Make the environment available to RLlib under the name "jaipur"; every
# call of the factory builds a new PettingZoo-wrapped game.
register_env("jaipur", lambda config: PettingZooEnv(env_creator()))

# A throw-away environment instance, used to read the observation and action
# spaces for the policy specs below and later for the simulation cell.
test_env = PettingZooEnv(env_creator())
obs_space = test_env.observation_space
print(obs_space)
act_space = test_env.action_space

# Configuring and Training the Algorithms:

## PPO:

In [None]:
from ray.rllib.algorithms.ppo import PPOConfig

# One independent PPO policy per player, both trained, both using the
# action-masking model; no GPU and no extra rollout workers.
config = (
    PPOConfig()
    .training(gamma=0.9, lr=0.01, kl_coeff=0, model={"custom_model": "action_masking_model"})
    .resources(num_gpus=0)
    .rollouts(num_rollout_workers=0)
    .multi_agent(
        policies={
            "player_1": (None, obs_space, act_space, {}),
            "player_2": (None, obs_space, act_space, {}),
        },
        # Each agent id doubles as the id of its policy.
        policy_mapping_fn=(
            lambda agent_id, episode, worker, **kwargs: agent_id
        ),
        policies_to_train=["player_1", "player_2"],
    )
    .framework(framework="tf")
)
PPO = config.build(env="jaipur")

for i in range(1):
    result = PPO.train()
    # Progress marker every 50 iterations.
    if not i % 50:
        print(i)
    # Intermediate checkpoint every 80 iterations (never at iteration 0).
    if i != 0 and not i % 80:
        checkpoint = PPO.save()
        print(checkpoint)

# Final checkpoint once training is over.
checkpoint = PPO.save()
print(checkpoint)

## A2C:

In [None]:
from ray.rllib.algorithms.a2c import A2CConfig
config = A2CConfig()
config = config.training(lr=0.01, grad_clip=30.0, model={"custom_model": "action_masking_model"})
config = config.resources(num_gpus=0)
config = config.rollouts(num_rollout_workers=2)
config = config.multi_agent(
            policies={"player_1": (None, obs_space, act_space, {}), "player_2": (None, obs_space, act_space, {})},
            policy_mapping_fn=(
                    lambda agent_id, episode, worker, **kwargs: agent_id
                ),
            policies_to_train=["player_1", "player_2"],)
config = config.framework(framework="tf")
A2C = config.build(env="jaipur")

# Number of milestone checkpoints written so far. It has to be initialised
# here: the loop below reads it, and without this line the first iteration
# that meets a milestone raised a NameError and aborted training.
count = 0

for i in range(700):
    result = A2C.train()
    if i % 50 == 0:
        print(i)

    # Milestone n is saved once more than the n-th step threshold has been
    # trained, the best episode reward exceeds 144, and fewer than n milestone
    # checkpoints exist. The checks run in order within one iteration, so a
    # late first hit can write several checkpoints at once, as before.
    for milestone, step_threshold in enumerate((600000, 700000, 800000, 900000), start=1):
        if result['num_env_steps_trained'] > step_threshold and result['episode_reward_max'] > 144 and count < milestone:
            checkpoint = A2C.save()
            print(checkpoint)
            count = count + 1

# Final checkpoint once training is over.
checkpoint = A2C.save()
print(checkpoint)

## DQN:

In [None]:
from ray.rllib.algorithms.dqn.dqn import DQNConfig

# Plain DQN (no dueling head, no double Q-learning) with one policy per
# player; the Q-network is the action-masking model without extra hidden
# layers on top, and samples are collected as complete episodes.
config = (
    DQNConfig()
    .training(
        replay_buffer_config={
            "type": "MultiAgentPrioritizedReplayBuffer",
            "capacity": 1000,
            "prioritized_replay_alpha": 0.6,
            "prioritized_replay_beta": 0.4,
        },
    )
    .multi_agent(
        policies={
            "player_1": (None, obs_space, act_space, {}),
            "player_2": (None, obs_space, act_space, {}),
        },
        # Each agent id doubles as the id of its policy.
        policy_mapping_fn=(
            lambda agent_id, episode, worker, **kwargs: agent_id
        ),
        policies_to_train=["player_1", "player_2"],
    )
    .training(
        hiddens=[],
        dueling=False,
        double_q=False,
        model={"custom_model": "action_masking_model"},
    )
    .resources(num_gpus=0)
    .rollouts(num_rollout_workers=0, batch_mode='complete_episodes')
    .framework(framework="tf")
)
DQN = config.build(env="jaipur")

for i in range(500):
    result = DQN.train()
    # Progress marker every 50 iterations.
    if not i % 50:
        print(i)

# Checkpoint of the fully trained algorithm.
checkpoint = DQN.save()
print(checkpoint)

## DDQN:

In [None]:
from ray.rllib.algorithms.dqn.dqn import DQNConfig

# Same setup as the DQN run, but with double Q-learning switched on.
config = (
    DQNConfig()
    .training(
        replay_buffer_config={
            "type": "MultiAgentPrioritizedReplayBuffer",
            "capacity": 1000,
            "prioritized_replay_alpha": 0.6,
            "prioritized_replay_beta": 0.4,
        },
    )
    .multi_agent(
        policies={
            "player_1": (None, obs_space, act_space, {}),
            "player_2": (None, obs_space, act_space, {}),
        },
        # Each agent id doubles as the id of its policy.
        policy_mapping_fn=(
            lambda agent_id, episode, worker, **kwargs: agent_id
        ),
        policies_to_train=["player_1", "player_2"],
    )
    .training(
        hiddens=[],
        dueling=False,
        double_q=True,
        model={"custom_model": "action_masking_model"},
    )
    .resources(num_gpus=0)
    .rollouts(num_rollout_workers=0, batch_mode='complete_episodes')
    .framework(framework="tf")
)
DDQN = config.build(env="jaipur")

for i in range(500):
    result = DDQN.train()
    # Progress marker every 50 iterations.
    if not i % 50:
        print(i)

# Checkpoint of the fully trained algorithm.
checkpoint = DDQN.save()
print(checkpoint)

# Importing the Trained Policies:

In [None]:
from ray.rllib.algorithms.algorithm import Algorithm

# Restore a trained algorithm for the evaluation cells below.
# NOTE(review): the checkpoint path is an empty placeholder; presumably one of
# the paths printed by the save() calls above has to be filled in before
# running this cell.
algo = Algorithm.from_checkpoint('')

# Checking the Trained Policies by Simulating the Game Environment:

In [None]:
# Play one game on the probe environment, letting every agent act through the
# restored policy that carries its own name.
k = 0
reward_sum = 0
test_env.env.reset()
for agent in test_env.env.agent_iter():
    observation, reward, termination, truncation, info = test_env.env.last()
    # Rewards of both agents are added up in the order last() reports them.
    reward_sum = reward_sum + reward
    if termination or truncation:
        # The game is over: stop iterating.
        action = None
        break
    action = algo.compute_single_action(observation, policy_id=agent)
    test_env.env.step(action)
    print()

# Checking the Trained Policies by Creating Custom Observations:

In [None]:
import itertools

import numpy as np

# Card types a player can give away in a trade (camels included) and card
# types that can be taken from the marketplace (camels excluded).
in_hand = ["Diamond", "Gold", "Silver", "Spice", "Silk", "Leather", "Camel"]
marketplace_cards = ["Diamond", "Gold", "Silver", "Spice", "Silk", "Leather"]


def _build_trade_actions(hand_types, market_types, first_index=13, sizes=(2, 3, 4, 5), slots=5):
    """Enumerate every trade action and assign it a consecutive action index.

    A trade swaps ``size`` cards of the player for ``size`` marketplace cards
    and is only listed when no card type appears on both sides. Both sides are
    padded with 'na' up to ``slots`` entries.

    Args:
        hand_types: card types the player may give away.
        market_types: card types that may be taken from the marketplace.
        first_index: action index of the first trade.
        sizes: numbers of cards exchanged, in enumeration order.
        slots: length every side is padded to.

    Returns:
        ``(table, next_index)`` where ``table`` maps an action index to
        ``(given_cards, taken_cards)`` and ``next_index`` is the first unused
        action index.
    """
    table = {}
    index = first_index
    # Enumerating combinations of the *reversed* type lists walks the
    # multisets last-type-first, which is the order the action indices were
    # defined in, so every trade keeps its index.
    hand_order = hand_types[::-1]
    market_order = market_types[::-1]
    for size in sizes:
        padding = ('na',) * (slots - size)
        taken_options = list(itertools.combinations_with_replacement(market_order, size))
        for given in itertools.combinations_with_replacement(hand_order, size):
            given_types = set(given)
            for taken in taken_options:
                # A card type cannot be given and taken in the same trade.
                if given_types.isdisjoint(taken):
                    table[index] = (given + padding, taken + padding)
                    index += 1
    return table, index


# Trades occupy the action indices 13 to 25468; `count` ends up as the total
# number of actions (25469).
Q_trade, count = _build_trade_actions(in_hand, marketplace_cards)


def get_mask(hand, herd, marketplace, cards, Q_trade):
    """Compute the action mask for a hand-written game state.

    Action layout: 0-5 take one good (Diamond, Gold, Silver, Silk, Spice,
    Leather), 6 take all camels, 7-12 sell one kind of good (same order),
    13 and above are the trades listed in ``Q_trade``.

    Args:
        hand: number of goods cards the player holds.
        herd: number of camels the player holds.
        marketplace: list of the card names lying in the marketplace.
        cards: mapping from card name to the number the player holds.
        Q_trade: mapping from action index to ``(cards given, cards taken)``.

    Returns:
        ``np.int8`` array of length 25469 with 1 for allowed actions and 0
        for impossible ones.
    """
    goods = ("Diamond", "Gold", "Silver", "Silk", "Spice", "Leather")

    def tally(names):
        # Number of occurrences of every name in the sequence.
        totals = {}
        for name in names:
            totals[name] = totals.get(name, 0) + 1
        return totals

    options = [1] * 25469  # everything starts out as possible

    # Taking a single good needs room in the hand (fewer than 7 cards) and
    # the good has to be on offer.
    if hand <= 6:
        for slot, good in enumerate(goods):
            if good not in marketplace:
                options[slot] = 0
    else:
        options[0:6] = [0] * 6

    market_totals = tally(marketplace)

    # Camels can only be taken when at least one lies in the marketplace.
    mar_camel = market_totals.get("Camel", 0)
    if mar_camel == 0:
        options[6] = 0

    # Selling needs two cards of the three precious goods, one of the others.
    sell_minimum = (2, 2, 2, 1, 1, 1)
    for slot, (good, minimum) in enumerate(zip(goods, sell_minimum), start=7):
        if cards[good] < minimum:
            options[slot] = 0

    # Trading needs at least two cards (hand plus herd) on the player's side
    # and fewer than four camels in the marketplace.
    if (hand + herd) < 2 or mar_camel >= 4:
        options[13:25469] = [0] * (25469 - 13)
    else:
        free_slots = 7 - hand
        for index in Q_trade:
            giving = tally(Q_trade[index][0])

            # The player must own every card given away; camels handed over
            # bring goods into the hand, so they are limited by its free room.
            blocked = False
            if "Camel" in giving:
                camels_out = giving["Camel"]
                blocked = camels_out > cards["Camel"] or camels_out > free_slots
            if not blocked:
                blocked = any(
                    giving[good] > cards[good] for good in goods if good in giving
                )

            if blocked:
                options[index] = 0
            elif options[index] == 1:
                # The marketplace must hold every card that is taken.
                taking = tally(Q_trade[index][1])
                if any(
                    taking[good] > market_totals.get(good, 0)
                    for good in goods
                    if good in taking
                ):
                    options[index] = 0

    return np.array(options, dtype=np.int8)


def action_type(choice):
    """Print a human-readable description of the action index *choice*.

    Indices 0-6 are the "take" actions, 7-12 the "sell" actions and 13-25468
    the trades looked up in the module-level ``Q_trade`` table. Nothing is
    printed for any other value, nor for a trade whose two sides do not list
    the same number (at least two) of real cards.
    """
    simple_actions = (
        "Taking 1 Diamond",
        "Taking 1 Gold",
        "Taking 1 Silver",
        "Taking 1 Silk",
        "Taking 1 Spice",
        "Taking 1 Leather",
        "Taking All Camels",
        "Selling Diamond",
        "Selling Gold",
        "Selling Silver",
        "Selling Silk",
        "Selling Spice",
        "Selling Leather",
    )
    for code, message in enumerate(simple_actions):
        if choice == code:
            print(message)
            return

    if not (choice > 12 and choice <= 25468):
        return

    # Trading goods: drop the 'na' padding from both sides of the trade.
    goods = ("Leather", "Spice", "Silk", "Silver", "Gold", "Diamond")
    given, taken = Q_trade[choice][0], Q_trade[choice][1]
    replacable = [card for card in given if card == "Camel" or card in goods]
    chosen = [card for card in taken if card in goods]

    if len(replacable) >= 2 and len(chosen) == len(replacable):
        print("Trading cards",chosen, " from the marketplace with ", replacable)

In [None]:
# Scenario: Gold, Leather and three camels on offer; the player holds
# 1 Gold, 3 Silver, 1 Leather and 2 camels.
marketplace = ['Gold', 'Leather', 'Camel', 'Camel', 'Camel']
cards = {"Diamond": 0, "Gold": 1, "Silver": 3, "Silk": 0, "Spice": 0, "Leather": 1, "Camel": 2}
herd = cards['Camel']
# The hand size counts goods only; camels form the herd.
hand = sum(cards[good] for good in ("Diamond", "Gold", "Silver", "Silk", "Spice", "Leather"))

# Same layout as raw_env.get_observation: hand x6, marketplace x7, herd,
# score, opponent herd, goods tokens x6, bonus tokens x3.
observation_np = [0, 1, 3, 0, 0, 1, 0, 1, 0, 0, 0, 1, 3, 2, 0, 2, 5, 5, 5, 7, 7, 9, 7, 6, 5]

masked = get_mask(hand, herd, marketplace, cards, Q_trade)
observation = {'observation': observation_np, 'action_mask': masked}

# Ask both trained policies for their move in this state.
for policy_id in ('player_1', 'player_2'):
    action = algo.compute_single_action(observation, policy_id=policy_id)
    action_type(action)

In [None]:
# Scenario: two Diamonds, Silver, Silk and Spice on offer; the player holds
# 2 Diamond, 1 Spice and 3 camels.
marketplace = ['Diamond', 'Diamond', 'Silver', 'Silk', 'Spice']
cards = {"Diamond": 2, "Gold": 0, "Silver": 0, "Silk": 0, "Spice": 1, "Leather": 0, "Camel": 3}
herd = cards['Camel']
hand = cards['Diamond'] + cards['Gold'] + cards['Silver'] + cards['Silk'] + cards['Spice'] + cards['Leather']

# Layout as in raw_env.get_observation: hand x6, marketplace x7, herd, score,
# opponent herd, goods tokens x6, bonus tokens x3.
# The vector used here before described a different state (3 Diamond and
# 1 Leather in hand, three camels on offer, no herd) than the cards and
# marketplace the mask is computed from, so the policies were queried with an
# observation that contradicted their action mask. It now encodes the state
# defined above.
# NOTE(review): if the old vector was the intended state, change `marketplace`
# and `cards` instead.
observation_np = [2, 0, 0, 0, 1, 0, 2, 0, 1, 1, 1, 0, 0, 3, 0, 0, 5, 5, 5, 7, 7, 9, 7, 6, 5]

masked = get_mask(hand, herd, marketplace, cards, Q_trade)

observation = {'observation': observation_np, 'action_mask': masked}

action = algo.compute_single_action(observation, policy_id = 'player_1')
action_type(action)
action = algo.compute_single_action(observation, policy_id = 'player_2')
action_type(action)

In [None]:
# Scenario: Diamond, Silk and three Leather on offer; the player holds
# 1 Diamond, 1 Gold and 2 camels, has a score of 10, and part of the tokens
# is already gone.
marketplace = ['Diamond', 'Silk', 'Leather', 'Leather', 'Leather']
cards = {"Diamond": 1, "Gold": 1, "Silver": 0, "Silk": 0, "Spice": 0, "Leather": 0, "Camel": 2}
herd = cards['Camel']
# The hand size counts goods only; camels form the herd.
hand = sum(cards[good] for good in ("Diamond", "Gold", "Silver", "Silk", "Spice", "Leather"))

# Same layout as raw_env.get_observation: hand x6, marketplace x7, herd,
# score, opponent herd, goods tokens x6, bonus tokens x3.
observation_np = [1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 3, 0, 2, 10, 0, 3, 0, 3, 5, 5, 9, 6, 5, 5]

masked = get_mask(hand, herd, marketplace, cards, Q_trade)
observation = {'observation': observation_np, 'action_mask': masked}

# Ask both trained policies for their move in this state.
for policy_id in ('player_1', 'player_2'):
    action = algo.compute_single_action(observation, policy_id=policy_id)
    action_type(action)

In [None]:
# Scenario: two Diamonds and three Spice on offer; the player holds 2 Gold,
# 1 Silver, 2 Leather and 2 camels.
marketplace = ['Diamond', 'Diamond', 'Spice', 'Spice', 'Spice']
cards = {"Diamond": 0, "Gold": 2, "Silver": 1, "Silk": 0, "Spice": 0, "Leather": 2, "Camel": 2}
herd = cards['Camel']
# The hand size counts goods only; camels form the herd.
hand = sum(cards[good] for good in ("Diamond", "Gold", "Silver", "Silk", "Spice", "Leather"))

# Same layout as raw_env.get_observation: hand x6, marketplace x7, herd,
# score, opponent herd, goods tokens x6, bonus tokens x3.
observation_np = [0, 2, 1, 0, 0, 2, 2, 0, 0, 0, 3, 0, 0, 2, 0, 0, 5, 5, 5, 7, 7, 9, 7, 6, 5]

masked = get_mask(hand, herd, marketplace, cards, Q_trade)
observation = {'observation': observation_np, 'action_mask': masked}

# Ask both trained policies for their move in this state.
for policy_id in ('player_1', 'player_2'):
    action = algo.compute_single_action(observation, policy_id=policy_id)
    action_type(action)

In [None]:
# Scenario: two Gold, Silk and two Spice on offer; the player holds 1 Gold,
# 2 Leather and 2 camels.
marketplace = ['Gold', 'Gold', 'Silk', 'Spice', 'Spice']
cards = {"Diamond": 0, "Gold": 1, "Silver": 0, "Silk": 0, "Spice": 0, "Leather": 2, "Camel": 2}
herd = cards['Camel']
# The hand size counts goods only; camels form the herd.
hand = sum(cards[good] for good in ("Diamond", "Gold", "Silver", "Silk", "Spice", "Leather"))

# Same layout as raw_env.get_observation: hand x6, marketplace x7, herd,
# score, opponent herd, goods tokens x6, bonus tokens x3.
observation_np = [0, 1, 0, 0, 0, 2, 0, 2, 0, 1, 2, 0, 0, 2, 0, 0, 5, 5, 5, 7, 7, 9, 7, 6, 5]

masked = get_mask(hand, herd, marketplace, cards, Q_trade)
observation = {'observation': observation_np, 'action_mask': masked}

# Ask both trained policies for their move in this state.
for policy_id in ('player_1', 'player_2'):
    action = algo.compute_single_action(observation, policy_id=policy_id)
    action_type(action)

In [None]:
# Scenario: Gold, Spice and three camels on offer; the player holds 4 Silk,
# 2 Leather and 1 camel.
marketplace = ['Gold', 'Camel', 'Spice', 'Camel', 'Camel']
cards = {"Diamond": 0, "Gold": 0, "Silver": 0, "Silk": 4, "Spice": 0, "Leather": 2, "Camel": 1}
herd = cards['Camel']
# The hand size counts goods only; camels form the herd.
hand = sum(cards[good] for good in ("Diamond", "Gold", "Silver", "Silk", "Spice", "Leather"))

# Same layout as raw_env.get_observation: hand x6, marketplace x7, herd,
# score, opponent herd, goods tokens x6, bonus tokens x3.
observation_np = [0, 0, 0, 4, 0, 2, 0, 1, 0, 0, 1, 0, 3, 1, 0, 0, 5, 5, 5, 7, 7, 9, 7, 6, 5]

masked = get_mask(hand, herd, marketplace, cards, Q_trade)
observation = {'observation': observation_np, 'action_mask': masked}

# Ask both trained policies for their move in this state.
for policy_id in ('player_1', 'player_2'):
    action = algo.compute_single_action(observation, policy_id=policy_id)
    action_type(action)

In [None]:
# Scenario: two Silk, two Leather and one camel on offer; the player holds
# 1 Silver and 5 camels, has a score of 46, and many tokens are already gone.
marketplace = ['Silk', 'Silk', 'Leather', 'Leather', 'Camel']
cards = {"Diamond": 0, "Gold": 0, "Silver": 1, "Silk": 0, "Spice": 0, "Leather": 0, "Camel": 5}
herd = cards['Camel']
# The hand size counts goods only; camels form the herd.
hand = sum(cards[good] for good in ("Diamond", "Gold", "Silver", "Silk", "Spice", "Leather"))

# Same layout as raw_env.get_observation: hand x6, marketplace x7, herd,
# score, opponent herd, goods tokens x6, bonus tokens x3.
observation_np = [0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 2, 1, 5, 46, 0, 3, 2, 5, 2, 2, 4, 4, 4, 5]

masked = get_mask(hand, herd, marketplace, cards, Q_trade)
observation = {'observation': observation_np, 'action_mask': masked}

# Ask both trained policies for their move in this state.
for policy_id in ('player_1', 'player_2'):
    action = algo.compute_single_action(observation, policy_id=policy_id)
    action_type(action)

In [None]:
# Scenario: Silk, Spice and three camels on offer; the player holds one each
# of Gold, Silk, Spice and Leather plus 1 camel.
marketplace = ['Silk', 'Spice', 'Camel', 'Camel', 'Camel']
cards = {"Diamond": 0, "Gold": 1, "Silver": 0, "Silk": 1, "Spice": 1, "Leather": 1, "Camel": 1}
herd = cards['Camel']
# The hand size counts goods only; camels form the herd.
hand = sum(cards[good] for good in ("Diamond", "Gold", "Silver", "Silk", "Spice", "Leather"))

# Same layout as raw_env.get_observation: hand x6, marketplace x7, herd,
# score, opponent herd, goods tokens x6, bonus tokens x3.
observation_np = [0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 3, 1, 0, 0, 5, 5, 5, 7, 7, 9, 7, 6, 5]

masked = get_mask(hand, herd, marketplace, cards, Q_trade)
observation = {'observation': observation_np, 'action_mask': masked}

# Ask both trained policies for their move in this state.
for policy_id in ('player_1', 'player_2'):
    action = algo.compute_single_action(observation, policy_id=policy_id)
    action_type(action)

#to take first points

In [None]:
# Scenario: empty hand, three camels in the herd, and a market made up
# entirely of Diamonds.
marketplace = ['Diamond'] * 5
cards = {
    "Diamond": 0, "Gold": 0, "Silver": 0,
    "Silk": 0, "Spice": 0, "Leather": 0,
    "Camel": 3,
}
herd = cards['Camel']
# Hand size counts goods only; camels are tracked separately as the herd.
hand = sum(count for good, count in cards.items() if good != 'Camel')

observation_np = [
    0, 0, 0, 0, 0, 0,  # goods in hand, in the key order of `cards`
    5, 0, 0, 0, 0, 0,  # goods on the market, same order
    0, 3,              # camels on the market, camels in the herd
    # NOTE(review): the remaining 11 entries cannot be derived from this
    # cell -- presumably scores and remaining token counts; confirm
    # against the environment's observation layout.
    0, 0, 5, 5, 5, 7, 7, 9, 7, 6, 5,
]

masked = get_mask(hand, herd, marketplace, cards, Q_trade)
observation = {'observation': observation_np, 'action_mask': masked}

# Query both policies on the same state and report the chosen action.
action = algo.compute_single_action(observation, policy_id='player_1')
action_type(action)
action = algo.compute_single_action(observation, policy_id='player_2')
action_type(action)

In [None]:
# Scenario: a single Leather in hand, two camels in the herd, and a
# market holding five different goods (no camels).
marketplace = ['Gold', 'Silver', 'Silk', 'Leather', 'Diamond']
cards = {
    "Diamond": 0, "Gold": 0, "Silver": 0,
    "Silk": 0, "Spice": 0, "Leather": 1,
    "Camel": 2,
}
herd = cards['Camel']
# Hand size counts goods only; camels are tracked separately as the herd.
hand = sum(count for good, count in cards.items() if good != 'Camel')

observation_np = [
    0, 0, 0, 0, 0, 1,  # goods in hand, in the key order of `cards`
    1, 1, 1, 1, 0, 1,  # goods on the market, same order
    0, 2,              # camels on the market, camels in the herd
    # NOTE(review): the remaining 11 entries cannot be derived from this
    # cell -- presumably scores and remaining token counts; confirm
    # against the environment's observation layout.
    0, 0, 5, 5, 5, 7, 7, 0, 7, 6, 5,
]

masked = get_mask(hand, herd, marketplace, cards, Q_trade)
observation = {'observation': observation_np, 'action_mask': masked}

# Query both policies on the same state and report the chosen action.
action = algo.compute_single_action(observation, policy_id='player_1')
action_type(action)
action = algo.compute_single_action(observation, policy_id='player_2')
action_type(action)

In [None]:
# Scenario: a full hand (three Silk, three Spice, one Leather), two camels
# in the herd, and a market of one Silver plus four camels.
marketplace = ['Silver', 'Camel', 'Camel', 'Camel', 'Camel']
# Leather is 1 here so that `cards` matches the hand section of
# `observation_np` below. The earlier value of 2 made the action mask and
# the observation describe different states, and gave a hand of 8 cards,
# which exceeds Jaipur's seven-card hand limit.
cards = {"Diamond": 0, "Gold": 0, "Silver": 0, "Silk": 3, "Spice": 3, "Leather": 1, "Camel": 2}
herd = cards['Camel']
# Hand size counts goods only; camels are tracked separately as the herd.
hand = cards['Diamond'] + cards['Gold'] + cards['Silver'] + cards['Silk'] + cards['Spice'] + cards['Leather']


# First 6 entries: goods in hand (key order of `cards`); next 6: goods on
# the market; then camels on the market and camels in the herd.
# NOTE(review): the remaining 11 entries cannot be derived from this cell --
# presumably scores and remaining token counts; confirm against the
# environment's observation layout.
observation_np = [0, 0, 0, 3, 3, 1, 0, 0, 1, 0, 0, 0, 4, 2, 0, 0, 5, 5, 5, 7, 7, 0, 7, 6, 5]

masked = get_mask(hand, herd, marketplace, cards, Q_trade)

observation = {'observation': observation_np, 'action_mask': masked}

# Query both policies on the same state and report the chosen action.
action = algo.compute_single_action(observation, policy_id = 'player_1')
action_type(action)
action = algo.compute_single_action(observation, policy_id = 'player_2')
action_type(action)

In [None]:
# Scenario: one each of Diamond, Gold and Silver in hand, one camel in the
# herd, and a market offering the same three goods plus two camels.
marketplace = ['Diamond', 'Gold', 'Silver', 'Camel', 'Camel']
cards = {
    "Diamond": 1, "Gold": 1, "Silver": 1,
    "Silk": 0, "Spice": 0, "Leather": 0,
    "Camel": 1,
}
herd = cards['Camel']
# Hand size counts goods only; camels are tracked separately as the herd.
hand = sum(count for good, count in cards.items() if good != 'Camel')

observation_np = [
    1, 1, 1, 0, 0, 0,  # goods in hand, in the key order of `cards`
    1, 1, 1, 0, 0, 0,  # goods on the market, same order
    2, 1,              # camels on the market, camels in the herd
    # NOTE(review): the remaining 11 entries cannot be derived from this
    # cell -- presumably scores and remaining token counts; confirm
    # against the environment's observation layout.
    0, 0, 5, 5, 5, 7, 7, 9, 7, 6, 5,
]

masked = get_mask(hand, herd, marketplace, cards, Q_trade)
observation = {'observation': observation_np, 'action_mask': masked}

# Query both policies on the same state and report the chosen action.
action = algo.compute_single_action(observation, policy_id='player_1')
action_type(action)
action = algo.compute_single_action(observation, policy_id='player_2')
action_type(action)

In [None]:
# Scenario: empty hand, five camels in the herd, and a market holding
# five different goods (no camels, no Leather).
marketplace = ['Diamond', 'Gold', 'Silver', 'Silk', 'Spice']
cards = {
    "Diamond": 0, "Gold": 0, "Silver": 0,
    "Silk": 0, "Spice": 0, "Leather": 0,
    "Camel": 5,
}
herd = cards['Camel']
# Hand size counts goods only; camels are tracked separately as the herd.
hand = sum(count for good, count in cards.items() if good != 'Camel')

observation_np = [
    0, 0, 0, 0, 0, 0,  # goods in hand, in the key order of `cards`
    1, 1, 1, 1, 1, 0,  # goods on the market, same order
    0, 5,              # camels on the market, camels in the herd
    # NOTE(review): the remaining 11 entries cannot be derived from this
    # cell -- presumably scores and remaining token counts; confirm
    # against the environment's observation layout.
    0, 0, 5, 5, 5, 7, 7, 9, 7, 6, 5,
]

masked = get_mask(hand, herd, marketplace, cards, Q_trade)
observation = {'observation': observation_np, 'action_mask': masked}

# Query both policies on the same state and report the chosen action.
action = algo.compute_single_action(observation, policy_id='player_1')
action_type(action)
action = algo.compute_single_action(observation, policy_id='player_2')
action_type(action)

In [None]:
# Scenario: three Diamonds and one Silk in hand, five camels in the herd,
# and a market of one Silk, three Silver and one Leather.
marketplace = ['Silk', 'Silver', 'Silver', 'Silver', 'Leather']
cards = {
    "Diamond": 3, "Gold": 0, "Silver": 0,
    "Silk": 1, "Spice": 0, "Leather": 0,
    "Camel": 5,
}
herd = cards['Camel']
# Hand size counts goods only; camels are tracked separately as the herd.
hand = sum(count for good, count in cards.items() if good != 'Camel')

observation_np = [
    3, 0, 0, 1, 0, 0,  # goods in hand, in the key order of `cards`
    0, 0, 3, 1, 0, 1,  # goods on the market, same order
    0, 5,              # camels on the market, camels in the herd
    # NOTE(review): the remaining 11 entries cannot be derived from this
    # cell -- presumably scores and remaining token counts; confirm
    # against the environment's observation layout.
    0, 0, 3, 5, 5, 7, 7, 9, 7, 6, 5,
]

masked = get_mask(hand, herd, marketplace, cards, Q_trade)
observation = {'observation': observation_np, 'action_mask': masked}

# Query both policies on the same state and report the chosen action.
action = algo.compute_single_action(observation, policy_id='player_1')
action_type(action)
action = algo.compute_single_action(observation, policy_id='player_2')
action_type(action)