# Intro

Purpose: to create a game environment with a deck of cards: the player draws cards and has to discard one whenever the hand holds more cards than the hand size limit.

The number of cards of each set in the deck, the goal (how many cards complete each set), and the hand size are all parameters.

We will give a reward for completing a set, and a large negative reward at the end (when the deck runs out of cards) if not all sets have been completed.

# Environment

In [1]:
import numpy as np
import gym
import random

In [35]:
class CardSetFinder(gym.Env):
    """Card game environment: draw cards, discard, and collect sets.

    The deck holds ``cardset_tot[i]`` cards of type ``i``. The hand is
    refilled from the deck up to ``hand_limit`` cards. Each step the agent
    discards one card *type*; the hand is then refilled and any type whose
    count in hand reaches ``cardset_goal[i]`` is collected as a set (those
    cards leave the hand and the type is flagged as done).

    The episode ends when every set has been collected (win) or when the
    deck is empty without all sets collected (lose).

    Observation: an integer matrix with one row per card type and columns
    ``[cards left in deck, cards in hand, 1 if the set is already found]``.

    Action: an integer in ``[0, sets_num)`` naming the card type to discard.
    """

    metadata = {'render.modes': ['human']}

    def __init__(
        self,
        cardset_tot = (10, 10),
        cardset_goal = (3, 3),
        hand_limit = 3):
        """Set up spaces and empty game state; call ``reset`` to start.

        Args:
            cardset_tot: number of cards of each type in a fresh deck.
            cardset_goal: number of cards of each type that form a set.
            hand_limit: number of cards the hand is refilled to.

        Raises:
            ValueError: if ``cardset_tot`` is empty or its length differs
                from that of ``cardset_goal``.
        """
        super().__init__()

        if len(cardset_tot) == 0:
            raise ValueError("cardset_tot must contain at least one card type")
        if len(cardset_tot) != len(cardset_goal):
            raise ValueError("cardset_tot and cardset_goal must have the same length")

        self.reward_for_set = 3
        self.reward_for_invalid_action = -5
        self.reward_for_win = 10
        self.reward_for_lose = -10

        # private copies, so that neither the defaults nor the caller's
        # sequences are shared between environment instances
        self.cardset_tot = list(cardset_tot)
        self.cardset_goal = list(cardset_goal)
        self.hand_limit = hand_limit
        self.sets_num = len(self.cardset_tot)

        # actions: to discard one of the cards
        # coded as a discrete value from 0 to sets_num - 1, so an action is
        # to discard a type of card, not a specific card
        # NB there will be invalid actions, not all the card types will be
        # in hand all the time
        self.action_space = gym.spaces.Discrete(self.sets_num)

        # states are coded in a matrix with sets_num rows and 3 columns
        # each row corresponds to a type of card
        # col 0: number of cards in the deck, min: 0, max: cardset_tot
        # col 1: ... in hand, min: 0, max: hand limit
        # col 2: 1 if we already have the set for that type (in which case
        #        the cards are useless), 0 if not
        self.observation_space = gym.spaces.Box(
            low = np.tile(np.array([0,0,0]),(self.sets_num,1)),
            high = np.tile(np.array([max(self.cardset_tot),self.hand_limit,1]),(self.sets_num,1)),
            dtype = int)

        # note: this is slightly inefficient, we could limit each row's
        # 1st col by the corresponding card's number

        self.state = np.zeros((self.sets_num, 3), dtype = int)

        # deck: a shuffled list of card types (integers 0 .. sets_num - 1);
        # the top of the deck is the end of the list
        self.deck = []

        # hand: one integer per card type, the number of cards held
        self.hand = np.zeros(self.sets_num, dtype = int)

    def create_deck(self):
        """Replace the deck with a freshly shuffled full deck."""
        # rebuild from scratch: appending to the old deck would make the
        # deck grow with every reset
        self.deck = []
        for card_type, count in enumerate(self.cardset_tot):
            self.deck.extend([card_type] * count)

        random.shuffle(self.deck)

    def draw_cards(self):
        """Draw cards until the hand limit is met or the deck runs out."""
        while self.hand.sum() < self.hand_limit and self.deck:
            self.draw_card()

    def draw_card(self):
        """Move the top card of the deck into the hand.

        Raises:
            IndexError: if the deck is empty.
        """
        current_card = self.deck.pop()
        self.hand[current_card] += 1

    def step(self, action):
        """Discard one card of type ``action`` and advance the game.

        Args:
            action: card type to discard, an integer in ``[0, sets_num)``.

        Returns:
            Tuple ``(state, reward, done, info)``. ``state`` is a copy of
            the state matrix. ``reward`` is the invalid-action penalty if
            no card of that type is in hand; otherwise ``reward_for_set``
            per newly collected set, plus ``reward_for_win`` or
            ``reward_for_lose`` on the step that ends the episode.

        Raises:
            IndexError: if ``action`` is outside ``[0, sets_num)``.
        """
        # explicit range check: a negative index would otherwise silently
        # wrap around to another card type
        if not 0 <= action < self.sets_num:
            raise IndexError(
                "action %r is outside [0, %d)" % (action, self.sets_num))

        reward = 0
        done = False
        info = {}

        if self.hand[action] == 0:
            # does not change anything, just returns invalid action penalty
            reward = self.reward_for_invalid_action
        else:
            # deletes a card from hand
            self.discard_card(action)
            # refills the hand and collects every set that shows up
            found_sets = self._refill_and_collect_sets()
            reward = found_sets * self.reward_for_set

            # at this point, also needs to check if the game is done
            done = self.check_if_done()
            if done:
                if self._all_sets_found():
                    reward += self.reward_for_win
                else:
                    reward += self.reward_for_lose

            self.calc_state()

        return self.state.copy(), reward, done, info

    def reset(self):
        """Start a new episode and return a copy of the initial state."""
        # clear everything left over from a previous episode
        self.hand = np.zeros(self.sets_num, dtype = int)
        self.state = np.zeros((self.sets_num, 3), dtype = int)

        self.create_deck()

        # sets dealt right at the beginning are collected,
        # but no reward is given for a lucky start
        self._refill_and_collect_sets()

        self.calc_state()

        return self.state.copy()

    def check_hand_for_sets(self):
        """Collect every completed set in hand; return how many were found."""
        return sum(
            self.check_hand_for_set(set_num)
            for set_num in range(self.sets_num))

    def check_hand_for_set(self, set_num):
        """Collect the set of type ``set_num`` if the hand completes it.

        A type whose set was already found is never collected again; its
        cards are useless from then on.

        Returns:
            True if a new set was collected, False otherwise.
        """
        if self.state[set_num, 2] == 1:
            return False

        goal = self.cardset_goal[set_num]
        if self.hand[set_num] < goal:
            return False

        # delete those cards from hand
        self.hand[set_num] -= goal
        # set the last element of state as DONE (to 1 from 0)
        self.state[set_num, 2] = 1
        return True

    def check_if_done(self):
        """Return True when the episode is over.

        The game ends when all sets are found, or when the deck is empty:
        without new cards the counts in hand can only go down, so no
        further set can be completed.
        """
        return self._all_sets_found() or not self.deck

    def calc_state(self):
        """Refresh the deck and hand columns of the state matrix.

        Only changes the first two columns; the third one, whether the set
        was already found, is handled in ``check_hand_for_set``.
        """
        self.state[:, 0] = [self.deck.count(i) for i in range(self.sets_num)]
        self.state[:, 1] = self.hand

    def discard_card(self, set_to_discard):
        """Remove one card of type ``set_to_discard`` from the hand.

        Raises:
            ValueError: if no card of that type is in hand.
        """
        if self.hand[set_to_discard] <= 0:
            raise ValueError(
                "no card of type %r in hand to discard" % (set_to_discard,))
        self.hand[set_to_discard] -= 1

    def _all_sets_found(self):
        """Return True if every card type has its set flagged as found."""
        return bool(np.all(self.state[:, 2] == 1))

    def _refill_and_collect_sets(self):
        """Alternate refilling the hand and collecting sets until stable.

        Collecting a set frees room in hand, and the refill may complete
        another set, so repeat until a refill yields no new set. Each
        productive round flags at least one more type as found, which
        bounds the number of rounds by ``sets_num``.

        Returns:
            Total number of sets collected.
        """
        total_found = 0
        while True:
            self.draw_cards()
            found = self.check_hand_for_sets()
            if found == 0:
                return total_found
            total_found += found
    
    
    

In [36]:
# Instantiate the environment with its default parameters.
env = CardSetFinder()

In [37]:
# Start an episode; reset() returns the state matrix (one row per card type).
env.reset()

array([[9, 1, 0],
       [8, 2, 0]])

# Testing

In [66]:
# check if state is in the observation space

In [126]:
# Fresh environment with default parameters for the observation-space checks.
env = CardSetFinder()

In [127]:
# Display the observation space built in __init__.
env.observation_space

Box(0, 10, (2, 3), int64)

In [131]:
# test: every entry lies within the bounds of the default observation space.
test = np.array([[3,2,1],[1,2,0]])
# test2: the third column of the first row is 2, above that column's upper bound of 1.
test2 = np.array([[3,2,2],[1,2,0]])

In [130]:
# Expect True: test respects all bounds.
env.observation_space.contains(test)

True

In [132]:
# Expect False: test2 has an entry above its column's upper bound.
env.observation_space.contains(test2)

False

In [133]:
# The environment's own state array should itself be a valid observation.
env.observation_space.contains(env.state)

True