Trying to solve the problem with dynamic programming. 

In [132]:
import numpy as np
import itertools

In [133]:
cardset_tot = [10,10]
cardset_goal = [5,5]
hand_limit = 5

reward_for_set = 3
reward_for_invalid_action = -5
reward_for_win = 10
reward_for_lose = -10

In [134]:
# create states
# states is a list of numpy arrays, the individual states
# each state is cardset_goal * 3 long, and in order, they show: 
# number of corresponding card left in deck (int)
# number of corresponding card in hand (int)
# whether the set is already won or not (boolean)

In [135]:
# loop through these, and check, delete where not valid
number_of_sets = len(cardset_tot)

lists = []

for i in range(0, number_of_sets):
    lists.append(list(range(0, cardset_tot[i] + 1)))
    lists.append(list(range(0, hand_limit + 1)))
    lists.append(list(range(0,2)))

In [136]:
states = itertools.product(*lists)

In [137]:
def state_valid(state):

    # checks a number of rules that make it nonvalid
    
    # rule 1: sum of cards in hand not more than hand limit
    hand_size = 0
    for i in range(0, number_of_sets):
        hand_size += state[3 * i + 1]
    if hand_size > hand_limit:
        return False
    
    # rule 2: are there enough cards gone from deck to be enough for whatever is in hand
    for i in range(0, number_of_sets):
        if state[3*i] > cardset_tot[i] - state[3*i + 1]:
            return False
        
    # rule 3: can only have won a set if there are enough missing from hand and deck
    for i in range(0, number_of_sets):
        if state[3 * i + 2] == 1:
            if cardset_tot[i] - state[3 * i] - state[3 * i + 1] < cardset_goal[i]:
                return False
        
    # if did not stop at any of the previous rules, the state is valid
    return True

In [138]:
# arrange states into valid and nonvalid states
states_valid = []
states_nonvalid = []

for state in states:
    if state_valid(state):
        states_valid.append(state)
    else:
        states_nonvalid.append(state)
        

In [139]:
len(states_valid)

3829

In [140]:
len(states_nonvalid)

13595

In [141]:
# create a state_value dictionary, where the keys are the states, values are 

In [145]:
def state_won(state):
    for i in range(0, number_of_sets):
        if state[3 * i + 2] == 0:
            return False
    return True

In [148]:
# reset state values to opening
state_values = {}

for state in states_valid:
    if state_won(state):
        state_values[state] = 1
    else:
        state_values[state] = 0
        

In [150]:
# need to define the actions

# action is an integer

# 0: always the same, draw a card from the deck into hand
# in sets of 2: discard a certain type of card from hand, or complete a set
# 1: discard a card from set 1
# 2: cash in set 1 group
# 3: discard a card from set 2
# ...

In [153]:
actions = list(range(0,2*number_of_sets + 1))

In [232]:
def new_state(state, action):
    # returns the next state depending on the action taken from current state
    # assumes that action is valid
    # return is actually a dictionary with states as keys and probabilities as values
    
    new_states = {}
    
    if action == 0:
        # if we draw a new card, decrease one of the sets in deck, increase corresponding hand
        
        # calculate the probs of drawing that particular card from the deck
        deck_size = 0
        for i in range(0, number_of_sets):
            deck_size += state[3 * i]
        probs = []
        for i in range(0, number_of_sets):
            prob = state[3 * i] / deck_size
            probs.append(prob)
            
        for i in range(0, number_of_sets):
            new_state = state.copy()
            new_state[i * 3] -= 1
            new_state[i * 3 + 1] += 1
            new_states[tuple(new_state)] = probs[i]
            
    elif action % 2 == 1:
        # we are discarding a card from hand, deterministic change
        impacted_set = action // 2
        new_state = state.copy()
        new_state[impacted_set * 3 + 1] -= 1
        new_states[tuple(new_state)] = 1
        
    elif action % 2 == 0:
        # we are doing a set, once again, deterministic change
        impacted_set = action // 2 - 1
        new_state = state.copy()
        new_state[impacted_set * 3 + 2] = 1
        new_state[impacted_set * 3 + 1] -= cardset_goal[impacted_set]
        new_states[tuple(new_state)] = 1

    return new_states
        

In [233]:
def get_valid_actions(state):
    
    actions = []

    # if hand size less than hand limit, AND there are cards in the deck, drawing new card is an option
    hand_size = 0
    for i in range(0, number_of_sets):
        hand_size += state[3*i + 2]
    if hand_size < hand_limit & hand_size > 0:
        actions.append(0)
        
    # for each set, check if discarding that card or cashing in a set is an option
    for i in range(0, number_of_sets):
        # if hand has at least one, can discard
        if state[3*i + 1]>0:
            actions.append(2*i + 1)
        # if hand has at least as many as the goal, AND the corresponding set is not yet won, can cash in the set
        if state[3*i + 1] >= cardset_goal[i] and state[3 * i + 2] == 0:
            actions.append(2*i + 2)
            
    return actions

In [234]:
current_state = [0,0,0,0,0,0]

In [235]:
get_valid_actions(current_state)

[]

In [250]:
old_state_values = state_values.copy()

for state in states_valid[0:10]:

    # 1) determine which actions are valid    
    actions = np.array(get_valid_actions(state))
            
    # 2) calculate action value for each action
    actions_values = []
    for action in actions: 
        new_states = new_state(list(state), action)
        action_value = 0
        for state in new_states.keys():
            action_value += old_state_values[state] * new_states[state]
        actions_values.append(action_value)
    actions_values = np.array(actions_values)
        
    # the new value of the state is the value of the action with the highest value
            

TypeError: 'list' object is not callable

In [252]:
testlist = [0,1,2]