Playing SET
---

Encode cards with 2 bits per feature.

In [1]:
import pandas as pd
import numpy as np


import gym
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
#two bits are needed to store the three options
columns = [
    'number 1',
    'number 2',
    'color 1',
    'color 2',
    'symbol 1',
    'symbol 2',
    'texture 1',
    'texture 2'
]

# a feature can be one of three 
feature_options = [
    '10', #this
    '01', #that
    '00' #other
]

#make encoded card data
cards_list = np.array([
    [
        [
            [
                [
                    character
                    for character in numberoption+coloroption+symboloption+textureoption
                ]
                for textureoption in feature_options
            ]
            for symboloption in feature_options
        ]
        for coloroption in feature_options
    ]
    for numberoption in feature_options
])
cards_list = cards_list.reshape(81,8)
cards_list.shape

(81, 8)

In [3]:
#encode each card as a row in a dataframe with 1-bit columns
bit_cards = pd.DataFrame(cards_list, columns = columns) .astype(int).astype(bool) #can't tell the difference between '1' and '0' until it's been turned to int
bit_cards.index.name = 'card index'
bit_cards.head()

Unnamed: 0_level_0,number 1,number 2,color 1,color 2,symbol 1,symbol 2,texture 1,texture 2
card index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,True,False,True,False,True,False,True,False
1,True,False,True,False,True,False,False,True
2,True,False,True,False,True,False,False,False
3,True,False,True,False,False,True,True,False
4,True,False,True,False,False,True,False,True


In [4]:
#encode each card as a row in a dataframe with 2-bit columns
columns = [
    'number',
    'color',
    'symbol',
    'texture'
]

feature_options = 'abc'

cards_list = np.array([
    [
        [
            [
                [numberoption, coloroption, symboloption, textureoption]
                for textureoption in feature_options
            ]
            for symboloption in feature_options
        ]
        for coloroption in feature_options
    ]
    for numberoption in feature_options
]).reshape(81,4)

cards = pd.DataFrame(cards_list, columns = columns)

In [5]:
cards.head(), cards.tail()

(  number color symbol texture
 0      a     a      a       a
 1      a     a      a       b
 2      a     a      a       c
 3      a     a      b       a
 4      a     a      b       b,
    number color symbol texture
 76      c     c      b       b
 77      c     c      b       c
 78      c     c      c       a
 79      c     c      c       b
 80      c     c      c       c)

In [86]:
# determine if three cards are a set


#checks if the feature value for three series is all the same thing

def all_same(series1, series2, series3, feature):
    return series1[feature] == series2[feature] and series2[feature] == series3[feature]

#checks if the feature values for three series are all unique

def all_diffnt(series1, series2, series3, feature):
    return series1[feature] != series2[feature] and series2[feature] != series3[feature] and series3[feature] != series1[feature]
    
    
#gives True if the three cards are a set, False if not
#feed this a pd.Series of the 4 features

def setQ(card_list):
    card1, card2, card3 = card_list
    #assumes the cards are a set at the outset. Will turn to False if it fails any of the tests.
    setA = True
    features = ['number','color','symbol','texture']
    for feature in features:
        feature_check = all_same(card1, card2, card3, feature) or all_diffnt(card1,card2,card3,feature)
        setA = setA and feature_check
    return setA
            

In [7]:
#are the first three cards a set?
setQ(cards.loc[0],cards.loc[1],cards.loc[2])

True

In [8]:
#are the first two and the fourth a set?
setQ(cards.loc[0],cards.loc[1],cards.loc[3])

False

In [107]:
#convert a 2-bit encoded card into a 3-bit card.

def two_to_three_bits(two_bit_series):
    two_bit_columns = [
    'number 1',
    'number 2',
    'color 1',
    'color 2',
    'symbol 1',
    'symbol 2',
    'texture 1',
    'texture 2'
    ]
    three_bit_columns = [
    'number',
    'color',
    'symbol',
    'texture'
    ]
    three_bit_series = pd.Series(dtype='object', index = three_bit_columns)
    for column in three_bit_columns:
        if two_bit_series[column + ' 1']:
            this_value = 'a'
        elif two_bit_series[column + ' 2']:
            this_value = 'b'
        else:
            this_value = 'c'
        three_bit_series[column] = this_value
    return three_bit_series

def index_to_series_list(index_list):
    return [
        cards.loc[index]
        for index in index_list
    ]

In [10]:
#encode the first two-bit card as a three-bit
two_to_three_bits(bit_cards.loc[0])

number     a
color      a
symbol     a
texture    a
dtype: object

In [11]:
#two-bit cards are easier to feed to a model.

actor-critic model
---

the model will be fed `8*12 == 96` bits corresponding to twelve cards data. It then will give a set of cards to take as well as a prediction of whether it

In [12]:
rng_seed = 2022
rng = np.random.default_rng(rng_seed)

In [13]:
rng.choice(range(81), 3, replace = False)

array([19, 60, 55])

In [81]:
#splits a deck into cards on the board and cards left in the deck.
def initialize_board(num_cards, board_size, rng):
    card_list = list(range(num_cards))
    board = rng.choice(card_list, board_size, replace = False)
    card_list = np.array([
        card
        for card in card_list
        if card not in board
    ])
    return board, card_list

#takes a board, a deck, and draws up to the board size.
# returns a new board with added cards and the deck with those
#cards removed.
def draw_cards(board, deck, rng, board_size = 12, p = None):
    num_cards_to_draw = board_size - len(board)
    if len(board) + len(deck) < board_size:
        print(f'{num_cards_to_draw} cards are needed to complete the board, but only {len(card_list)} remain in the deck.' )
        return None
    else:
        drawn_cards = rng.choice(deck, num_cards_to_draw, replace = False, p = p)
        new_deck = list(deck)
        for card in drawn_cards:
            new_deck.remove(card)
        new_deck = np.array(new_deck)
        new_board = np.concatenate([board, drawn_cards])
        return new_board, new_deck

In [16]:
#initialize a game
board, deck = initialize_board(81,12,np.random.default_rng(2022))
board, deck

(array([ 6,  4, 71, 69, 13, 61, 53,  5, 49, 45, 17, 52]),
 array([ 0,  1,  2,  3,  7,  8,  9, 10, 11, 12, 14, 15, 16, 18, 19, 20, 21,
        22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
        39, 40, 41, 42, 43, 44, 46, 47, 48, 50, 51, 54, 55, 56, 57, 58, 59,
        60, 62, 63, 64, 65, 66, 67, 68, 70, 72, 73, 74, 75, 76, 77, 78, 79,
        80]))

In [19]:
#test out drawing cards. this doesn't replace the originals.
draw_cards(board[:6],deck,rng)

(array([ 6,  4, 71, 69, 13, 61, 78, 80, 30, 67, 31,  9]),
 array([ 0,  1,  2,  3,  7,  8, 10, 11, 12, 14, 15, 16, 18, 19, 20, 21, 22,
        23, 24, 25, 26, 27, 28, 29, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
        42, 43, 44, 46, 47, 48, 50, 51, 54, 55, 56, 57, 58, 59, 60, 62, 63,
        64, 65, 66, 68, 70, 72, 73, 74, 75, 76, 77, 79]))

In [56]:
#takes an iterable of cards by index and returns a flat array of the card bits
def board_to_input(card_indices):
    model_input = bit_cards.loc[card_indices].astype(int).values.flatten()
    model_input = tf.convert_to_tensor(model_input)
    model_input = tf.expand_dims(model_input, 0)
    return model_input

In [48]:
board

array([ 6,  4, 71, 69, 13, 61, 53,  5, 49, 45, 17, 52])

In [49]:
sample_input = board_to_input(board)
sample_input

<tf.Tensor: shape=(1, 1, 96), dtype=int32, numpy=
array([[[1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0,
         0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0,
         1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0,
         0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1,
         0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1]]])>

In [50]:
#input should be the number of cards on the board (12) times the number of bits in a card (8).
len(sample_input) == 96

False

In [51]:
tf.convert_to_tensor(board_to_input(board))

<tf.Tensor: shape=(1, 1, 96), dtype=int32, numpy=
array([[[1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0,
         0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0,
         1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0,
         0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1,
         0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1]]])>

In [146]:
#prevents division by 0
eps = np.finfo(np.float32).eps.item()

rng_seed = 2022
rng = np.random.default_rng(rng_seed)


gamma = 0.995 #discount factor for previous rewards
rounds_per_game = 20

num_inputs = 8*12 #12 cards at a time, 8 bits per card
num_hidden = 256
num_picks = 12 #cards per round

inputs = layers.Input(shape = (num_inputs,))
common = layers.Dense(num_hidden, activation = 'relu', )(inputs)
picks = layers.Dense(num_picks, activation = 'softmax')(common)
critic = layers.Dense(1)(common)

model = keras.Model(inputs = inputs, outputs = [picks, critic])

#make optimizer
optimizer = keras.optimizers.Adam(learning_rate = 0.01)
#loss function
loss = sum

choice_probs_history = []
critic_value_history = []
rewards_history = []
running_reward = 0
round_count = 0

#get 0 and 1 values for the entire deck
card_values = bit_cards.astype(int).values

choice_probs_history = []
critic_value_history = []
hi = 1
#repeatedly play full games
while hi <2:
    hi+= 1
    #turn this hi stuff back into while True with a break check at the end
    total_score = 0
    with tf.GradientTape() as tape:
        #play a game. This is analogous to the cartpole "episodes"
        #place initial cards and set the deck aside
        board, deck = initialize_board(81, 12, rng)
        #go through the rounds
        round_results = []
        for this_round in range(rounds_per_game):
            #convert board to input
            state = board_to_input(board)
            #make choices and predict the result
            choice_probs, critic_value = model(state)
            #pick three cards on the board to constitute a set
            choices, board = draw_cards([], board, rng, board_size = 3, p = np.squeeze(choice_probs,0))
            #replace missing cards on the board
            board, deck = draw_cards(board, deck, rng)
            #assess if the three choices are a set. 0 or 1.
            reward = int(setQ(index_to_series_list(choices)))
            #track results
            choice_probs_history.append(choice_probs)
            critic_value_history.append(critic_value)
            round_results.append(reward)
            total_score += reward
        #normalize round results
        round_results = np.array(round_results)
        round_results = (round_results - np.mean(round_results))/(np.std(round_results)+eps)
        history = zip(choice_probs_history, critic_value_history, round_results)
        actor_losses = []
        critic_losses = []
        for log_prob, critic_value, result_value in history:
            #get losses for actor
            diff = result_value - critic_value
            actor_losses.append(-log_prob * diff)
            #get losses for critic
            critic_losses.append(
               np.abs(critic_value_history - round_results)
            )
        print(list(history)[0])
        loss_value = sum(actor_losses) + sum(critic_losses)
        grads = tape.gradient(loss_value, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        
        action_probs_history.clear()
        critic_value_history.clear()
        rewards_history.clear()

IndexError: list index out of range

In [136]:
history = zip(choice_probs_history, critic_value_history, round_results)

In [137]:
list(history)[0]

(<tf.Tensor: shape=(1, 12), dtype=float32, numpy=
 array([[0.12798883, 0.06258224, 0.1509525 , 0.13840464, 0.09192531,
         0.03883421, 0.06066033, 0.07969919, 0.08117904, 0.04328948,
         0.05934891, 0.06513539]], dtype=float32)>,
 <tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.55433947]], dtype=float32)>,
 0.0)

In [133]:
np.abs([-1,1,1,2,-3])

array([1, 1, 1, 2, 3])

In [148]:
choice_probs_history[0], critic_value_history[0], round_results[0]

(<tf.Tensor: shape=(1, 12), dtype=float32, numpy=
 array([[0.06937498, 0.05384377, 0.07791723, 0.09856275, 0.13054432,
         0.07178988, 0.05387769, 0.0909901 , 0.09842826, 0.08748049,
         0.09093545, 0.07625504]], dtype=float32)>,
 <tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.8472874]], dtype=float32)>,
 -0.22941560838716776)