Playing SET
---

Encode cards with 2 bits per feature.

In [64]:
import pandas as pd
import numpy as np


import gym
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [16]:
# Every feature has three possible values, so it is stored in two bits;
# four features therefore give eight bit columns per card.
columns = [
    f'{feature} {bit}'
    for feature in ('number', 'color', 'symbol', 'texture')
    for bit in (1, 2)
]

# The three two-bit codes a single feature can take.
feature_options = [
    '10',  # this
    '01',  # that
    '00',  # other
]

# Enumerate every number/color/symbol/texture combination (texture varies
# fastest). Each card becomes one row of eight '0'/'1' characters.
cards_list = np.array([
    list(number_bits + color_bits + symbol_bits + texture_bits)
    for number_bits in feature_options
    for color_bits in feature_options
    for symbol_bits in feature_options
    for texture_bits in feature_options
])
cards_list = cards_list.reshape(81, 8)
cards_list.shape

(81, 8)

In [28]:
# One row per card, one boolean column per bit. The '0'/'1' characters are
# converted to int first because the strings cannot be told apart as booleans
# until they have been turned into numbers.
bit_cards = (
    pd.DataFrame(cards_list, columns=columns)
    .astype(int)
    .astype(bool)
    .rename_axis('card index')
)
bit_cards.head()

Unnamed: 0_level_0,number 1,number 2,color 1,color 2,symbol 1,symbol 2,texture 1,texture 2
card index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,True,False,True,False,True,False,True,False
1,True,False,True,False,True,False,False,True
2,True,False,True,False,True,False,False,False
3,True,False,True,False,False,True,True,False
4,True,False,True,False,False,True,False,True


In [35]:
# Second representation of the deck: one letter ('a', 'b' or 'c') per feature,
# so each card is a row with four columns.
columns = ['number', 'color', 'symbol', 'texture']

feature_options = 'abc'

# Same enumeration order as the bit-encoded deck: texture varies fastest.
cards_list = np.array([
    [number_letter, color_letter, symbol_letter, texture_letter]
    for number_letter in feature_options
    for color_letter in feature_options
    for symbol_letter in feature_options
    for texture_letter in feature_options
]).reshape(81, 4)

cards = pd.DataFrame(cards_list, columns=columns)

In [38]:
# preview the first and last rows of the letter-encoded deck
cards.head(), cards.tail()

(  number color symbol texture
 0      a     a      a       a
 1      a     a      a       b
 2      a     a      a       c
 3      a     a      b       a
 4      a     a      b       b,
    number color symbol texture
 76      c     c      b       b
 77      c     c      b       c
 78      c     c      c       a
 79      c     c      c       b
 80      c     c      c       c)

In [39]:
# determine if three cards are a set


def all_same(series1, series2, series3, feature):
    """Return True when the three cards all hold the same value for `feature`.

    Each card only needs to support lookup by feature name (e.g. a pd.Series).
    """
    return series1[feature] == series2[feature] == series3[feature]

def all_diffnt(series1, series2, series3, feature):
    """Return True when the three cards hold pairwise different values for `feature`.

    Each card only needs to support lookup by feature name (e.g. a pd.Series).
    """
    first, second, third = (card[feature] for card in (series1, series2, series3))
    return first != second and second != third and third != first
    
    
def setQ(card1, card2, card3):
    """Return True if the three cards form a set, False if not.

    Each card is a pd.Series holding the four features 'number', 'color',
    'symbol' and 'texture'. Three cards form a set when every feature is
    either identical on all three cards or different on all three.
    """
    # Evaluate every feature (no early exit), then require all of them to pass.
    feature_checks = [
        all_same(card1, card2, card3, feature) or all_diffnt(card1, card2, card3, feature)
        for feature in ('number', 'color', 'symbol', 'texture')
    ]
    return all(feature_checks)
            

In [47]:
#are the first three cards a set?
#(cards 0, 1 and 2 agree on number, color and symbol and have textures a, b, c)
setQ(cards.loc[0],cards.loc[1],cards.loc[2])

True

In [48]:
#are the first two and the fourth a set?
#(cards 0, 1 and 3 have textures a, b, a: neither all the same nor all different)
setQ(cards.loc[0],cards.loc[1],cards.loc[3])

False

In [58]:
#convert a bit-encoded card (two bits per feature) into the letter encoding
#(one of 'a', 'b', 'c' per feature).

def two_to_three_bits(two_bit_series):
    """Convert one bit-encoded card into its letter-encoded form.

    Parameters
    ----------
    two_bit_series : pd.Series
        Anything indexable by the eight labels '<feature> 1' and '<feature> 2'
        for feature in 'number', 'color', 'symbol', 'texture'. Values are
        interpreted by truthiness.

    Returns
    -------
    pd.Series
        Object-dtype series indexed by 'number', 'color', 'symbol', 'texture'.
        A feature is 'a' when its first bit is set, 'b' when only its second
        bit is set, and 'c' when neither bit is set.
    """
    feature_names = [
        'number',
        'color',
        'symbol',
        'texture'
    ]

    def decode(feature):
        # The first bit is tested first, so it wins if both bits were set.
        if two_bit_series[feature + ' 1']:
            return 'a'
        if two_bit_series[feature + ' 2']:
            return 'b'
        return 'c'

    # Build the result in one step rather than assigning into an empty Series.
    return pd.Series(
        [decode(feature) for feature in feature_names],
        index = feature_names,
        dtype = 'object',
    )

In [62]:
#encode the first two-bit card as a three-bit
#(card 0 has the first bit of every feature set, so every feature decodes to 'a')
two_to_three_bits(bit_cards.loc[0])

number     a
color      a
symbol     a
texture    a
dtype: object

In [63]:
#two-bit cards are easier to feed to a model.

actor-critic model
---

The model is fed `8*12 == 96` bits: the encoded data of the twelve cards on the board. It then outputs which cards to take, together with a prediction of how good that choice is.

In [105]:
# seeded random generator used for the random draws in the cells below
rng_seed = 2022
rng = np.random.default_rng(rng_seed)

In [115]:
# sample three distinct card indices out of the 81 cards
rng.choice(range(81), 3, replace = False)

array([57, 53, 17])

In [171]:
#splits a deck into cards on the board and cards left in the deck.
def initialize_board(num_cards, board_size, rng):
    """Deal `board_size` random cards onto the board; the rest form the deck.

    Parameters
    ----------
    num_cards : int
        Size of the full deck. Cards are identified by the indices
        0 .. num_cards - 1.
    board_size : int
        Number of cards to deal onto the board (at most `num_cards`).
    rng : np.random.Generator
        Random generator used to pick the board cards.

    Returns
    -------
    board : np.ndarray
        The dealt card indices, in the order they were drawn.
    card_list : np.ndarray
        The remaining card indices in ascending order. This is an integer
        array even when no cards remain.
    """
    all_cards = np.arange(num_cards)
    board = rng.choice(all_cards, board_size, replace = False)
    # Mask out the dealt cards instead of testing membership card by card.
    # Indexing with the mask also keeps the integer dtype when the deck is empty.
    still_in_deck = np.ones(num_cards, dtype = bool)
    still_in_deck[board] = False
    card_list = all_cards[still_in_deck]
    return board, card_list

#takes a board, a deck, and draws up to the board size.
# returns a new board with added cards and the deck with those
#cards removed.
def draw_cards(board, deck, rng, board_size = 12):
    """Refill `board` from `deck` until it holds `board_size` cards.

    Parameters
    ----------
    board : array-like of int
        Card indices currently on the board.
    deck : array-like of int
        Card indices still available to draw; assumed to be distinct.
    rng : np.random.Generator
        Random generator used to pick the drawn cards.
    board_size : int, default 12
        Number of cards the board should hold after drawing.

    Returns
    -------
    (new_board, new_deck) : tuple of np.ndarray, or None
        `new_board` is `board` followed by the drawn cards; `new_deck` is
        `deck` without them, in its original order and with its dtype kept.
        If the board and deck together hold fewer than `board_size` cards, a
        message is printed and None is returned. Neither input is modified.
    """
    num_cards_to_draw = board_size - len(board)
    if len(board) + len(deck) < board_size:
        # Report the size of the deck that was passed in.
        print(f'{num_cards_to_draw} cards are needed to complete the board, but only {len(deck)} remain in the deck.')
        return None

    deck = np.asarray(deck)
    drawn_cards = rng.choice(deck, num_cards_to_draw, replace = False)
    # Boolean masking keeps the deck's dtype, so an emptied deck stays an
    # integer array instead of turning into float64.
    new_deck = deck[~np.isin(deck, drawn_cards)]
    new_board = np.concatenate([board, drawn_cards])
    return new_board, new_deck

In [172]:
# NOTE(review): `board` is assigned by the initialize_board(...) cell further down,
# so on a fresh kernel that cell has to run before this one.
board

array([ 6,  4, 71, 69, 13, 61, 53,  5, 49, 45, 17, 52])

In [173]:
# refill a board holding only its first six cards back up to the default twelve
# NOTE(review): uses `board` and `deck` from the initialize_board(...) cell further down
draw_cards(board[:6],deck,rng)

(array([ 6,  4, 71, 69, 13, 61, 34, 23, 12, 31, 77, 50]),
 array([ 0,  1,  2,  3,  7,  8,  9, 10, 11, 14, 15, 16, 18, 19, 20, 21, 22,
        24, 25, 26, 27, 28, 29, 30, 32, 33, 35, 36, 37, 38, 39, 40, 41, 42,
        43, 44, 46, 47, 48, 51, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64, 65,
        66, 67, 68, 70, 72, 73, 74, 75, 76, 78, 79, 80]))

In [137]:
# deal 12 of the 81 cards onto the board, using a fresh generator seeded with 2022
board, deck = initialize_board(81,12,np.random.default_rng(2022))
board, deck

(array([ 6,  4, 71, 69, 13, 61, 53,  5, 49, 45, 17, 52]),
 array([ 0,  1,  2,  3,  7,  8,  9, 10, 11, 12, 14, 15, 16, 18, 19, 20, 21,
        22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
        39, 40, 41, 42, 43, 44, 46, 47, 48, 50, 51, 54, 55, 56, 57, 58, 59,
        60, 62, 63, 64, 65, 66, 67, 68, 70, 72, 73, 74, 75, 76, 77, 78, 79,
        80]))

In [174]:
def board_to_input(card_indices):
    """Flatten the bit encoding of the given cards into one 1-D integer array.

    `card_indices` is an iterable of card indices. Their rows are looked up in
    the module-level `bit_cards` table, converted to 0/1 integers and laid out
    one card after another in the order given.
    """
    selected_bits = bit_cards.loc[card_indices]
    return selected_bits.astype(int).to_numpy().flatten()

In [176]:
# inspect the current board (an array of card indices)
board

array([ 6,  4, 71, 69, 13, 61, 53,  5, 49, 45, 17, 52])

In [175]:
# encode the current board as one flat vector of 0/1 values
sample_input = board_to_input(board)
sample_input

array([1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 1])

In [143]:
# 12 cards on the board x 8 bits per card
len(sample_input)

96

In [146]:
# the same flat board encoding, converted to a TensorFlow tensor
tf.convert_to_tensor(board_to_input(board))

<tf.Tensor: shape=(96,), dtype=int32, numpy=
array([1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 1])>

In [69]:
# Set up the actor-critic model and the bookkeeping used while playing.

# re-create the seeded generator so this cell starts from a known random state
rng_seed = 2022
rng = np.random.default_rng(rng_seed)


gamma = 0.995 #discount factor for previous rewards
rounds_per_game = 20

num_inputs = 8*12 #12 cards at a time, 8 bits per card
num_hidden = 256
num_picks = 12 #cards per round

# one shared hidden layer feeding two output heads: `picks` (num_picks values)
# and `critic` (a single value)
inputs = layers.Input(shape = (num_inputs,))
common = layers.Dense(num_hidden, activation = 'relu')(inputs)
# NOTE(review): this head uses relu although the history list below is called
# action_probs_history -- confirm whether a probability-like activation was intended
picks = layers.Dense(num_picks, activation = 'relu')(common)
critic = layers.Dense(1)(common)

model = keras.Model(inputs = inputs, outputs = [picks, critic])

#make optimizer
optimizer = keras.optimizers.Adam(learning_rate = 0.01)
#loss function
# NOTE(review): `loss` is bound to the builtin sum, not to a Keras loss -- confirm intent
loss = sum

# per-round histories and running counters, all starting out empty / zero
action_probs_history = []
critic_value_history = []
rewards_history = []
running_reward = 0
round_count = 0

#get 0 and 1 values for the entire deck
# (one row of integers per card, taken from the boolean bit_cards table)
card_values = bit_cards.astype(int).values

#play a game
#deal the opening board of 12 cards; the remaining cards form the deck.
#initialize_board requires the random generator as its third argument.
board, deck = initialize_board(81, 12, rng)
this_round = 0
for this_round in range(rounds_per_game):
        board