In [91]:
import tensorflow as tf
import h5py
import numpy as np
import random

In [2]:
def get_embedding_model(output_dim, num_cards):
    """
    Builds a Keras model that turns one group of cards into a single embedding vector.

    output_dim: width of every embedding table and of the returned vector
    num_cards: number of cards in the group (2 for preflop, 3 for flop, 1 for turn, 1 for river)

    Each card index is looked up in three tables (card, rank = index // 4, suit = index % 4)
    and the three vectors are added. Cards with a negative index contribute a zero vector.
    The model output is the sum over the cards of the group.
    """
    n_ranks, n_suits, n_deck = 13, 4, 52

    def build_table(vocab_size):
        # all three tables share the same settings and only differ in vocabulary size
        return tf.keras.layers.Embedding(
            vocab_size, output_dim, embeddings_initializer='uniform',
            embeddings_regularizer=None, activity_regularizer=None,
            embeddings_constraint=None, mask_zero=False, input_length=None,
        )

    cards_input = tf.keras.Input((num_cards,))

    #### EMBEDDING TABLES (used for each group of cards)
    rank_table = build_table(n_ranks)
    suit_table = build_table(n_suits)
    card_table = build_table(n_deck)

    # cards is a list of card indices; collapse everything after the batch axis
    flat_cards = tf.keras.layers.Flatten()(cards_input)

    # 1.0 where a card is present (index >= 0), 0.0 otherwise; taken before clipping
    is_present = tf.cast(flat_cards >= tf.constant(0.), tf.float32)

    # negative indices are moved to 0 so that the lookup has a valid index
    card_ids = tf.clip_by_value(flat_cards, clip_value_min=0, clip_value_max=1e6)

    per_card = card_table(card_ids) + rank_table(card_ids // 4) + suit_table(card_ids % 4)

    # zero out the vectors of absent cards
    per_card = per_card * tf.expand_dims(is_present, axis=-1)

    # sum over num_cards card embeddings
    return tf.keras.Model(cards_input, tf.reduce_sum(per_card, axis=1))

#card1 = tf.keras.layers.Dense(output_dim)

In [3]:
# Embedding model for a group of three cards with a 256-dimensional output.
model = get_embedding_model(output_dim=256, num_cards=3)

In [4]:
# Batch of four identical hands, each holding the card indices 1, 10 and 3.
cards_input = tf.constant([[[1], [10], [3]]] * 4, dtype=tf.float32)

In [5]:
# Shape is (batch, num_cards, 1): four hands of three cards each.
cards_input.shape

TensorShape([4, 3, 1])

In [6]:
# All four rows of the batch hold the same cards, so the four output rows are identical.
model(cards_input)

<tf.Tensor: shape=(4, 256), dtype=float32, numpy=
array([[ 0.14479068, -0.13707975,  0.17693862, ..., -0.11987062,
         0.05106021,  0.0828777 ],
       [ 0.14479068, -0.13707975,  0.17693862, ..., -0.11987062,
         0.05106021,  0.0828777 ],
       [ 0.14479068, -0.13707975,  0.17693862, ..., -0.11987062,
         0.05106021,  0.0828777 ],
       [ 0.14479068, -0.13707975,  0.17693862, ..., -0.11987062,
         0.05106021,  0.0828777 ]], dtype=float32)>

In [7]:
def get_DeepCFR_model(output_dim, n_cards, n_bets, n_actions):
    """
    Builds the DeepCFR network: a card branch and a bet branch that are merged into a shared trunk.

    output_dim: dimensionality of embedding (also the width of every hidden layer)
    n_cards: a list of card numbers for each phase of the game (e.g. 2 preflop, 3 flop)
    n_bets: maximal number of bets in a game
    n_actions: number of possible action categories

    Returns a tf.keras.Model that is called with [cards, bets], where cards is a list with one
    tensor per entry of n_cards and bets holds n_bets values per sample. Negative bet values
    are treated as "no bet occurred". The output holds n_actions values per sample.
    """

    # added to the standard deviation so the normalization stays finite when it is zero
    norm_eps = 1e-6

    # define inputs
    cards = [tf.keras.Input([n,]) for n in n_cards]
    bets = tf.keras.Input([n_bets,])

    ### define layers

    # one embedding model per card group (pre-flop, flop, turn, river)
    embedding_layers = [get_embedding_model(output_dim, num_cards) for num_cards in n_cards]

    card1 = tf.keras.layers.Dense(output_dim, activation = "relu")
    card2 = tf.keras.layers.Dense(output_dim, activation = "relu")
    card3 = tf.keras.layers.Dense(output_dim, activation = "relu")

    # ReLU on both bet layers: two Dense layers without a nonlinearity in between
    # are equivalent to a single linear layer
    bet1 = tf.keras.layers.Dense(output_dim, activation = "relu")
    bet2 = tf.keras.layers.Dense(output_dim, activation = "relu")

    comb1 = tf.keras.layers.Dense(output_dim)
    comb2 = tf.keras.layers.Dense(output_dim)
    comb3 = tf.keras.layers.Dense(output_dim)

    action_head = tf.keras.layers.Dense(n_actions)


    # card branch: embed every card group and concatenate along the feature axis
    card_embs = [embedding(card_group) for embedding, card_group in zip(embedding_layers, cards)]
    card_embs = tf.concat(card_embs, axis= 1)

    x = card1(card_embs)
    x = card2(x)
    x = card3(x)

    # bet branch
    bet_size = tf.clip_by_value(bets, tf.constant(0.), tf.constant(1e6)) # clip bet sizes
    bets_occured = tf.cast(bets >= tf.constant(0.), tf.float32) # check if bet occured
    bet_features = tf.concat([bet_size, bets_occured], axis = -1)   # bet size and boolean bet
    y = bet1(bet_features)
    y = bet2(y)

    # combine bet history and card embedding branches
    z = tf.concat([x,y],axis=-1)
    z = comb1(z)
    z = tf.nn.relu(comb2(z) + z)  # residual connection
    z = tf.nn.relu(comb3(z) + z)  # residual connection

    # normalize (needed because of bet sizes)
    # statistics are taken per sample over the feature axis, so the output for one sample
    # does not depend on which other samples are in the batch
    z_mean = tf.math.reduce_mean(z, axis=-1, keepdims=True)
    z_std = tf.math.reduce_std(z, axis=-1, keepdims=True)
    z = (z - z_mean) / (z_std + norm_eps)

    output = action_head(z)


    DeepCFR_model = tf.keras.Model(inputs = [cards, bets], outputs = output)

    return DeepCFR_model

In [8]:
# Two card groups (2 and 3 cards), 4 bet slots and 3 action outputs.
DeepCFR_model = get_DeepCFR_model(
    output_dim=256,
    n_cards=[2, 3],
    n_bets=4,
    n_actions=3,
)

In [9]:
### Test Deep CFR Model on a single example (batch size of 1)

# card groups are shaped (batch, num_cards, 1) and hold card indices
hole_cards = tf.constant([[[1], [10]]], dtype=tf.float32)
flop = tf.constant([[[2], [9], [8]]], dtype=tf.float32)

cards_inp = [hole_cards, flop]

# bet history is shaped (batch, n_bets)
bets = tf.constant([[1, 2, 2, 4]], dtype=tf.float32)

DeepCFR_model([cards_inp, bets]).numpy()

array([[-0.5071793 ,  0.24127457, -0.6368238 ]], dtype=float32)

# Save random training examples to an HDF5 file with a MemoryWriter object



### What to do:

* Each memory (advantage_memory_player0, advantage_memory_player1 and strategy_memory) should have its own MemoryWriter object with its own file_name, because each MemoryWriter keeps its sample counter and its data array in that file.

In [10]:
import numpy as np
import tensorflow as tf
import h5py
import numpy as np
import random


def generate_random_sample():
    """
    Generates data in a form the network can process

    Returns a tuple (info_state, CFR_iteration, action_advantages), the input arguments
    to save_to_memory:
      info_state: [[hole_cards, flop_cards], bet_history] with shapes (1,2,1), (1,3,1) and (1,12)
      CFR_iteration: random integer in [1, 40000000)
      action_advantages: random array of shape (1,3), the target for the network's output

    NOTE(review): the bet history has 12 entries while the DeepCFR_model built earlier in this
    notebook uses n_bets = 4 -- confirm which size is intended.
    """
    deck_size = 52

    # draw the five cards without replacement so that no card is dealt twice;
    # indices cover the whole deck 0..51 (the upper bound of randint/choice is exclusive)
    dealt = np.random.choice(deck_size, size=5, replace=False)

    hole_cards = dealt[:2].reshape(1, 2, 1)

    flop_cards = dealt[2:].reshape(1, 3, 1)

    bet_history = np.random.randint(0, 3, (1, 12))

    info_state = [[hole_cards, flop_cards], bet_history] # this is the form in which the model takes its input

    CFR_iteration = np.random.randint(1, 40000000)

    action_advantages = np.random.random((1, 3)) + np.random.randint(-3, 3, (1, 3)) # target for the network's output

    return info_state, CFR_iteration, action_advantages # input arguments to save_to_memory

# One example draw, unpacked into the three values that make up a memory entry.
info_state, iteration, values = generate_random_sample()

In [190]:
def flatten_data_for_memory(info_state, iteration, values):
    """
    Packs one sample into a single 1-D array.

    Layout of the result: first card group | bet history | iteration | values.

    NOTE(review): only info_state[0][0] (the first card group) is stored; further card
    groups in info_state[0] are not written -- confirm that this is intended.
    """
    first_card_group = info_state[0][0]
    bet_history = info_state[1]

    pieces = (
        first_card_group.flatten(),
        bet_history.flatten(),
        np.array([iteration]),
        values.flatten(),
    )

    return np.concatenate(pieces, axis=0)


class MemoryWriter(object):

    """
    Keeps track of how many items are already processed for this memory.
    Its main purpose is to store the generated data with the save_to_memory method.

    The memory lives in an hdf5 file with two datasets:
      "counter": integer array of length 2; index 1 holds the number of samples seen so far
      "data": float32 array of shape (max_size, vector_length), one flattened sample per row
    """


    def __init__(self, max_size, vector_length, flatten_func, file_name):
        """
        max_size: number of rows (samples) the memory can hold
        vector_length: length of one flattened sample
        flatten_func: called as flatten_func(info_state, iteration, values), returns the row to store
        file_name: path of the hdf5 file; an existing file is continued, a missing one is created

        Raises OSError if the file exists but cannot be opened as an hdf5 file
        (it is not overwritten in that case).
        """

        self.max_size = max_size

        self.vector_len = vector_length # flatten the input that we want to store and take len

        self.flatten_func = flatten_func

        self.counter = np.array([0,0]) # can't assign a single number to a dataset in hdf5 files

        self.file_name = file_name

        # mode "a" opens an existing file for read/write and creates the file if it is missing,
        # so no exception has to be swallowed to tell the two cases apart
        with h5py.File(self.file_name, "a") as hf:

            counter_found = "counter" in hf

            if counter_found:
                # load previous counter
                self.counter = np.array(hf["counter"])
            else:
                hf.create_dataset("counter", data= self.counter)

            if "data" not in hf:
                hf.create_dataset("data", (self.max_size, self.vector_len), dtype = np.float32)

        if counter_found:
            print("previous counter loaded")
        else:
            print(f"new counter set and dataset of size {(self.max_size, self.vector_len)} is initiated.")

    def save_to_memory(self, data):
        """
        Takes an iterable of tuples (info_state, iteration, values) and stores each to the memory hdf5 file.

        Uses Reservoir sampling for samples that exceed the specified max_size:
        the first max_size samples fill rows 0 .. max_size - 1, afterwards a sample that is
        the (seen + 1)-th one replaces a random row with probability max_size / (seen + 1).
        """
        with h5py.File(self.file_name, 'r+') as hf:

            counter_ds = hf["counter"]
            data_ds = hf["data"]

            try:
                # store each tuple in data
                for info_state, iteration, values in data:

                    seen = int(self.counter[1]) # samples processed before this one

                    if seen < self.max_size:
                        # reservoir is not yet full: take the next empty row (starting at row 0)
                        row = seen
                    else:
                        # random.randint includes both ends, so there are seen + 1 outcomes
                        row = random.randint(0, seen)

                    # a drawn row outside of the reservoir means the sample is dropped
                    if row < self.max_size:
                        data_ds[row] = self.flatten_func(info_state, iteration, values)

                    self.counter[1] = seen + 1

                    # save counter every 100 steps while the batch is running
                    if not self.counter[1]%100:
                        counter_ds[1] = self.counter[1]
            finally:
                # always persist the final count, otherwise a later MemoryWriter on the same
                # file would start from a stale counter
                counter_ds[1] = self.counter[1]

In [191]:
# vector_length = 18 is the length flatten_data_for_memory returns for a sample from
# generate_random_sample(): 2 hole cards + 12 bet-history entries + 1 iteration + 3 advantages.
m = MemoryWriter(max_size = 1000, vector_length = 18, flatten_func = flatten_data_for_memory,
                 file_name = "value_memory_p1.h5")

previous counter loaded


In [192]:
# 10,000 samples are written to a memory created with max_size = 1000, so most of them
# are handled by the reservoir-replacement branch of save_to_memory.
data = [generate_random_sample() for _ in range(10000)]

m.save_to_memory(data)

In [34]:
#tf.keras.utils.plot_model(DeepCFR_model, "plotmodel.png")