In [1]:
import tensorflow as tf
import h5py
import numpy as np
import random

In [2]:
def get_embedding_model(output_dim, num_cards):
    
    input_dim_rank = 13
    input_dim_suit = 4
    input_dim_card = 52

    cards_input = tf.keras.Input((num_cards,))

    #### EMBEDDING MODEL (used for each group of cards)
    
    rank_embedding = tf.keras.layers.Embedding(
        input_dim_rank, output_dim, embeddings_initializer='uniform',
        embeddings_regularizer=None, activity_regularizer=None,
        embeddings_constraint=None, mask_zero=False, input_length=None,
    )

    suit_embedding = tf.keras.layers.Embedding(
        input_dim_suit, output_dim, embeddings_initializer='uniform',
        embeddings_regularizer=None, activity_regularizer=None,
        embeddings_constraint=None, mask_zero=False, input_length=None,
    )

    card_embedding = tf.keras.layers.Embedding(
        input_dim_card, output_dim, embeddings_initializer='uniform',
        embeddings_regularizer=None, activity_regularizer=None,
        embeddings_constraint=None, mask_zero=False, input_length=None,
    )

    # cards is a list of card indices (2 for preflop, 3 for flop, 1 for turn, 1 for river)

    #x = tf.keras.layers.Flatten()(cards_input)
    x = cards_input
    valid = tf.cast(x >= tf.constant(0.), tf.float32)

    x = tf.clip_by_value(x, clip_value_min = 0, clip_value_max = 1e6)
    
    embs = card_embedding(x) + rank_embedding(x // 4) + suit_embedding(x%4)
    
    embs = embs * tf.expand_dims(valid, axis=-1)
        
    embs = tf.reduce_sum(embs , axis=1) # sum over num_cards card embeddings
    
    model = tf.keras.Model(cards_input, embs)
    
    return model

#card1 = tf.keras.layers.Dense(output_dim)

In [3]:
model = get_embedding_model(256, 3)

In [61]:
cards_input = tf.constant([[1,10,3], 
                           #[[1],[10],[3]], 
                           #[[1],[10],[3]], 
                           #[[1],[10],[3]]
                          ],dtype=tf.float32)

In [62]:
cards_input.shape

TensorShape([1, 3])

In [63]:
model(cards_input)

<tf.Tensor: shape=(1, 256), dtype=float32, numpy=
array([[-1.18423045e-01,  1.26143247e-02,  2.32719574e-02,
         1.09996662e-01,  9.07058716e-02,  4.27301191e-02,
        -7.79047459e-02, -1.15122154e-01,  1.08759120e-01,
        -1.34283260e-01, -4.10801768e-02,  1.74363822e-01,
        -1.92203037e-02,  1.03308372e-01,  6.82244003e-02,
        -1.47374958e-01, -6.16319329e-02,  8.32098201e-02,
         2.48851016e-01,  3.64313200e-02,  2.13759989e-02,
         6.92469031e-02,  8.32180828e-02,  1.48655456e-02,
         4.25132886e-02, -3.21364887e-02, -2.21473277e-02,
         2.27494970e-01,  1.30238757e-02, -8.74953642e-02,
         2.63550609e-01,  5.74711040e-02,  1.09141007e-01,
        -2.11192071e-02,  1.62717760e-01,  1.10640056e-01,
         6.56881258e-02,  3.80837545e-02,  3.08267772e-05,
         1.67228103e-01, -5.75805753e-02, -1.69220358e-01,
         1.84688754e-02, -5.91592789e-02,  1.29773840e-02,
         4.65814248e-02,  7.06939921e-02,  8.36270750e-02,
      

In [4]:
loss_tracker = tf.keras.metrics.Mean(name="loss")
class CustomModel(tf.keras.Model):

    def train_step(self, data):
        hole_cards, bets, iterations, targets = data
        with tf.GradientTape() as tape:
            predictions = self([[hole_cards],bets])
            
            loss = tf.reduce_mean(iterations+1 * tf.reduce_sum((targets - predictions)**2, axis = -1), axis=None)
            
        gradients = tape.gradient(loss, self.trainable_variables)
        
        
        gradients = [tf.clip_by_norm(g, 1.0)
             for g in gradients]


        # Applying the gradients on the model using the specified optimizer
        self.optimizer.apply_gradients(
            zip(gradients, self.trainable_variables)
        )

        # Let's update and return the training loss metric.
        loss_tracker.update_state(loss)
        return {"loss": loss_tracker.result()}

    @property
    def metrics(self):
        # We need to list our metrics here so the `reset_states()` can be
        # called automatically.
        return [loss_tracker]

In [5]:
def get_DeepCFR_model(output_dim, n_cards, n_bets, n_actions):
    """
    output_dim: dimensionality of embedding
    n_cards: a list of card numbers for each phase of the game (e.g. 2 preflop, 3 flop)
    n_bets: maximal number of bets in a game
    n_actions: number of possible action categories
    """
    
    # define inputs
    
    #info_state = ... (stored_vector)
    
    #hole_cards = [info_state[:2]]
    # ...
    
    
    cards = [tf.keras.Input([n,], name = f"cards{i}") for i,n in enumerate(n_cards)]
    
    
    bets = tf.keras.Input([n_bets], name = "bets")
    
    #shape = [n for n in n_cards]
    #inp = tf.keras.Input((2,None,None,None),ragged=True)   #
    
    #cards = inp[0]
    #bets = inp[1]
    
    ### define layers

    # embedding layer for each card type (pre-flop, flop, turn, river)
    output_dims = [output_dim for _ in range(len(n_cards))]
    
    embedding_layers = [get_embedding_model(output_dim, num_cards) for num_cards, 
                        num_output_dims in zip(n_cards, output_dims)]

    card1 = tf.keras.layers.Dense(output_dim, activation = "relu")
    card2 = tf.keras.layers.Dense(output_dim, activation = "relu")
    card3 = tf.keras.layers.Dense(output_dim, activation = "relu")

    bet1 = tf.keras.layers.Dense(output_dim)
    bet2 = tf.keras.layers.Dense(output_dim)

    comb1 = tf.keras.layers.Dense(output_dim)
    comb2 = tf.keras.layers.Dense(output_dim)
    comb3 = tf.keras.layers.Dense(output_dim)

    action_head = tf.keras.layers.Dense(n_actions)


    # card branch
    card_embs = []
    for embedding, card_group in zip(embedding_layers, cards):
        card_embs.append(embedding(card_group))

    card_embs = tf.concat(card_embs, axis= 1)

    x = card1(card_embs)
    x = card2(x)
    x = card3(x)

    # bet branch
    bet_size = tf.clip_by_value(bets, tf.constant(0.), tf.constant(1e6)) # clip bet sizes
    bets_occured = tf.cast(bets >= tf.constant(0.), tf.float32) # check if bet occured
    bet_features = tf.concat([bet_size, bets_occured], axis = -1)   # bet size and boolean bet
    y = bet1(bet_features)
    y = bet2(y)
    
    # combine bet history and card embedding branches
    z = tf.concat([x,y],axis=-1)
    z = tf.nn.relu(comb1(z))
    z = tf.nn.relu(comb2(z) + z)
    z = tf.nn.relu(comb3(z) + z)

    # normalize (needed because of bet sizes)
    z = (z - tf.math.reduce_mean(z, axis=None)) / tf.math.reduce_std(z, axis=None)

    output = action_head(z)

    
    DeepCFR_model = CustomModel(inputs = [cards, bets], outputs = output)
    
    return DeepCFR_model

In [6]:
DeepCFR_model = get_DeepCFR_model(output_dim = 512, n_cards = [2,3], n_bets = 4, n_actions = 3)

In [67]:
### Test Deep CFR Model with batch size of 4



hole_cards = tf.constant( [   
    [1, 10],                            
    #[[7],[19]],                          
    #[[35],[51]],                         
    #[[23],[12]]                           
],dtype=tf.float32)


flop = tf.constant([
    [2, 9, 8],            
    #[[2],[9],[8]],         
    #[[2],[9],[8]],           
    #[[2],[9],[8]]
], dtype = tf.float32)

cards_inp = [hole_cards, flop]

bets = tf.constant([
    [1,2,2,4],                  
    #[1,0,2,4],                
    #[1,0,2,4],                
    #[1,0,2,4]
], dtype= tf.float32)


DeepCFR_model([cards_inp, bets]).numpy()

array([[-1.4183457, -1.4132977, -0.2892659]], dtype=float32)

# Save random training examples with a Memory Object to a hdf5 file



### What to do:

* Each memory (advantage_memory_player0, advantage_memory_player1 and strategy_memory) should have its own MemoryWriter object with a different file_name set. This is 

In [12]:
import numpy as np
import tensorflow as tf
import h5py
import numpy as np
import random


def generate_random_sample():
    """
    Generates data in a form the network can process
    """
    
    hole_cards = np.random.randint(0,51,(1,2,1))

    flop_cards = np.random.randint(0,51,(1,3,1))

    bet_history = np.random.randint(0,3,(1,12))


    info_state = [[hole_cards, flop_cards], bet_history] # this is the form in which the model takes its input

    CFR_iteration = np.random.randint(1,40000000)

    action_advantages = np.random.random((1,3)) + np.random.randint(-3,3,(1,3)) # target for the network's output
    
    return info_state, CFR_iteration, action_advantages # input arguments to save_to_memory

info_state, iteration, values = generate_random_sample()

In [13]:
def flatten_data_for_memory(info_state, iteration, values):
    """
    Flattens data to store into a memory object.
    """
    flattened_data = np.concatenate(
                    [
                        info_state[0][0].flatten(),
                        info_state[1].flatten(),
                        np.array([iteration]),
                        values.flatten() 
                    ],  axis = 0)
    
    return flattened_data


class MemoryWriter(object):
    
    """
    Keeps track of how many items are already processed for this memory. 
    It's main purpose is to store the generated data with the save_to_memory method
    """
    
    
    def __init__(self, max_size, vector_length, flatten_func, file_name):
        
        self.max_size = max_size
        
        self.vector_len = vector_length # flatten the input that we want to store and take len
        
        self.flatten_func = flatten_func
            
        self.counter = np.array([0,0]) # can't assign a single number to a dataset in hdf5 files
        
        self.file_name = file_name
        
        # load previous counter or initiate memory file
        try:
            with h5py.File(self.file_name, "r") as hf:
                self.counter = np.array(hf.get("counter"))
            print("previous counter loaded")
                
        except:
            # create new dataset file with a counter and an array
            with h5py.File(self.file_name, "w") as hf:
                
                hf.create_dataset("counter", data= self.counter)
                hf.create_dataset("data", (self.max_size, self.vector_len), dtype = np.float32)
            print(f"new counter set and dataset of size {(self.max_size, self.vector_len)} is initiated.")
        
    def save_to_memory(self, data):
        """
        Takes a list of tuples (info_state, iteration, values) and stores each to the memory hdf5 file.
        
        Uses Reservoir sampling for samples that exceed the specified max_size.
        """
        with h5py.File(self.file_name, 'r+') as hf:
            
            # store each tuple in data
            for info_state, iteration, values in data:
                
                self.counter[1] += 1
                
                # save counter only every 100 steps
                if not self.counter[1]%100:
                    hf.get("counter")[1] = self.counter[1]
                
                # if reservoir is not yet full, simply add new sample
                if self.counter[1] < self.max_size:

                    flattened_data = self.flatten_func(info_state, iteration, values)

                    hf.get("data")[self.counter[1]] = flattened_data # fill empty row with data

                # if reservoir is full already, randomly replace or not replace old data
                else:
                    idx = random.randint(0, self.counter[1]) # index to replace (or not)
                    if idx < self.max_size:

                        flattened_data = self.flatten_func(info_state, iteration, values)

                        hf.get("data")[idx] =  flattened_data # replace the old data at idx with the new sample

                    else:
                        pass # data is not stored in favor of old data

In [14]:
m = MemoryWriter(max_size = 1000, vector_length = 18, flatten_func = flatten_data_for_memory,
                 file_name = "value_memory_p1.h5")

new counter set and dataset of size (1000, 18) is initiated.


In [192]:
data = [generate_random_sample() for _ in range(10000)]

m.save_to_memory(data)

In [211]:
with h5py.File("advantage_memory.h5","r+") as hf:
    print(np.array(hf.get("counter"))[1])

16900


# Write Code to Train the Model on data in advantage_memory.h5 and strategy_memory.h5

In [13]:
with h5py.File("advantage_memory.h5","r") as hf:
    print(np.array(hf.get("counter"))[1])
    
    stored_vector = np.array(hf.get("data")[12])

40000


In [13]:
def get_tf_dataset(file_name, batch_size, num_infostates, game_type, num_bets, num_actions):
    """
    Creates a tensorflow dataset from a .h5 file
    """
    
    
    def get_input_targets(stored_vector, game_type):
        
        
        
        if game_type == "hole_cards only":
            #indices = [2,8,9]
            indices = [2, 2+num_bets, 2+num_bets+1]
            hole_cards = stored_vector[:indices[0]]
            bets = stored_vector[indices[0]:indices[1]]
            iteration = stored_vector[indices[1]]
            values = stored_vector[indices[-1]:]
            
            return tf.constant(hole_cards), tf.constant(bets), tf.constant(iteration), tf.constant(values)
        
        elif game_type == "hole_cards + flop":
            indices = [2, 6, 6+num_bets, 6+num_bets+1]
            
            hole_cards = stored_vector[:indices[0]]
            flop_cards = stored_vector[indices[0]:indices[1]]
            bets = stored_vector[indices[1]:indices[2]]
            iteration = stored_vector[indices[2]]
            values = stored_vector[indices[-1]:]
            
            return tf.constant(hole_cards), tf.constant(flop_cards), tf.constant(bets), tf.constant(iteration), tf.constant(values)


        #return network_input, targets, iteration    


    

    def memory_generator():
        with h5py.File(file_name,"r") as hf:

            while True:
                #np.array(hf.get("counter"))[1])
                idx = random.randint(1,num_infostates)
                stored_vector = np.array(hf.get("data")[idx])

                yield get_input_targets(stored_vector, game_type)

    
    if game_type == "hole_cards only":
        out_signature = (tf.TensorSpec(shape=(2,), dtype=tf.float32),
                                       tf.TensorSpec(shape=(num_bets,), dtype=tf.float32),
                                       tf.TensorSpec(shape=(), dtype=tf.float32),
                                       tf.TensorSpec(shape=(num_actions,), dtype=tf.float32))
        
    elif game_type == "hole_cards + flop":
        out_signature = (tf.TensorSpec(shape=(2,), dtype=tf.float32),
                         tf.TensorSpec(shape=(3,), dtype=tf.float32),
                                       tf.TensorSpec(shape=(num_bets,), dtype=tf.float32),
                                       tf.TensorSpec(shape=(), dtype=tf.float32),
                                       tf.TensorSpec(shape=(num_actions,), dtype=tf.float32))

    train_ds = tf.data.Dataset.from_generator(memory_generator, 
                                   output_signature= out_signature
    ).batch(batch_size).prefetch(tf.data.AUTOTUNE)
    
    return train_ds

In [14]:
train_ds = get_tf_dataset("advantage_memory.h5", 32, 40_000, "hole_cards only", 6, 5)

In [15]:
class DeepCFR_Model(tf.keras.Model):
    
    def __init__(self):
        super(DeepCFR_Model,self).__init__()
        self.CFR_model = get_DeepCFR_model(output_dim = 256, n_cards = [2], n_bets = 6, n_actions = 5)
        self.loss_tracker = tf.keras.metrics.Mean(name="loss")
        
    def call(self, inputs):
        return self.CFR_model(inputs)
    
    def train_step(self, data):

        with tf.GradientTape() as tape:
            loss = self._compute_loss(data)

        
        gradients = tape.gradient(loss, self.CFR_model.trainable_weights)

        # Applying the gradients on the model using the specified optimizer
        self.optimizer.apply_gradients(
            zip(gradients, self.CFR_model.trainable_weights)
        )

        # Let's update and return the training loss metric.
        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}

    def test_step(self, data):
        loss = self._compute_loss(data)

        # Let's update and return the loss metric.
        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}

    def _compute_loss(self, data):
        
        hole_cards, bets, iterations, targets = data
        
        predictions = self.CFR_model([[hole_cards],bets])
        
        return tf.reduce_mean(iterations+1 * tf.reduce_sum((targets - predictions)**2, axis = -1), axis=None)

    @property
    def metrics(self):
        # We need to list our metrics here so the `reset_states()` can be
        # called automatically.
        return [self.loss_tracker]

In [16]:
model = get_DeepCFR_model(output_dim = 256, n_cards = [2], n_bets = 6, n_actions = 5)

In [17]:
optimizer = "adam"
loss = None
model.compile(optimizer = "adam")

In [27]:
b = model.get_weights()

In [28]:
model.set_weights(b)

In [18]:
model.fit(train_ds)

 194164/Unknown - 3305s 17ms/step - loss: 34.0950

KeyboardInterrupt: 

In [18]:
with h5py.File("advantage_memory.h5","r") as hf:
    array = np.array(hf.get("data"))

In [43]:
array[40006]

array([47.       , 33.       ,  2.       , -1.       , -1.       ,
       -1.       , -1.       , -1.       ,  0.       ,  0.8958747,
        0.8958747, -2.433532 , -1.1041254, -2.344612 ], dtype=float32)

In [24]:
#tf.keras.utils.plot_model(DeepCFR_model, "plotmodel.png")