In [11]:
from utils import get_info_state, print_obs
import os, pickle
from datetime import datetime
import tensorflow as tf
import clubs
import clubs_gym
import gym
from jupyter_nb_utils import cumulative_reward, plot_results, live_plot
from poker_agent import MinRaiseAgent, Bet_Fct, TensorflowAgent
from random import shuffle
import numpy as np
import time
from IPython.display import clear_output
from tqdm import trange
from deep_CFR_model import get_DeepCFR_model
from training_utils import get_tf_dataset

%matplotlib inline

[INFO] - Models trained on GPU, with memory growth activated.


In [None]:
# Configure TensorFlow device memory before any model is built.
# NOTE(review): presumably enables GPU memory growth (cpu=False) - confirm in utils.
from utils import activate_memory_growth

activate_memory_growth(cpu=False)


In [10]:
# Build the Deep-CFR network used for the training smoke test below.
# NOTE(review): the arguments are positional magic numbers; presumably
# (hidden units, cards per street, number of bets, number of actions) -
# TODO confirm against get_DeepCFR_model's signature and pass them by keyword.
# NOTE(review): this cell's execution count (In [10]) precedes the import cell
# (In [11]); on a fresh kernel the imports must be run first.
model = get_DeepCFR_model(256, [2, 3], 10, 4)

In [12]:
# HDF5 file that the training dataset below is read from.
# NOTE(review): `file_name` is rebound to a different file by the strategy-memory
# scan cell at the end of the notebook; re-run this cell before rebuilding train_ds.
file_name = 'tmp/advantage_memory_0.h5'

In [13]:
# Build the training dataset from `file_name` via the project helper.
# NOTE(review): num_cards=5 presumably is the total of hole + community cards
# (2 + 3) and num_bets/num_actions mirror the model's 10 and 4 - TODO confirm
# against get_tf_dataset and keep them in sync with get_DeepCFR_model(...).
train_ds = get_tf_dataset(file_name,
                          batch_size=10_000,
                          num_infostates=100_000,
                          num_cards=5,
                          num_bets=10,
                          num_actions=4)

In [14]:
# Adam optimizer with an explicit learning rate.
# NOTE(review): compile() is given no loss here; presumably the model defines
# its own loss / train_step - confirm in deep_CFR_model.
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=opt)

In [16]:
# Smoke-test training on the first 10 batches of train_ds.
# The recorded run below was interrupted manually (KeyboardInterrupt) after 6 steps.
# NOTE(review): `history` is rebound to a plain list inside the game-traversal
# loop further down, so the fit result is lost once that cell runs.
history = model.fit(train_ds.take(10))

      6/Unknown - 28s 5s/step - loss: 7.3183

KeyboardInterrupt: 

In [2]:
# Display mode of the game loop: None (silent), 'WebBrowser' (env.render()),
# anything else (e.g. 'Terminal') prints every observation to stdout.
render_mode = 'Terminal'
# When True, the evaluation cell sets a file name for the results plot.
save_plot = True
# NOTE(review): not read by any cell visible in this notebook.
eval_strategy_net = False

## Parameter settings for the Agents

In [3]:
# Agent class used to build the players.
# NOTE(review): the next cell rebinds agent_fct to TensorflowAgent, so this
# assignment only takes effect if that cell is skipped.
agent_fct = MinRaiseAgent

In [4]:
# Path handed to every agent constructor when the environment is initialised.
random_model_path = 'trained_models/action_models/random_model'
# Agent class used to build the players (overrides the MinRaiseAgent choice above).
agent_fct = TensorflowAgent
# Callable used in the game loop as `action_function(action_idx, obs)` to turn
# an action index into a bet. The previous name `action_fct` is neither defined
# nor imported in this notebook (NameError on a fresh kernel); `Bet_Fct` is the
# callable imported from poker_agent for this purpose.
# NOTE(review): assumes Bet_Fct takes the same 'action' argument - confirm in poker_agent.
action_function = Bet_Fct('action')

## Parameter settings for the poker (clubs_gym) environment

In [5]:
# --- Game parameters (tune these) -------------------------------------------
env_str = 'LDRL-Poker-v0'
num_players = 2
num_streets = 2
num_raises = 3
num_actions = 4
num_cards = [2, 3]  # first entry: hole cards, remaining entries: community cards per street
num_suits = 4
num_ranks = 13

# --- Values derived from the parameters above --------------------------------
# No community cards on the first street, then the per-street counts.
n_community_cards = [0, *num_cards[1:]]
# A hand is evaluated on at most five cards.
n_cards_for_hand = min(sum(num_cards), 5)
# Upper bound on the number of bets in one game.
max_bet_number = num_players * num_streets * num_raises

# --- Configuration handed to the clubs_gym environment -----------------------
config_dict = dict(
    num_players=num_players,
    num_streets=num_streets,
    blinds=[1, 2],
    antes=0,
    raise_sizes='pot',
    num_raises=num_raises,
    num_suits=num_suits,
    num_ranks=num_ranks,
    num_hole_cards=num_cards[0],
    mandatory_num_hole_cards=0,
    num_community_cards=n_community_cards,
    start_stack=1_000_000,
    num_cards_for_hand=n_cards_for_hand,
)

## Initialization of the Poker environment

In [6]:
# Register the configured poker variant with clubs_gym and instantiate it.
clubs_gym.envs.register({env_str: config_dict})
env = gym.make(env_str)

# Alternative for agents that take no constructor argument:
# env.register_agents([agent_fct() for _ in range(2)])

# Both seats are built from the same model path.
agents = [agent_fct(path) for path in (random_model_path, random_model_path)]
env.register_agents(agents)

OSError: SavedModel file does not exist at: trained_models/action_models/random_model/{saved_model.pbtxt|saved_model.pb}

# The Game traversal

In [None]:
# Logs filled by the game loop below.
# reward_history stays None until the first finished game, then becomes an array.
reward_history = None
# Action indices chosen by each player over all streets ...
action_history_p0, action_history_p1 = [], []
# ... and only those chosen while no community cards were dealt.
preflop_history_p0, preflop_history_p1 = [], []

In [None]:
# Play `how_many_games` games in `env`, logging every chosen action index per
# player (overall and pre-flop) and the rewards of each finished game.
# Rendering depends on `render_mode`: None is silent, 'WebBrowser' calls
# env.render(), any other value prints observations to stdout.

# Seconds to pause between frames in 'WebBrowser' mode.
sleep_time = 1
# NOTE(review): max_action is not used anywhere in this cell.
max_action = num_actions

if render_mode == 'WebBrowser':
    env.reset()
    env.render()

# NOTE(review): disabled interactive prompt, superseded by the fixed
# `how_many_games` below; consider deleting it.
# if render_mode is None:
#     how_many_games = input(
#         'Type in how many games should be used for ecaluation and hit enter to start. Games: ')
# elif render_mode == 'WebBrowser':
#     how_many_games = input(
#         'Open the rendered Game (link above), input how many games you wanna see and hit Enter to start. Games: ')
# elif render_mode == 'Terminal':
#     how_many_games = input(
#         'How many games do you wanna see? Hit enter to start. Games: ')
# else:
#     print(f'Render mode {render_mode}, not supported.')
#     raise

how_many_games = 500
# Final value of `counter` for every game (starts at 1, +1 per loop iteration,
# including the iteration that detects the terminal state).
round_counter = []

for i in range(int(how_many_games)):
    counter = 1
    obs = env.reset()
    
    # display stuff
    if render_mode is None:
        pass
    elif render_mode == 'WebBrowser':
        env.render()
        time.sleep(sleep_time)
    else:
        print(f'\n====================== Game {i} ======================\n')
        print(f'Round: {counter}')
        print_obs(obs, num_suits)

    # Bets placed so far in this game; passed to get_info_state.
    # NOTE(review): this rebinds `history`, which an earlier cell uses for the
    # result of model.fit(...).
    history = []
    while True:
        counter += 1
        if render_mode == 'WebBrowser':
            time.sleep(sleep_time)   
        
        # non terminal-state
        # (every player is still active and the env reports a player to act)
        if all(obs['active']) and not obs['action'] == -1:
            # 1.
            # agent chooses action based on info_state
            info_state = get_info_state(
                obs, history, max_bet_number, env.dealer.num_streets, config_dict)
            action_idx = env.act(info_state)
            
            # save bets for plots
            # obs['action'] == 0 means player 0 is acting; an empty
            # obs['community_cards'] is treated as pre-flop.
            if obs['action'] == 0:
                action_history_p0.append(action_idx)
                if not obs['community_cards']:
                    preflop_history_p0.append(action_idx)
            else:
                action_history_p1.append(action_idx)
                if not obs['community_cards']:
                    preflop_history_p1.append(action_idx)

            # 2.
            # take action within environment
            bet = action_function(action_idx, obs)
            # NOTE(review): `done` is never read; termination is derived from
            # `obs` at the top of the loop instead.
            obs, rewards, done, _ = env.step(bet)
            history.append(bet)  # for info states

            # display stuff
            if render_mode is None:
                pass
            elif render_mode == 'WebBrowser':
                env.render()
            else:
                print(f'Action: {action_idx}')
                print(f'Bet: {bet}', end='\n\n')
                print('----------------------------------------------------\n')
                print(f'Round: {counter}')
                print_obs(obs, num_suits)

        # terminal state
        else:
            # save results
            # NOTE(review): `rewards` is only bound by env.step above; if the
            # very first observation of the first game is already terminal this
            # raises NameError (later games would silently reuse stale rewards).
            # NOTE(review): np.append copies the whole array on every game.
            if reward_history is None:
                reward_history = np.array([rewards])
            else:
                reward_history = np.append(reward_history, [rewards], axis=0)

            # display stuff
            if render_mode is None:
                if i == int(how_many_games)-1:
                    print(f'[INFO] - Done.')
            elif render_mode == 'WebBrowser':
                live_plot(reward_history, i)
            else:
                print(f'Payoffs: Player_0 {rewards[0]}, Player_1 {rewards[1]}')

            break
        
    round_counter.append(counter)
        
# Largest per-game counter value observed over all games.
print(max(round_counter))

# Evaluate the Poker Game

Player 0 is the agent that uses the trained model.

In [None]:
# Decide where (and whether) the results figure is written.
# `results_dir` is not defined by any earlier cell, so fall back to a local
# 'results' folder unless the kernel already provides one.
results_dir = globals().get('results_dir', 'results')

if save_plot:
    fn = 'game_results.svg'
    # Make sure the target folder exists before the plot is saved into it.
    os.makedirs(results_dir, exist_ok=True)
    save_path = os.path.join(results_dir, fn)
else:
    # Previously `fn` was unbound on this path, which raised NameError.
    # NOTE(review): assumes plot_results treats save_path=None as "do not save" - confirm.
    save_path = None

# create plot
plot_results(reward_history, num_actions, action_history_p0,
             action_history_p1, preflop_history_p0, preflop_history_p1, save_path)

In [2]:
# Scratch check: scale sample rewards by their standard deviation.
a = [345, 34, 0, -23, -342]

np.asarray(a) / np.std(a)

array([ 1.58251953,  0.15595845,  0.        , -0.1055013 , -1.56875849])

In [None]:
import h5py
import random

# Scan the stored strategy memory for rows containing NaN or +/-inf values.
# NOTE(review): this rebinds `file_name`, which the training cells at the top
# of the notebook also use.
file_name = 'memories_action_2-Model/strategy_memory.h5'

with h5py.File(file_name, "r") as hf:

    # Entry [1] of the "counter" dataset is used as the number of rows to scan.
    l = int(hf.get("counter")[1])
    # Look the dataset up once instead of on every iteration.
    stored_vectors = hf.get("data")
    # Report progress roughly every 1%; the max() guards against a modulo by
    # zero when fewer than 100 rows are stored.
    progress_step = max(1, l // 100)

    for i in range(l):
        stored_vector = np.array(stored_vectors[i])

        if np.all(np.isfinite(stored_vector)):
            if i % progress_step == 0:
                print(f'file {i} of {l}')
        else:
            # Non-finite row: report its index and contents.
            print(f'idx {i}')
            print(stored_vector)

file 0 of 562567
file 5625 of 562567
file 11250 of 562567
file 16875 of 562567
file 22500 of 562567
file 28125 of 562567
file 33750 of 562567
file 39375 of 562567
file 45000 of 562567
file 50625 of 562567
file 56250 of 562567
file 61875 of 562567
file 67500 of 562567
file 73125 of 562567
file 78750 of 562567
file 84375 of 562567
file 90000 of 562567
file 95625 of 562567
file 101250 of 562567
file 106875 of 562567
file 112500 of 562567
file 118125 of 562567
file 123750 of 562567
file 129375 of 562567
file 135000 of 562567
file 140625 of 562567
file 146250 of 562567
file 151875 of 562567
file 157500 of 562567
file 163125 of 562567
file 168750 of 562567
file 174375 of 562567
file 180000 of 562567
file 185625 of 562567
file 191250 of 562567
file 196875 of 562567
file 202500 of 562567
file 208125 of 562567
file 213750 of 562567
file 219375 of 562567
file 225000 of 562567
file 230625 of 562567
file 236250 of 562567
file 241875 of 562567
file 247500 of 562567
file 253125 of 562567
file 258750