In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from pypokerengine.players import BasePokerPlayer
from pypokerengine.utils.card_utils import Card, Deck
from pypokerengine.api.game import setup_config, start_poker

import pickle
import tensorflow as tf
import random
import os
import scipy
import scipy.signal

import sys
sys.path.insert(0, '../scripts/')

import PlayerModels as pm
from MyEmulator import MyEmulator
# from DQNPlayer import DQNPlayer
from util import *

import threading
import multiprocessing

from random import choice
from time import sleep
from time import time

## Util

In [2]:
# Copies one set of variables to another.
# Used to set worker network parameters to those of global network.
def update_target_graph(from_scope,to_scope):
    from_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, from_scope)
    to_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, to_scope)

    op_holder = []
    for from_var,to_var in zip(from_vars,to_vars):
        op_holder.append(to_var.assign(from_var))
    return op_holder

# Discounting function used to calculate discounted returns.
def discount(x, gamma):
    return scipy.signal.lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1]

#Used to initialize weights for policy and value output layers
def normalized_columns_initializer(std=1.0):
    def _initializer(shape, dtype=None, partition_info=None):
        out = np.random.randn(*shape).astype(np.float32)
        out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
        return tf.constant(out)
    return _initializer

## Network

In [3]:
class AC_Network():
    def __init__(self, a_size, scope, trainer, h_size=64, is_train=True):
        self.h_size = h_size
        self.is_train = is_train
        
        with tf.variable_scope(scope):
            #Input and visual encoding layers
            self.scalar_input = tf.placeholder(tf.float32, [None, 17 * 17 * 1])
            self.features_input = tf.placeholder(tf.float32, [None, 20])

            xavier_init = tf.contrib.layers.xavier_initializer()

            self.img_in = tf.reshape(self.scalar_input, [-1, 17, 17, 1])
            self.conv1 = tf.layers.conv2d(self.img_in, 32, 5, 2, activation=tf.nn.elu,
                                          kernel_initializer=xavier_init)
            self.conv2 = tf.layers.conv2d(self.conv1, 32, 3, activation=tf.nn.elu, kernel_initializer=xavier_init)
            self.conv3 = tf.layers.conv2d(self.conv2, self.h_size, 5, activation=tf.nn.elu,
                                          kernel_initializer=xavier_init)
            self.conv3_flat = tf.contrib.layers.flatten(self.conv3)
    #             self.conv3_flat = tf.layers.dropout(self.conv3_flat)

            self.d1 = tf.layers.dense(self.features_input, 32, activation=tf.nn.elu, kernel_initializer=xavier_init)
    #             self.d1 = tf.layers.dropout(self.d1)
            self.d2 = tf.layers.dense(self.d1, self.h_size, activation=tf.nn.elu, kernel_initializer=xavier_init)
    #             self.d2 = tf.layers.dropout(self.d2)

            self.merge = tf.concat([self.conv3_flat, self.d2], axis=1)
            self.d3 = tf.layers.dense(self.merge, self.h_size, activation=tf.nn.elu, kernel_initializer=xavier_init)
    #             self.d3 = tf.layers.dropout(self.d3)
            self.d4 = tf.layers.dense(self.d3, self.h_size, activation=tf.nn.elu, kernel_initializer=xavier_init)

            #Output layers for policy and value estimations
            self.policy = tf.layers.dense(self.d4, a_size,
                activation=tf.nn.softmax,
                kernel_initializer=normalized_columns_initializer(0.01))
            self.predict = tf.argmax(self.policy, 1)
            self.value = tf.layers.dense(self.d4, 1,
                activation=None,
                kernel_initializer=normalized_columns_initializer(1.0))

            #Only the worker network need ops for loss functions and gradient updating.
            if scope != 'global' and is_train:
                self.actions = tf.placeholder(shape=[None],dtype=tf.int32)
                self.actions_onehot = tf.one_hot(self.actions,a_size,dtype=tf.float32)
                self.target_v = tf.placeholder(shape=[None],dtype=tf.float32)
                self.advantages = tf.placeholder(shape=[None],dtype=tf.float32)

                self.responsible_outputs = tf.reduce_sum(self.policy * self.actions_onehot, [1])

                #Loss functions
                self.value_loss = 0.5 * tf.reduce_sum(tf.square(self.target_v - tf.reshape(self.value,[-1])))
                self.entropy = tf.reduce_sum(-self.policy * tf.log(tf.maximum(self.policy, 0.00001))\
                                             - (1 - self.policy)\
                                             * tf.log(1 - tf.maximum(0.00001, self.policy)))
                self.policy_loss = -tf.reduce_sum(tf.log(self.responsible_outputs)*self.advantages)
                self.loss = 0.5 * self.value_loss + self.policy_loss - self.entropy * 0.05

                #Get gradients from local network using local losses
                local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
                self.gradients = tf.gradients(self.loss,local_vars)
                self.var_norms = tf.global_norm(local_vars)
                grads,self.grad_norms = tf.clip_by_global_norm(self.gradients, 200.0)
    #                 self.grad_norms = tf.global_norm(grads)

                #Apply local gradients to global network
                global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
                self.apply_grads = trainer.apply_gradients(zip(grads,global_vars))

In [4]:
class A3CPlayer(BasePokerPlayer):
    '''
    A3C Player, bot wich use A3C architecture.

    Parametrs
    ---------
    h_size : shape of layer after conv part (also before double part too)
    '''
    def __init__(self, a_size, h_size=64, debug=False):
        self.h_size = h_size
        self.debug = debug
        
        with open('../cache/hole_card_estimation.pkl', 'rb') as f:
            self.hole_card_est = pickle.load(f)
            
        tf.reset_default_graph()
        self.net = AC_Network(a_size, 'train_0', tf.train.AdamOptimizer(), is_train=False)
        self.saver = tf.train.Saver()
            
        self.init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(self.init)
        
        ckpt = tf.train.get_checkpoint_state('../cache/models/A3C/')
        self.saver.restore(self.sess, ckpt.model_checkpoint_path)
        

    def _print(self, *msg):
        if self.debug:
            print(msg)
        
    def declare_action(self, valid_actions, hole_card, round_state):
        street = round_state['street']
        bank = round_state['pot']['main']['amount']
        stack = [s['stack'] for s in round_state['seats'] if s['uuid'] == self.uuid][0]
        other_stacks = [s['stack'] for s in round_state['seats'] if s['uuid'] != self.uuid]
        dealer_btn = round_state['dealer_btn']
        small_blind_pos = round_state['small_blind_pos']
        big_blind_pos = round_state['big_blind_pos']
        next_player = round_state['next_player']
        round_count = round_state['round_count']
        estimation = self.hole_card_est[(hole_card[0], hole_card[1])]

        
        self.features = get_street(street)
        self.features.extend([bank, stack, dealer_btn, small_blind_pos, big_blind_pos, next_player, round_count])
        self.features.extend(other_stacks)
        self.features.append(estimation)
        
        img_state = img_from_state(hole_card, round_state)
        img_state = process_img(img_state)
        
        action_num = self.sess.run(self.net.predict, feed_dict={self.scalar_input: [img_state],
                                                                self.features_input: [self.features]})[0]
        self._print(qs)
        action, amount = get_action_by_num(action_num, valid_actions)                    
        
        return action, amount
        
    def receive_game_start_message(self, game_info):
        pass
    
    def receive_round_start_message(self, round_count, hole_card, seats):
        self._print(['Hole:', hole_card])        
        self.start_stack = [s['stack'] for s in seats if s['uuid'] == self.uuid][0]
        self._print(['Start stack:', self.start_stack])
        estimation = self.hole_card_est[(hole_card[0], hole_card[1])]
        self._print(['Estimation:', estimation])
    
    def receive_street_start_message(self, street, round_state):
        pass
            
    def receive_game_update_message(self, action, round_state):
        pass
    
    def receive_round_result_message(self, winners, hand_info, round_state):
        end_stack = [s['stack'] for s in round_state['seats'] if s['uuid'] == self.uuid][0]
        self._print(['End stack:', end_stack])

In [5]:
def init_emul(my_uuid_):
    global my_uuid
    my_uuid = my_uuid_

    emul.register_player("1", pm.CallPlayer())
    emul.register_player("2", pm.CallPlayer())
    emul.register_player("3", pm.FoldPlayer())
    emul.register_player("4", pm.FoldPlayer())
    emul.register_player("5", pm.HeuristicPlayer())
    emul.register_player("6", pm.HeuristicPlayer())
    emul.register_player("7", pm.RandomPlayer())
    emul.register_player("8", pm.RandomPlayer())
    emul.register_player("9", pm.CallPlayer())


    players_info = {
        "1": { "name": "CallPlayer1", "stack": 1500 },
        "2": { "name": "CallPlayer2", "stack": 1500 },
        "3": { "name": "FoldPlayer1", "stack": 1500 },
        "4": { "name": "FoldPlayer2", "stack": 1500 },
        "5": { "name": "HeuristicPlayer1", "stack": 1500 },
        "6": { "name": "HeuristicPlayer2", "stack": 1500 },
        "7": { "name": "RandomPlayer1", "stack": 1500 },
        "8": { "name": "RandomPlayer2", "stack": 1500 },
        "9": { "name": "DQN", "stack": 1500 }
    }

In [6]:
class Worker():
    def __init__(self, name, a_size, trainer, model_path, global_episodes):    
        with open('../cache/hole_card_estimation.pkl', 'rb') as f:
            self.hole_card_est = pickle.load(f)
            
        self.name = "worker_" + str(name)
        self.number = name        
        self.model_path = model_path
        self.trainer = trainer
        self.global_episodes = global_episodes
        self.increment = self.global_episodes.assign_add(1)
        self.episode_rewards = []
        self.episode_lengths = []
        self.episode_mean_values = []
        self.summary_writer = tf.summary.FileWriter("../log/A3C/train_"+str(self.number))

        #Create the local copy of the network and the tensorflow op to copy global paramters to local network
        self.local_AC = AC_Network(a_size,self.name,trainer)
        self.update_local_ops = update_target_graph('global',self.name)    
        
        emul = MyEmulator()
        emul.set_game_rule(9, 50, 15, 0)
        self.my_uuid = '9'
        self.players_info = {
            "1": { "name": "f1", "stack": 1500 },
            "2": { "name": "f2", "stack": 1500 },
            "3": { "name": "f3", "stack": 1500 },
            "4": { "name": "f4", "stack": 1500 },
            "5": { "name": "f5", "stack": 1500 },
            "6": { "name": "f6", "stack": 1500 },
            "7": { "name": "f7", "stack": 1500 },
            "8": { "name": "f8", "stack": 1500 },
            "9": { "name": "f9", "stack": 1500 }
        }
    
        emul.register_player("1", pm.CallPlayer())
        emul.register_player("2", pm.CallPlayer())
        emul.register_player("3", pm.FoldPlayer())
        emul.register_player("4", pm.FoldPlayer())
        emul.register_player("5", pm.HeuristicPlayer())
        emul.register_player("6", pm.HeuristicPlayer())
        emul.register_player("7", pm.RandomPlayer())
        emul.register_player("8", pm.RandomPlayer())
        emul.register_player("9", pm.CallPlayer())
    
        self.actions = self.actions = np.identity(a_size,dtype=bool).tolist()
        self.enul = emul
        
    def init_emul(self, my_uuid='9'):
        emul = MyEmulator()
        emul.set_game_rule(9, 50, 15, 0)
        self.my_uuid = my_uuid
        self.players_info = {
            "1": { "name": "f1", "stack": 1500 },
            "2": { "name": "f2", "stack": 1500 },
            "3": { "name": "f3", "stack": 1500 },
            "4": { "name": "f4", "stack": 1500 },
            "5": { "name": "f5", "stack": 1500 },
            "6": { "name": "f6", "stack": 1500 },
            "7": { "name": "f7", "stack": 1500 },
            "8": { "name": "f8", "stack": 1500 },
            "9": { "name": "f9", "stack": 1500 }
        }
    
        emul.register_player("1", pm.CallPlayer())
        emul.register_player("2", pm.CallPlayer())
        emul.register_player("3", pm.FoldPlayer())
        emul.register_player("4", pm.FoldPlayer())
        emul.register_player("5", pm.HeuristicPlayer())
        emul.register_player("6", pm.HeuristicPlayer())
        emul.register_player("7", pm.RandomPlayer())
        emul.register_player("8", pm.RandomPlayer())
        emul.register_player("9", pm.CallPlayer())
        self.emul = emul
        
    def train(self,rollout,sess,gamma,bootstrap_value):
        rollout = np.array(rollout)
        last_img_states = rollout[:,0]
        last_features = rollout[:,1]
        last_actions_num = rollout[:,2]
        rewards = rollout[:,3]
        img_states = rollout[:,4]
        features = rollout[:,5]
        values = rollout[:,7]

        self.value_plus = np.asarray(values.tolist() + [bootstrap_value])
        advantages = rewards + gamma * self.value_plus[1:] - self.value_plus[:-1]
        discounted_rewards = rewards

        # Update the global network using gradients from loss
        # Generate network statistics to periodically save
        feed_dict = {self.local_AC.target_v:discounted_rewards,
            self.local_AC.scalar_input:np.vstack(last_img_states),
            self.local_AC.features_input:np.vstack(last_features),
            self.local_AC.actions:last_actions_num,
            self.local_AC.advantages:advantages}

        v_l,p_l,e_l,g_n,v_n, _ = sess.run([self.local_AC.value_loss,
            self.local_AC.policy_loss,
            self.local_AC.entropy,
            self.local_AC.grad_norms,
            self.local_AC.var_norms,
            self.local_AC.apply_grads],
            feed_dict=feed_dict)
        return v_l / len(rollout),p_l / len(rollout),e_l / len(rollout), g_n, v_n
        
    def work(self,gamma,sess,coord,saver):
        episode_count = sess.run(self.global_episodes)
        total_steps = 0
        print ("Starting worker " + str(self.number))
        with sess.as_default(), sess.graph.as_default():                 
            while not coord.should_stop():
                self.init_emul(str(np.random.randint(1, 10)))
                sess.run(self.update_local_ops)
                episode_buffer = []
                episode_values = []
                episode_reward = 0
                episode_step_count = 0
                d = False
                
                initial_state = self.emul.generate_initial_game_state(self.players_info)
                msgs = []
                game_state, events = self.emul.start_new_round(initial_state)
                is_last_round = False

                last_img_state = None
                last_features = None
                last_action_num = None
                last_v = None

                round_buffer = []
                while not is_last_round:
                    #Take an action using probabilities from policy network output.
                    a = self.emul.run_until_my_next_action(game_state, self.my_uuid, msgs)
                    
                    if len(a) == 4:
                        game_state, valid_actions, hole_card, round_state = a
                        img_state = img_from_state(hole_card, round_state)
                        img_state = process_img(img_state)

                        street = round_state['street']
                        bank = round_state['pot']['main']['amount']
                        stack = [s['stack'] for s in round_state['seats'] if s['uuid'] == self.my_uuid][0]
                        other_stacks = [s['stack'] for s in round_state['seats'] if s['uuid'] != self.my_uuid]
                        dealer_btn = round_state['dealer_btn']
                        small_blind_pos = round_state['small_blind_pos']
                        big_blind_pos = round_state['big_blind_pos']
                        next_player = round_state['next_player']
                        round_count = round_state['round_count']
                        estimation = self.hole_card_est[(hole_card[0], hole_card[1])]

                        features = get_street(street)
                        features.extend([bank, stack, dealer_btn, small_blind_pos, big_blind_pos, next_player,
                                         round_count])
                        features.extend(other_stacks)
                        features.append(estimation)
                     
                        # add to buffer last hand 
                        if last_img_state is not None:
                            round_buffer.append([last_img_state, last_features, last_action_num, 0, img_state,
                                                   features, 0, last_v[0, 0]])
                            episode_values.append(last_v[0, 0])
                     
                        pol_val = sess.run([self.local_AC.policy, self.local_AC.value],
                                              feed_dict={self.local_AC.scalar_input: [img_state],
                                                         self.local_AC.features_input: [features]})
                        a_dist, v = pol_val[0], pol_val[1]

                        a = np.random.choice(a_dist[0],p=a_dist[0])
                        a = np.argmax(a_dist == a)
                        action, amount = get_action_by_num(a, valid_actions)
                        game_state, msgs = self.emul.apply_my_action(game_state, action, amount)

                        last_img_state = img_state.copy()
                        last_features = features.copy()
                        last_action_num = a
                        last_v = v
                    else: # round end
                        game_state, reward = a
                        reward /= 100
                        episode_reward += reward

                        if reward >= 0:
                            reward = np.log(1 + reward)
                        else:
                            reward = -np.log(1 - reward)

                        # add to buffer last hand 
                        if last_img_state is not None:
                            round_buffer.append([last_img_state, last_features, last_action_num, reward,
                                                   last_img_state, last_features, 1, last_v[0, 0]])
                            episode_values.append(last_v[0,0])

                            # apply same reward for all states in round
                            for k in range(len(round_buffer)):
                                round_buffer[k][3] = reward
                                
                        episode_buffer.extend(round_buffer)
                        round_buffer = []

                        is_last_round = self.emul._is_last_round(game_state, self.emul.game_rule)
                        game_state, events = self.emul.start_new_round(game_state)

                        last_img_state = None
                        last_action_num = None   
                        last_v = None
                        
                    self.episode_buffer = episode_buffer # for debug
                    self.episode_values = episode_values

                    total_steps += 1
                    episode_step_count += 1
                                            
                self.episode_rewards.append(episode_reward)
                self.episode_lengths.append(episode_step_count)
                self.episode_mean_values.append(np.mean(episode_values))
                
                # Update the network using the episode buffer at the end of the episode.
                if len(episode_buffer) != 0:
                    v_l,p_l,e_l,g_n,v_n = self.train(episode_buffer,sess,gamma,0.0)
                                
                    
                if episode_count % 10 == 0 and self.name == 'worker_0':
                    saver.save(sess, self.model_path, episode_count)
                    print ("Saved Model", episode_count)
                     
                if episode_count % 1 == 0:
                    mean_reward = np.mean(self.episode_rewards[-3:])
                    mean_length = np.mean(self.episode_lengths[-3:])
                    mean_value = np.mean(self.episode_mean_values[-3:])
                    summary = tf.Summary()
                    summary.value.add(tag='Perf/Reward', simple_value=float(mean_reward))
                    summary.value.add(tag='Perf/Length', simple_value=float(mean_length))
                    summary.value.add(tag='Perf/Value', simple_value=float(mean_value))
                    summary.value.add(tag='Losses/Value Loss', simple_value=float(v_l))
                    summary.value.add(tag='Losses/Policy Loss', simple_value=float(p_l))
                    summary.value.add(tag='Losses/Entropy', simple_value=float(e_l))
                    summary.value.add(tag='Losses/Grad Norm', simple_value=float(g_n))
                    summary.value.add(tag='Losses/Var Norm', simple_value=float(v_n))
                    self.summary_writer.add_summary(summary, episode_count)

                    self.summary_writer.flush()
                if self.name == 'worker_0':
                    sess.run(self.increment)
                episode_count += 1

In [7]:
gamma = .99 # discount rate for advantage estimation and reward discounting
a_size = 5 # Agent can move Left, Right, or Fire
load_model = False
model_path = '../cache/models/A3C/'

In [8]:
tf.reset_default_graph()

if not os.path.exists(model_path):
    os.makedirs(model_path)
    
with tf.device("/cpu:0"): 
    global_episodes = tf.Variable(0,dtype=tf.int32,name='global_episodes',trainable=False)
    trainer = tf.train.AdamOptimizer(learning_rate=1e-4)
    master_network = AC_Network(a_size,'global',None) # Generate global network
    num_workers = multiprocessing.cpu_count() # Set workers ot number of available CPU threads
    workers = []
    # Create worker classes
    for i in range(num_workers):
        workers.append(Worker(i,a_size,trainer,model_path,global_episodes))
    saver = tf.train.Saver(max_to_keep=5)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    if load_model == True:
        print ('Loading Model...')
        ckpt = tf.train.get_checkpoint_state(model_path)
        saver.restore(sess,ckpt.model_checkpoint_path)
    else:
        sess.run(tf.global_variables_initializer())
        
    # This is where the asynchronous magic happens.
    # Start the "work" process for each worker in a separate threat.
    worker_threads = []
    for worker in workers:
        worker_work = lambda: worker.work(gamma,sess,coord,saver)
        t = threading.Thread(target=(worker_work))
        t.start()
        sleep(0.5)
        worker_threads.append(t)
    coord.join(worker_threads)

Starting worker 0
Starting worker 1
Starting worker 2
Starting worker 3
Saved Model 0


Exception in thread Thread-8:
Traceback (most recent call last):
  File "/home/digitman/miniconda3/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/home/digitman/miniconda3/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-8-19e237ff1914>", line 30, in <lambda>
    worker_work = lambda: worker.work(gamma,sess,coord,saver)
  File "<ipython-input-6-b1bc364b83a5>", line 166, in work
    self.local_AC.features_input: [features]})
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 778, in run
    run_metadata_ptr)
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 914, in _run
    raise RuntimeError('Attempted to use a closed Session.')
RuntimeError: Attempted to use a closed Session.

Exception in thread Thread-10:
Traceback (most recent call last):
  File "/home/digitman/miniconda3/lib

KeyboardInterrupt: 

tensorboard --logdir=worker_0:'./train_0',worker_1:'./train_1',worker_2:'./train_2',worker_3:'./train_3'

## Testing

In [9]:
player = A3CPlayer(5)

INFO:tensorflow:Restoring parameters from ../cache/models/A3C/-0


NotFoundError: Key train_0/conv2d_2/bias not found in checkpoint
	 [[Node: save/RestoreV2_4 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save/Const_0, save/RestoreV2_4/tensor_names, save/RestoreV2_4/shape_and_slices)]]

Caused by op 'save/RestoreV2_4', defined at:
  File "/home/digitman/miniconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/digitman/miniconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2683, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2787, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2847, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-9-7cb00f29a204>", line 1, in <module>
    player = A3CPlayer(5)
  File "<ipython-input-4-6107ccce80d2>", line 18, in __init__
    self.saver = tf.train.Saver()
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1056, in __init__
    self.build()
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1086, in build
    restore_sequentially=self._restore_sequentially)
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 691, in build
    restore_sequentially, reshape)
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 407, in _AddRestoreOps
    tensors = self.restore_op(filename_tensor, saveable, preferred_shard)
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 247, in restore_op
    [spec.tensor.dtype])[0])
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_io_ops.py", line 669, in restore_v2
    dtypes=dtypes, name=name)
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/digitman/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
    self._traceback = _extract_stack()

NotFoundError (see above for traceback): Key train_0/conv2d_2/bias not found in checkpoint
	 [[Node: save/RestoreV2_4 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save/Const_0, save/RestoreV2_4/tensor_names, save/RestoreV2_4/shape_and_slices)]]


In [None]:
config = setup_config(max_round=50, initial_stack=1500, small_blind_amount=15, summary_file='/dev/null')

config.register_player(name="player", algorithm=player)
# config.register_player(name="r2", algorithm=RandomPlayer())
config.register_player(name="CallPlayer1", algorithm=pm.CallPlayer())
config.register_player(name="CallPlayer2", algorithm=pm.CallPlayer())
config.register_player(name="FoldPlayer1", algorithm=pm.FoldPlayer())
config.register_player(name="FoldPlayer2", algorithm=pm.FoldPlayer())
config.register_player(name="HeuristicPlayer1", algorithm=pm.HeuristicPlayer())
config.register_player(name="HeuristicPlayer2", algorithm=pm.HeuristicPlayer())
config.register_player(name="RandomPlayer1", algorithm=pm.RandomPlayer())
config.register_player(name="RandomPlayer2", algorithm=pm.RandomPlayer())