In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

from pypokerengine.players import BasePokerPlayer
from pypokerengine.utils.card_utils import Card, Deck
from pypokerengine.api.game import setup_config, start_poker

import pickle
import tensorflow as tf
import random

import sys
sys.path.insert(0, '../scripts/')

import PlayerModels as pm
from MyEmulator import MyEmulator
from DQNPlayer import DQNPlayer
from util import *

## Initialization

In [2]:
# Size setting forwarded to DQNPlayer(h_size=...) when the agent is built below.
# NOTE(review): presumably the hidden-layer width of the Q-network — confirm in DQNPlayer.
h_size = 128

In [3]:
%time main_wp = DQNPlayer(h_size=h_size, is_restore=True, is_train=False, debug=True, is_double=True)

INFO:tensorflow:Restoring parameters from ../cache/models/DQN/-8000
CPU times: user 1.4 s, sys: 112 ms, total: 1.51 s
Wall time: 1.44 s


## Testing

In [4]:
# Short two-round game: the DQN agent ("wp") seated with eight CallPlayer opponents.
config = setup_config(
    max_round=2,
    initial_stack=1500,
    small_blind_amount=15,
    summary_file='/dev/null',
)

config.register_player(name="wp", algorithm=main_wp)
# Opponents f2..f9 are registered in order, each with its own CallPlayer instance.
for seat in range(2, 10):
    config.register_player(name="f{}".format(seat), algorithm=pm.CallPlayer())

# verbose=1 prints the action-by-action log of the game.
game_result = start_poker(config, verbose=1)

(['Hole:', ['SQ', 'H2']],)
(['Start stack:', 1500],)
(['Estimation:', 0.0715],)
Started the round 1
Street "preflop" started. (community card = [])
"f4" declared "call:30"
"f5" declared "call:30"
"f6" declared "call:30"
"f7" declared "call:30"
"f8" declared "call:30"
"f9" declared "call:30"
(array([ 2.54183745, -2.77816606, -1.80557466,  2.11717772, -3.04547715], dtype=float32),)
"wp" declared "fold:0"
"f2" declared "call:30"
"f3" declared "call:30"
Street "flop" started. (community card = ['H7', 'CA', 'H4'])
"f2" declared "call:0"
"f3" declared "call:0"
"f4" declared "call:0"
"f5" declared "call:0"
"f6" declared "call:0"
"f7" declared "call:0"
"f8" declared "call:0"
"f9" declared "call:0"
Street "turn" started. (community card = ['H7', 'CA', 'H4', 'D7'])
"f2" declared "call:0"
"f3" declared "call:0"
"f4" declared "call:0"
"f5" declared "call:0"
"f6" declared "call:0"
"f7" declared "call:0"
"f8" declared "call:0"
"f9" declared "call:0"
Street "river" started. (community card = ['H7', '

## Metric

In [14]:
%time main_wp = DQNPlayer(h_size=h_size, is_restore=True, is_train=False, debug=False, is_double=True)

INFO:tensorflow:Restoring parameters from ../cache/models/DQN/-8000
CPU times: user 828 ms, sys: 112 ms, total: 940 ms
Wall time: 867 ms


In [15]:
# 50-round game used for the metric: the DQN agent ("wp") plus two instances
# each of the call, fold, heuristic and random baseline players.
config = setup_config(
    max_round=50,
    initial_stack=1500,
    small_blind_amount=15,
    summary_file='/dev/null',
)

config.register_player(name="wp", algorithm=main_wp)

# (name prefix, player factory) pairs; registration order is
# CallPlayer1, CallPlayer2, FoldPlayer1, ... RandomPlayer2.
baselines = (
    ("CallPlayer", pm.CallPlayer),
    ("FoldPlayer", pm.FoldPlayer),
    ("HeuristicPlayer", pm.HeuristicPlayer),
    ("RandomPlayer", pm.RandomPlayer),
)
for label, make_player in baselines:
    for copy_no in (1, 2):
        config.register_player(
            name="{}{}".format(label, copy_no),
            algorithm=make_player(),
        )

In [16]:
%%time
d = None
for i in range(100):
    game_result = start_poker(config, verbose=0)
    t = pd.DataFrame(game_result['players'])
    t['round'] = i
    if d is None:
        d = t
    else:
        d = pd.concat((d, t))

CPU times: user 22min 27s, sys: 492 ms, total: 22min 27s
Wall time: 22min 25s


### After training with CallPlayer only for 3 hours

In [23]:
# Mean stack per player over all games in `d`, lowest first.
# 'stack' is selected before aggregating so the mean is not computed over
# every other column (which raises TypeError on non-numeric columns in
# pandas >= 2.0); the resulting Series is the same.
d.groupby('name')['stack'].mean().sort_values()

name
RandomPlayer1        847.63
RandomPlayer2        870.24
wp                  1011.58
FoldPlayer1         1106.70
FoldPlayer2         1107.45
HeuristicPlayer2    1257.65
HeuristicPlayer1    1348.60
CallPlayer1         2951.04
CallPlayer2         2987.55
Name: stack, dtype: float64

### After training with different players for 4 hours

In [8]:
# Mean stack per player over all games in `d`, lowest first.
# 'stack' is selected before aggregating so the mean is not computed over
# every other column (which raises TypeError on non-numeric columns in
# pandas >= 2.0); the resulting Series is the same.
d.groupby('name')['stack'].mean().sort_values()

name
RandomPlayer2        301.11
RandomPlayer1        908.14
FoldPlayer2         1076.40
FoldPlayer1         1077.45
HeuristicPlayer2    1249.55
HeuristicPlayer1    1675.50
wp                  1689.70
CallPlayer1         2550.21
CallPlayer2         2965.17
Name: stack, dtype: float64

### After training with different players for 8 hours

In [6]:
# Mean stack per player over all games in `d`, lowest first.
# 'stack' is selected before aggregating so the mean is not computed over
# every other column (which raises TypeError on non-numeric columns in
# pandas >= 2.0); the resulting Series is the same.
d.groupby('name')['stack'].mean().sort_values()

name
RandomPlayer1        595.12
RandomPlayer2        719.08
FoldPlayer1         1080.90
FoldPlayer2         1087.95
HeuristicPlayer2    1155.82
HeuristicPlayer1    1365.85
wp                  1451.95
CallPlayer2         2663.48
CallPlayer1         3372.21
Name: stack, dtype: float64

### After training with different players for 15 hours

In [10]:
# Mean stack per player over all games in `d`, lowest first.
# 'stack' is selected before aggregating so the mean is not computed over
# every other column (which raises TypeError on non-numeric columns in
# pandas >= 2.0); the resulting Series is the same.
d.groupby('name')['stack'].mean().sort_values()

name
RandomPlayer1        706.92
RandomPlayer2        944.37
FoldPlayer1         1076.85
FoldPlayer2         1077.45
HeuristicPlayer2    1270.39
HeuristicPlayer1    1343.57
wp                  2168.95
CallPlayer1         2341.03
CallPlayer2         2560.93
Name: stack, dtype: float64

### After training with different players for 26 hours

In [None]:
# Mean stack per player over all games in `d`, lowest first.
# 'stack' is selected before aggregating so the mean is not computed over
# every other column (which raises TypeError on non-numeric columns in
# pandas >= 2.0); the resulting Series is the same.
d.groupby('name')['stack'].mean().sort_values()