Adapted from `rlcard/examples/leduc_holdem_dqn_pytorch.py`

In [34]:
import os
import sys
sys.path.insert(0, os.path.abspath('./rlcard'))

In [35]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [36]:
import torch
import rlcard
from DQNAgent import DQNAgent
from rlcard.agents import RandomAgent, CFRAgent
from rlcard.utils import set_global_seed, tournament
from rlcard.utils import Logger
from collections import Counter

In [37]:
# set_global_seed(0)

In [38]:
eval_env = rlcard.make('kuhn-poker', config={'record_action': True})

In [39]:
dqn_agent = DQNAgent(eval_env.action_num, eval_env.state_shape[0])

dqn_agent.load(os.path.join('models/kuhn_poker_dqn', 'model.pth'))

In [40]:
random_agent = RandomAgent(action_num=eval_env.action_num)

In [41]:
cfr_agent = CFRAgent(eval_env)
cfr_agent.load()

In [60]:
eval_env.set_agents([cfr_agent, dqn_agent])

print('cfr, dqn')

for i in range(10000):
    trajectories, payoffs = eval_env.run(is_training=False)

    ps = ['cfr', 'dqn']
    
    cards = ['J', 'Q', 'K']
    card0 = '?'
    card1 = '?'
    
    if trajectories[0]:
        card0 = cards[list(trajectories[0][-1][0]['obs']).index(1)]
    
    if trajectories[1]:
        card1 = cards[list(trajectories[1][-1][0]['obs']).index(1)]

    cs = [card0, card1]
    
    if i > 9950:   
        print(f'------------')    

        if trajectories[0]:
            state = trajectories[0][-1][0]
            for (p, m) in state['action_record']:
                print(f'{ps[p]} {m} {cs[p]}')
        else:
            for (p, m) in state['action_record']:
                print(f'{ps[p]} {m} {cs[p]}')

        print(f'{ps[0]}: {payoffs[0]}, {ps[1]}: {payoffs[1]}')

cfr, dqn
------------
dqn check J
cfr raise ?
dqn fold J
cfr: 1.0, dqn: -1.0
------------
cfr fold J
cfr: -1.0, dqn: 1.0
------------
dqn raise Q
cfr call K
cfr: 2.0, dqn: -2.0
------------
cfr check Q
dqn raise K
cfr fold Q
cfr: -1.0, dqn: 1.0
------------
cfr check Q
dqn fold J
cfr: 1.0, dqn: -1.0
------------
cfr check ?
dqn fold J
cfr: 1.0, dqn: -1.0
------------
cfr raise K
dqn fold J
cfr: 1.0, dqn: -1.0
------------
cfr raise K
dqn fold J
cfr: 1.0, dqn: -1.0
------------
dqn raise K
cfr fold J
cfr: -1.0, dqn: 1.0
------------
dqn raise J
cfr fold ?
cfr: 1.0, dqn: -1.0
------------
dqn raise J
cfr fold ?
cfr: 1.0, dqn: -1.0
------------
cfr fold J
cfr: -1.0, dqn: 1.0
------------
cfr raise K
dqn fold J
cfr: 1.0, dqn: -1.0
------------
cfr fold J
cfr: -1.0, dqn: 1.0
------------
cfr fold ?
cfr: 1.0, dqn: -1.0
------------
cfr check Q
dqn fold J
cfr: 1.0, dqn: -1.0
------------
cfr check ?
dqn fold J
cfr: 1.0, dqn: -1.0
------------
cfr check J
dqn raise K
cfr fold J
cfr: -1.0, dqn:

In [61]:
eval_env.set_agents([dqn_agent, cfr_agent])

print('dqn, cfr')

for i in range(10000):
    trajectories, payoffs = eval_env.run(is_training=False)

    ps = ['dqn', 'cfr']
    
    cards = ['J', 'Q', 'K']
    card0 = '?'
    card1 = '?'
    
    if trajectories[0]:
        card0 = cards[list(trajectories[0][-1][0]['obs']).index(1)]
    
    if trajectories[1]:
        card1 = cards[list(trajectories[1][-1][0]['obs']).index(1)]

    cs = [card0, card1]
    
    if i > 9950:   
        print(f'------------')    

        if trajectories[0]:
            state = trajectories[0][-1][0]
            for (p, m) in state['action_record']:
                print(f'{ps[p]} {m} {cs[p]}')
        else:
            for (p, m) in state['action_record']:
                print(f'{ps[p]} {m} {cs[p]}')

        print(f'{ps[0]}: {payoffs[0]}, {ps[1]}: {payoffs[1]}')

dqn, cfr
------------
cfr check Q
dqn fold J
dqn: -1.0, cfr: 1.0
------------
cfr check Q
dqn raise K
cfr call Q
dqn: 2.0, cfr: -2.0
------------
cfr check Q
dqn raise K
cfr call Q
dqn: 2.0, cfr: -2.0
------------
dqn raise Q
cfr call K
dqn: -2.0, cfr: 2.0
------------
cfr check J
dqn raise Q
cfr fold J
dqn: 1.0, cfr: -1.0
------------
cfr raise K
dqn call Q
dqn: -2.0, cfr: 2.0
------------
dqn raise Q
cfr fold J
dqn: 1.0, cfr: -1.0
------------
dqn raise Q
cfr fold J
dqn: 1.0, cfr: -1.0
------------
dqn raise Q
cfr call K
dqn: -2.0, cfr: 2.0
------------
dqn fold J
dqn: -1.0, cfr: 1.0
------------
dqn raise K
cfr call Q
dqn: 2.0, cfr: -2.0
------------
dqn raise K
cfr call Q
dqn: 2.0, cfr: -2.0
------------
cfr check Q
dqn fold J
dqn: -1.0, cfr: 1.0
------------
dqn raise Q
cfr call K
dqn: -2.0, cfr: 2.0
------------
cfr raise K
dqn call Q
dqn: -2.0, cfr: 2.0
------------
cfr check Q
dqn fold J
dqn: -1.0, cfr: 1.0
------------
cfr raise K
dqn fold J
dqn: -1.0, cfr: 1.0
------------
dq

In [44]:
p0 = 0
p1 = 1
for i in range(10_000):
    trajectories, payoffs = eval_env.run(is_training=False)
    p0 += payoffs[0]
    p1 += payoffs[1]

print(p0, p1)
print(p0 / 10_000, p1 / 10_000)

1750.0 -1749.0
0.175 -0.1749


In [32]:
pairs = []
eval_env.set_agents([cfr_agent, dqn_agent])

for i in range(10_000):
    trajectories, payoffs = eval_env.run(is_training=False)

    cards = ['J', 'Q', 'K']
    card0 = '?'
    card1 = '?'
    
    if trajectories[0]:
        card0 = cards[list(trajectories[0][-1][0]['obs']).index(1)]
    
    if trajectories[1]:
        card1 = cards[list(trajectories[1][-1][0]['obs']).index(1)]

    pairs.append((card0, card1))
    
c = Counter(pairs)
print(c)

Counter({('?', 'J'): 1691, ('Q', 'K'): 1642, ('K', 'Q'): 1608, ('J', 'K'): 1279, ('J', 'Q'): 1237, ('K', 'J'): 865, ('J', '?'): 841, ('Q', 'J'): 837})


In [33]:
pairs = []
eval_env.set_agents([dqn_agent, cfr_agent])

for i in range(10_000):
    trajectories, payoffs = eval_env.run(is_training=False)

    cards = ['J', 'Q', 'K']
    card0 = '?'
    card1 = '?'
    
    if trajectories[0]:
        card0 = cards[list(trajectories[0][-1][0]['obs']).index(1)]
    
    if trajectories[1]:
        card1 = cards[list(trajectories[1][-1][0]['obs']).index(1)]

    pairs.append((card1, card0))
    
c = Counter(pairs)
print(c)

Counter({('Q', 'K'): 1698, ('K', 'Q'): 1680, ('?', 'J'): 1631, ('J', 'K'): 1338, ('J', 'Q'): 1262, ('Q', 'J'): 829, ('K', 'J'): 788, ('J', '?'): 774})
