In [12]:
%matplotlib inline

import numpy as np
import pandas as pd
import gzip
import glob
import matplotlib.pyplot as plt
from collections import defaultdict
import itertools
import seaborn as sns
import swifter
import functools
from scipy.special import comb

# Names of the bots that appear in the tournament logs; used to build the
# '<bot>_count' / '<bot>_payoff' column names throughout this notebook.
BOTS = ['RandomBot', 'ObserveBot', 'CFRBot_3000000', 'CFRBot_6000000', 'ISMCTSBot', 'MOISMCTSBot', 'Deeprole']
# Number of seats per game (the logs carry columns bot_0 .. bot_4).
NUM_PLAYERS = 5

In [3]:
def load_dataframes():
    """Load every tournament log under ``tournaments/`` into one DataFrame.

    Each ``tournaments/*.msg.gz`` file is read as a gzip-compressed msgpack
    DataFrame; the frames are concatenated, the ``win_type``, ``winner``,
    ``bot_<i>`` and ``bot_<i>_role`` columns are converted to categoricals,
    and the index is reset to a fresh 0..n-1 range.

    Returns:
        pandas.DataFrame: one row per logged game.

    Raises:
        ValueError: if no ``tournaments/*.msg.gz`` file is found.
    """
    # glob order is filesystem-dependent; sorting makes the row order (and
    # therefore the reset index) reproducible across machines and runs.
    filenames = sorted(glob.glob('tournaments/*.msg.gz'))
    if not filenames:
        raise ValueError(
            "No tournament logs found matching 'tournaments/*.msg.gz' "
            "(relative to the current working directory)"
        )

    frames = []
    for filename in filenames:
        with gzip.open(filename, 'rb') as f:
            # NOTE(review): pd.read_msgpack was deprecated in pandas 0.25 and
            # removed in pandas 1.0, so this notebook needs pandas < 1.0
            # until the logs are converted to another format.
            frames.append(pd.read_msgpack(f))

    df = pd.concat(frames)
    df['win_type'] = df['win_type'].astype('category')
    df['winner'] = df['winner'].astype('category')
    for seat in range(NUM_PLAYERS):
        bot_col = 'bot_{}'.format(seat)
        role_col = 'bot_{}_role'.format(seat)
        df[bot_col] = df[bot_col].astype('category')
        df[role_col] = df[role_col].astype('category')
    return df.reset_index(drop=True)

# All logged tournament games, loaded once when this cell runs.
ALL_GAMES = load_dataframes()

In [4]:
def group_by_bot(games, bots=None, num_players=None):
    """Summarise each game by bot type instead of by seat.

    For every bot name, two columns are produced, row-aligned with ``games``:

    * ``'<bot>_count'``  - number of seats that bot occupied in the game;
    * ``'<bot>_payoff'`` - sum of that bot's seat payoffs divided by its
      seat count (NaN when the bot did not play in the game).

    Args:
        games: DataFrame with ``bot_<i>`` and ``bot_<i>_payoff`` columns for
            ``i`` in ``range(num_players)``.
        bots: iterable of bot names to summarise. Defaults to the
            module-level ``BOTS``.
        num_players: number of seats per game. Defaults to the module-level
            ``NUM_PLAYERS``.

    Returns:
        pandas.DataFrame with the ``_count`` / ``_payoff`` columns above.
    """
    if bots is None:
        bots = BOTS
    if num_players is None:
        num_players = NUM_PLAYERS

    columns = {}
    for bot in bots:
        # Progress indicator; the function form works on Python 2 and 3.
        print(bot)
        bot_count = None
        payoff = None
        for seat in range(num_players):
            selector = games['bot_{}'.format(seat)] == bot
            # Only rows where this seat is held by `bot` contribute payoff.
            seat_payoff = games['bot_{}_payoff'.format(seat)][selector]
            if bot_count is None:
                bot_count = selector.astype(int)
                payoff = seat_payoff
            else:
                bot_count = bot_count + selector.astype(int)
                # fill_value keeps rows that appear in only one operand.
                payoff = payoff.add(seat_payoff, fill_value=0.0)
        columns['{}_count'.format(bot)] = bot_count
        # Rows where the bot never played are absent from `payoff`, so the
        # aligned division yields NaN there.
        columns['{}_payoff'.format(bot)] = payoff.divide(bot_count)

    return pd.DataFrame(columns)

# Per-game seat counts and mean payoffs for every bot in BOTS.
GROUPED_GAMES = group_by_bot(ALL_GAMES)

RandomBot
ObserveBot
CFRBot_3000000
CFRBot_6000000
ISMCTSBot
MOISMCTSBot
Deeprole


In [6]:
# Inspect the per-bot summary; payoff is NaN for bots absent from a game.
GROUPED_GAMES

Unnamed: 0,CFRBot_3000000_count,CFRBot_3000000_payoff,CFRBot_6000000_count,CFRBot_6000000_payoff,Deeprole_count,Deeprole_payoff,ISMCTSBot_count,ISMCTSBot_payoff,MOISMCTSBot_count,MOISMCTSBot_payoff,ObserveBot_count,ObserveBot_payoff,RandomBot_count,RandomBot_payoff
0,0,,0,,3,0.166667,0,,0,,2,-0.25,0,
1,0,,0,,3,-0.166667,0,,0,,2,0.25,0,
2,0,,0,,3,1.000000,0,,0,,2,-1.50,0,
3,0,,0,,3,0.666667,0,,0,,2,-1.00,0,
4,0,,0,,3,-0.166667,0,,0,,2,0.25,0,
5,0,,0,,3,0.166667,0,,0,,2,-0.25,0,
6,0,,0,,3,-0.666667,0,,0,,2,1.00,0,
7,0,,0,,3,-0.166667,0,,0,,2,0.25,0,
8,0,,0,,3,0.166667,0,,0,,2,-0.25,0,
9,0,,0,,3,-0.666667,0,,0,,2,1.00,0,


In [73]:
# Average every bot's per-game payoff over all games that share the same
# lineup (the tuple of per-bot seat counts), keep only the payoff columns,
# and store 0.0 where a bot is absent from the lineup (NaN payoff).
PAYOFF_TABLE = (
    GROUPED_GAMES
    .groupby(['{}_count'.format(bot) for bot in BOTS])
    .mean()
    [['{}_payoff'.format(bot) for bot in BOTS]]
    .fillna(0.0)
)
# NOTE(review): functools is already imported in the first cell; this repeat
# only matters if this cell is run on its own.
import functools

def get_payoff_table(big_table, bots):
    """Restrict a payoff table to games played only by the given bots.

    ``big_table`` is expected to carry the ``'<bot>_count'`` values in its
    index and ``'<bot>_payoff'`` values as columns. The result keeps the rows
    where at least one of ``bots`` is present and where the counts of
    ``bots`` add up to ``NUM_PLAYERS``, then averages the payoff columns per
    combination of counts.

    Returns:
        DataFrame indexed by the ``'<bot>_count'`` columns of ``bots`` with
        one ``'<bot>_payoff'`` column per bot.
    """
    count_cols = ['{}_count'.format(bot) for bot in bots]
    payoff_cols = ['{}_payoff'.format(bot) for bot in bots]

    flat = big_table.reset_index()[count_cols + payoff_cols]

    # Accumulate "any of these bots is seated" and "total seats they occupy".
    any_present = None
    seats_taken = None
    for col in count_cols:
        seated = flat[col] != 0
        if any_present is None:
            any_present = seated
            seats_taken = flat[col]
        else:
            any_present = any_present | seated
            seats_taken = seats_taken + flat[col]

    keep = any_present & (seats_taken == NUM_PLAYERS)
    return flat[keep].groupby(count_cols).mean()
    

# One two-strategy payoff table per opponent: Deeprole against that opponent,
# keyed by the opponent's name.
DEEPROLE_PAYOFFS = {
    opponent: get_payoff_table(PAYOFF_TABLE, ['Deeprole', opponent])
    for opponent in BOTS
    if opponent != 'Deeprole'
}

# Spot-check one of the tables.
DEEPROLE_PAYOFFS['CFRBot_3000000']

Unnamed: 0_level_0,Unnamed: 1_level_0,Deeprole_payoff,CFRBot_3000000_payoff
Deeprole_count,CFRBot_3000000_count,Unnamed: 2_level_1,Unnamed: 3_level_1
0,5,0.0,0.0
1,4,-0.0172,0.0043
2,3,0.063167,-0.042111
3,2,0.1155,-0.17325
4,1,0.116717,-0.466867
5,0,0.0,0.0


In [44]:
def scipy_multinomial(params):
    """Return the multinomial coefficient ``(sum(params))! / prod(p! for p in params)``.

    The coefficient is built as a product of exact binomials:
    ``C(n1+n2, n2) * C(n1+n2+n3, n3) * ...``.

    Args:
        params: sequence (list, tuple or 1-D array) of non-negative integer
            counts.

    Returns:
        int: the exact coefficient. An empty or single-element ``params``
        yields 1.
    """
    # Empty input previously fell through to params[-1] and raised
    # IndexError; there is exactly one way to arrange nothing.
    if len(params) <= 1:
        return 1

    coeff = 1
    running_total = params[0]
    for count in params[1:]:
        running_total = running_total + count
        coeff *= comb(running_total, count, exact=True)
    return coeff

In [45]:
def P(N_i, x):
    """Multinomial weight of the count vector ``N_i`` under mixture ``x``.

    Computes ``multinomial(N_i) * prod(x ** N_i)``, with both arguments
    converted to numpy arrays first.
    """
    probabilities = np.array(x)
    counts = np.array(N_i)
    likelihood = np.prod(probabilities ** counts)
    return scipy_multinomial(counts) * likelihood

In [46]:
def r(x, table):
    """Per-strategy payoff vector for mixture ``x`` given a payoff table.

    Every row of ``table`` (index = count tuple, values = payoffs) is weighted
    by ``P(index, x)`` and summed; the sum is then divided elementwise by
    ``1 - (1 - x) ** NUM_PLAYERS``.

    NOTE(review): an entry of ``x`` equal to 0 makes the divisor 0 for that
    strategy - confirm callers never pass such a mixture.
    """
    mix = np.array(x)
    weighted_rows = (
        P(counts, mix) * np.array(row_payoffs)
        for counts, row_payoffs in table.iterrows()
    )
    # Start from a float zero vector so the running sum matches len(x).
    total = sum(weighted_rows, np.zeros(len(mix)))
    normaliser = 1.0 - (1.0 - mix) ** NUM_PLAYERS
    return total / normaliser

In [70]:
# Step size of each update applied to the mixture in find_nash.
LEARNING_RATE = 0.01

def find_nash(table, num_bots, num_steps=3000, report_every=300):
    """Iterate replicator-style dynamics on ``table`` from a uniform mixture.

    Each step computes the payoff vector ``rx = r(x, table)``, the
    mixture-average payoff ``sum(x * rx)``, and moves ``x`` by
    ``LEARNING_RATE * x * (rx - average)``.

    Args:
        table: payoff table accepted by ``r``.
        num_bots: number of strategies (length of the mixture vector).
        num_steps: number of update steps to run.
        report_every: print the mixture every this many steps (step 0
            included); pass 0 to silence the output.

    Returns:
        numpy.ndarray: the mixture after the final step. (Previously the
        function returned None and the result was only visible in the
        printed trace.)
    """
    x = np.ones(num_bots) / num_bots
    for i in range(num_steps):
        rx = r(x, table)
        xtAx = np.sum(x * rx)
        xdot = x * (rx - xtAx)
        x += LEARNING_RATE * xdot
        if report_every and i % report_every == 0:
            # Function-call form works on both Python 2 and Python 3.
            print(x)
    return x

# Bot names, in BOTS order, for reference when picking an opponent below:
# RandomBot
# ObserveBot
# CFRBot_3000000
# CFRBot_6000000
# ISMCTSBot
# MOISMCTSBot
# Deeprole

# Run the dynamics on the two-strategy Deeprole-vs-MOISMCTSBot table.
find_nash(DEEPROLE_PAYOFFS['MOISMCTSBot'], 2)

[0.50046214 0.49953786]
[0.66881265 0.33118735]
[0.84700152 0.15299848]
[0.94696719 0.05303281]
[0.98350252 0.01649748]
[0.99502339 0.00497661]
[0.99851184 0.00148816]
[9.99556133e-01 4.43866968e-04]
[9.99867710e-01 1.32290032e-04]
[9.99960581e-01 3.94188054e-05]
