In [121]:
import pandas as pd 
import plotly.express as px
from plotly import graph_objects as go
from plotly.subplots import make_subplots

# Performance against random agent

In [122]:
experiments_dir = './experiments/random_agent/'


def performance_in_standard_format(df: pd.DataFrame, num_steps=50000):
    df = df.copy()
    df['episode'] *= num_steps / df['episode'].max()
    steps = pd.RangeIndex(0, num_steps + 1, 100)
    df = df.set_index('episode').reindex(steps, method='nearest').rename_axis('step').reset_index()
    return df.rename(columns={'reward': 'Reward', 'step': 'Step'})


def read_performance_against_random_agent(algo: str, model: str, standard_format: bool = True):
    df = pd.read_csv(experiments_dir + algo + '/' + model + '/performance.csv')
    if standard_format:
        return performance_in_standard_format(df)
    return df

dqn_transformer = read_performance_against_random_agent('dqn', 'transformer')
dqn_mlp = read_performance_against_random_agent('dqn', 'mlp')
nfsp_transformer = read_performance_against_random_agent('nfsp', 'transformer') 
nfsp_mlp = read_performance_against_random_agent('nfsp', 'mlp')

dfs = {('DQN', 'Transformer'): dqn_transformer,
        ('DQN', 'MLP'): dqn_mlp,
        ('NFSP', 'Transformer'): nfsp_transformer,
        ('NFSP', 'MLP'): nfsp_mlp}

df = pd.concat(dfs.values(), keys=dfs.keys(), names=['Algorithm', 'Model']).reset_index()

In [123]:
def plot_performance_vs_random_agent(df, moving_average_step: int = 1):
    data = df.copy()
    if moving_average_step > 1:
        data['Reward'] = data.groupby(['Model', 'Algorithm'])['Reward'].transform(
            lambda g: g.rolling(window=moving_average_step, min_periods=1).mean()
        )
        
    fig = px.line(data, x='Step', y='Reward', color='Model', facet_row='Algorithm')
    fig.update_layout(
        font=dict(
            family='Computer Modern',
        ),
        height=400, 
        width=300,
        margin=dict(l=0, r=20, t=0, b=0),
        legend=dict(
            orientation='h',
            yanchor='bottom',
            y=1.02,
            xanchor='right',
            x=0.9,
        ),
        yaxis_title={
            'standoff': 10,
        },
        yaxis2_title={
            'standoff': 10,
        },
    )

    fig.update_traces(
        opacity=0.75,
    )
    fig.show()
    return fig 

In [124]:
fig_random = plot_performance_vs_random_agent(df, moving_average_step=20);
fig_random.write_image('./plots/performance_against_random_agent.pdf')

# Bluffing and Calling agents

In [127]:
import os 


experiments_dir = './experiments/bluff_call/'


def performance_in_standard_format(df: pd.DataFrame, num_steps=50000):
    df = df.copy()
    df['episode'] *= num_steps / df['episode'].max()
    steps = pd.RangeIndex(0, num_steps + 1, 100)
    df = df.set_index('episode').reindex(steps, method='nearest').rename_axis('step').reset_index()
    return df.rename(columns={'reward': 'Reward', 'step': 'Step', 'opponent': 'Opponent'})


def read_performance_against_random_agent(algo: str, model: str, agent: str, standard_format: bool = True):
    if agent == 'bluff':
        path = os.path.join(experiments_dir, 'LeducHoldemRuleAgentBluff', algo, model)
    elif agent == 'call':
        path = os.path.join(experiments_dir, 'LeducHoldemRuleAgentCall', algo, model)
    elif agent == 'all':
        path = os.path.join(experiments_dir, 'all', algo, model)
    else:
        raise ValueError('Invalid agent')
    df = pd.read_csv(path + '/performance.csv')
    df['opponent'] = df['opponent'].map({'LeducHoldemRuleAgentBluff': 'Bluff', 'LeducHoldemRuleAgentCall': 'Call', 'all': 'All'})
    if standard_format:
        return performance_in_standard_format(df)
    df = df.rename(columns={'reward': 'Reward', 'episode': 'Step', 'opponent': 'Opponent'})
    return df


dfs = {}
for agent in ['bluff', 'call', 'all']:
    for algo in ['dqn', 'nfsp']:
        for model in ['transformer', 'mlp']:
            dfs[(agent, algo, model)] = read_performance_against_random_agent(algo, model, agent, standard_format=True)

df = pd.concat(dfs.values(), keys=dfs.keys(), names=['Opponents', 'Algorithm', 'Model']).reset_index().drop(columns='level_3')
df['Opponents'] = df['Opponents'].map({'bluff': 'Bluff', 'call': 'Call', 'all': 'All'})
df['Algorithm'] = df['Algorithm'].map({'dqn': 'DQN', 'nfsp': 'NFSP'}) 
df['Model'] = df['Model'].map({'transformer': 'Transformer', 'mlp': 'MLP'})

In [129]:
data = df.copy()
data['Reward'] = data.groupby(['Model', 'Algorithm', 'Opponent', 'Opponents'])['Reward'].transform(
    lambda g: g.rolling(window=10, min_periods=1).mean()
)
data = data[data['Algorithm'] == 'NFSP']
fig = px.line(data, x='Step', y='Reward', color='Opponents', facet_row='Opponent', facet_col='Model')

fig.update_layout(
    height=700, 
)
fig.show()

In [140]:
data = df.copy()
data['Reward'] = data.groupby(['Model', 'Algorithm', 'Opponent', 'Opponents'])['Reward'].transform(
    lambda g: g.rolling(window=10, min_periods=1).mean()
)
data = data[data['Algorithm'] == 'DQN']
fig = px.line(data, x='Step', y='Reward', color='Opponents', facet_row='Opponent', facet_col='Model')

fig.update_layout(
        font=dict(
            family='Computer Modern',
        ),
        height=400, 
        width=400,
        margin=dict(l=0, r=20, t=0, b=0),
        legend=dict(
            orientation='h',
            yanchor='bottom',
            y=1.02,
            xanchor='right',
            x=0.9,
        ),
        yaxis_title={
            'standoff': 10,
        },
        yaxis2_title={
            'standoff': 10,
        },
)
fig.show()

In [136]:
data = df.copy()
data['Reward'] = data.groupby(['Model', 'Algorithm', 'Opponent', 'Opponents'])['Reward'].transform(
    lambda g: g.rolling(window=20, min_periods=1).mean()
)
fig = px.line(data, x='Step', y='Reward', color='Model', facet_row='Opponent', facet_col='Algorithm', line_dash='Opponents', line_dash_map={'All': 'solid', 'Call': 'dash', 'Bluff': 'dash'})

fig.update_layout(
    height=700
)
fig.show()

In [80]:
data = df.copy()
px.line(df, x='Episode', y='Reward', color='Model', facet_row='Algorithm', facet_col='Opponent').show()

In [49]:
data = df.copy().drop(columns='level_3')
data = data[data['Model'] == 'mlp']
px.line(data, x='Step', y='Reward', facet_row='Algorithm', facet_col='Opponent').show()

In [50]:
data = df.copy().drop(columns='level_3')
data = data[data['Model'] == 'transformer']
px.line(data, x='Step', y='Reward', facet_row='Algorithm', facet_col='Opponent').show()

In [47]:
d = data[(data['Opponent'] == 'bluff') & (data['Algorithm'] == 'dqn')]
px.line(d, x='Step', y='Reward', color='Model').show()

In [42]:
data.groupby(['Opponent', 'Algorithm', 'Model']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Step,Reward
Opponent,Algorithm,Model,Unnamed: 3_level_1,Unnamed: 4_level_1
all,dqn,mlp,25000.0,2.034251
all,nfsp,mlp,25000.0,1.484072
bluff,dqn,mlp,25000.0,2.034251
bluff,nfsp,mlp,25000.0,1.484072
call,dqn,mlp,25000.0,0.971208
call,nfsp,mlp,25000.0,0.757794
