In [1]:
import pyspiel
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

In [2]:
import sys, os
# Adjusting the location of the src folder
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))  
src_path     = os.path.join(project_root, "src")
if src_path not in sys.path:
    sys.path.insert(0, src_path)


In [3]:
from RQE import RQE, Player
from open_spiel_api import get_matrix

In [4]:
df = pd.read_csv('../data/open_spigel_games.csv')
df = df[df.type == 'one_shot']
df = df[df.min_n_players == 2]
df = df[1:]

In [12]:
from sklearn.model_selection import ParameterGrid
import mlflow
import nashpy as nash

param_grid = {
    "tau1": [0.001, 0.003, 0.06, 0.05, 0.1 ],
    "ep1": [110,130,170,190],
    "tau2": [0.001, 0.003, 0.06, 0.05, 0.1 ],
    "ep2": [110,130,170,190],
}

mlflow.set_tracking_uri("http://localhost:4322")
mlflow.set_experiment("rqe_vs_nash")
grid = ParameterGrid(param_grid)


for game_name in df.id:
    print(game_name)
    game = pyspiel.load_matrix_game(game_name)
    g1,g2 = game.row_utilities(), game.col_utilities()
    b1, b2 = next(nash.Game(g1,g2).support_enumeration())
    util_b1 = b1 @ g1 @ b2   
    # column’s payoff under NE:
    util_b2 = b1 @ g2 @ b2
    for params in grid:
        with mlflow.start_run(run_name=f"{game_name}_{params}"):
            rqe_solver = RQE(players=[
                Player(game_matrix=g1, tau=params["tau1"], epsilon=params["ep1"]),
                Player(game_matrix=g2, tau=params["tau2"], epsilon=params["ep2"])
            ])
            pi1, pi2 = rqe_solver.optimize()
            max_val_p1 = np.argmax(pi1)
            max_val_p2 = np.argmax(pi2)
            util_p1 = pi1 @ g1 @ pi2
            util_p2 = pi1 @ g2 @ pi2   
            joint_util = 0.5 * (util_p1 + util_p2)
            print("test",(util_b1 - util_p1 + util_b2 - util_p2) / 2)
            params["game_name"] = game_name
            mlflow.log_params(params)
            mlflow.log_metrics({
                "payoff1": util_p1,
                "payoff2": util_p2,
                "best_response_diff": (util_b1 - util_p1 + util_b2 - util_p2) / 2,
                "joint": joint_util
            })

    del game
        

matrix_bos
test 1.25
🏃 View run matrix_bos_{'ep1': 110, 'ep2': 110, 'tau1': 0.001, 'tau2': 0.001} at: http://localhost:4322/#/experiments/605880644396514428/runs/e12ef675caf340758def3bf9b6d628b9
🧪 View experiment at: http://localhost:4322/#/experiments/605880644396514428
test 1.25
🏃 View run matrix_bos_{'ep1': 110, 'ep2': 110, 'tau1': 0.001, 'tau2': 0.003} at: http://localhost:4322/#/experiments/605880644396514428/runs/42237d4798af4eac89f00fa032aa74da
🧪 View experiment at: http://localhost:4322/#/experiments/605880644396514428
test 1.25
🏃 View run matrix_bos_{'ep1': 110, 'ep2': 110, 'tau1': 0.001, 'tau2': 0.06} at: http://localhost:4322/#/experiments/605880644396514428/runs/d9805393a6414aeca398773054dbee11
🧪 View experiment at: http://localhost:4322/#/experiments/605880644396514428
test 1.25
🏃 View run matrix_bos_{'ep1': 110, 'ep2': 110, 'tau1': 0.001, 'tau2': 0.05} at: http://localhost:4322/#/experiments/605880644396514428/runs/9abaaaa810184228bfdf1d6fef71e062
🧪 View experiment at: ht

  return f_raw(*args, **kwargs)
  return f_raw(*args, **kwargs)
  defvjp(anp.log, lambda ans, x: lambda g: g / x)


IndexError: index -1 is out of bounds for axis 0 with size 0

In [None]:
!date '+%A %W %Y %X'