In [1]:
import pyspiel
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

In [2]:
import sys, os
# Make the project's src/ directory importable: it sits next to this notebook's
# folder, i.e. one level above the current working directory.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
src_path = os.path.join(project_root, "src")
# Prepend only once so that re-running the cell does not pile up duplicates.
if sys.path.count(src_path) == 0:
    sys.path.insert(0, src_path)


In [3]:
from RQE import RQE, Player
from open_spiel_api import get_matrix

In [4]:
# Load the game catalogue and keep only the one-shot games whose minimum
# player count is two; the bare `df` at the end displays the filtered table.
df = pd.read_csv('../data/open_spigel_games.csv').loc[
    lambda games: (games["type"] == 'one_shot') & (games["min_n_players"] == 2)
]
df

Unnamed: 0.1,Unnamed: 0,id,name,min_n_players,max_n_players,type,is_deterministic
7,7,blotto,Blotto,2,10,one_shot,True
55,55,matrix_bos,Bach or Stravinsky,2,2,one_shot,True
56,56,matrix_brps,"Biased Rock, Paper, Scissors",2,2,one_shot,True
57,57,matrix_cd,Chicken-Dare,2,2,one_shot,True
58,58,matrix_coordination,Coordination,2,2,one_shot,True
59,59,matrix_mp,Matching Pennies,2,2,one_shot,True
60,60,matrix_pd,Prisoner's Dilemma,2,2,one_shot,True
61,61,matrix_rps,"Rock, Paper, Scissors",2,2,one_shot,True
62,62,matrix_rpsw,"Rock, Paper, Scissors, Water",2,2,one_shot,True
63,63,matrix_sh,Stag Hunt,2,2,one_shot,True


In [None]:
from sklearn.model_selection import ParameterGrid
import mlflow
import nashpy as nash

# Hyper-parameter sweep: every combination of these values is evaluated for
# every game. tau1/ep1 are forwarded to the first Player and tau2/ep2 to the
# second one as Player(tau=..., epsilon=...).
param_grid = {
    "tau1": [0.001, 0.003, 0.06, 0.05, 0.1 ],
    "ep1": [110,130,170,190],
    "tau2": [0.001, 0.003, 0.06, 0.05, 0.1 ],
    "ep2": [110,130,170,190],
}

mlflow.set_tracking_uri("http://localhost:4322")
mlflow.set_experiment("rqe_vs_nash")
grid = ParameterGrid(param_grid)


for game_name in df.id:
    print(game_name)
    game = pyspiel.load_matrix_game(game_name)
    g1, g2 = game.row_utilities(), game.col_utilities()

    # support_enumeration() is a generator that yields one
    # (row_strategy, col_strategy) pair per equilibrium, so it must not be
    # unpacked directly. The first equilibrium found is used as the baseline.
    # NOTE(review): support enumeration can be slow on games with many
    # actions (e.g. blotto) -- confirm the runtime is acceptable.
    nash_equilibrium = next(nash.Game(g1, g2).support_enumeration(), None)
    if nash_equilibrium is None:
        print(f"no Nash equilibrium found for {game_name}; skipping")
        del game
        continue
    b1, b2 = nash_equilibrium

    # Expected payoff of each player under the Nash profile: b1^T . G . b2.
    util_b1 = float(np.dot(np.dot(b1, g1), b2))
    util_b2 = float(np.dot(np.dot(b1, g2), b2))

    for params in grid:
        with mlflow.start_run(run_name=f"{game_name}_{params}"):
            rqe_solver = RQE(players=[
                Player(game_matrix=g1, tau=params["tau1"], epsilon=params["ep1"]),
                Player(game_matrix=g2, tau=params["tau2"], epsilon=params["ep2"])
            ])
            pi1, pi2 = rqe_solver.optimize()

            # Each player commits to the most probable action of its own
            # policy and is scored against the opponent's full mixed policy.
            max_val_p1 = np.argmax(pi1)
            max_val_p2 = np.argmax(pi2)
            util_p1 = float(np.dot(g1[max_val_p1], pi2))
            util_p2 = float(np.dot(g2[:, max_val_p2], pi1))
            joint_util = 0.5 * (util_p1 + util_p2)

            # Log a merged copy instead of mutating the dict yielded by the grid.
            mlflow.log_params({**params, "game_name": game_name})
            mlflow.log_metrics({
                "payoff1": util_p1,
                "payoff2": util_p2,
                # Mean gap between the Nash baseline payoffs and the RQE payoffs.
                "best_response_diff": (util_b1 - util_p1 + util_b2 - util_p2) / 2,
                "joint": joint_util
            })

    del game
        

blotto


In [None]:
# Shell escape: print a timestamp for this run via the system `date` command
# (%A weekday name, %W week number of the year, %Y year, %X locale time).
!date '+%A %W %Y %X'