# 1. 单局 PK 可视化区域

In [82]:
from gomoku.player import ZeroMCTSPlayer, WrongZeroMCTSPlayer, IneffectiveZeroMCTSPlayer, play_one_game
from gomoku.gomoku_env import GomokuEnvSimple, GomokuEnv
from gomoku.policy import ZeroPolicy

In [None]:
from gomoku.player import IneffectiveZeroMCTSPlayer
import torch

# Load a trained 9x9 policy network and let it play one rendered game
# against itself.
board_size = 9

policy = ZeroPolicy(board_size)
# map_location='cpu' makes the checkpoint loadable on machines that lack the
# device it was saved from; load_state_dict then copies the tensors into
# `policy`'s own parameters.
policy.load_state_dict(
    torch.load(
        'models/gomoku_zero_9_plus_pro_max/policy_step_78500.pth',
        map_location='cpu',
    )
)
# Both players wrap the same network object.
player1 = ZeroMCTSPlayer(policy)
player2 = ZeroMCTSPlayer(policy)

game = GomokuEnv(board_size)

# With render=True the board is printed as the game progresses (see the
# captured cell output).
info = play_one_game(
    player1,
    player2,
    game=game,
    board_size=board_size,
    render=True,
    eager=False,
    itermax=400,
)

   0  1  2  3  4  5  6  7  8
 0 .  .  .  .  .  .  .  .  . 
 1 .  .  .  .  .  .  .  .  . 
 2 .  .  .  .  .  .  .  .  . 
 3 .  .  .  .  X  .  .  .  . 
 4 .  .  .  .  .  .  .  .  . 
 5 .  .  .  .  .  .  .  .  . 
 6 .  .  .  .  .  .  .  .  . 
 7 .  .  .  .  .  .  .  .  . 
 8 .  .  .  .  .  .  .  .  . 

   0  1  2  3  4  5  6  7  8
 0 .  .  .  .  .  .  .  .  . 
 1 .  .  .  .  .  .  .  .  . 
 2 .  .  .  .  .  .  .  .  . 
 3 .  .  .  .  X  .  .  .  . 
 4 .  .  .  O  .  .  .  .  . 
 5 .  .  .  .  .  .  .  .  . 
 6 .  .  .  .  .  .  .  .  . 
 7 .  .  .  .  .  .  .  .  . 
 8 .  .  .  .  .  .  .  .  . 

   0  1  2  3  4  5  6  7  8
 0 .  .  .  .  .  .  .  .  . 
 1 .  .  .  .  .  .  .  .  . 
 2 .  .  .  .  .  .  .  .  . 
 3 .  .  .  .  X  .  .  .  . 
 4 .  .  .  O  .  X  .  .  . 
 5 .  .  .  .  .  .  .  .  . 
 6 .  .  .  .  .  .  .  .  . 
 7 .  .  .  .  .  .  .  .  . 
 8 .  .  .  .  .  .  .  .  . 

   0  1  2  3  4  5  6  7  8
 0 .  .  .  .  .  .  .  .  . 
 1 .  .  .  .  .  .  .  .  . 
 2 .  .  . 

In [None]:
import cProfile
import torch
import pstats
import io

# --- Profile one full game ---

# 1. Build everything the game needs BEFORE profiling starts, so that model
#    construction and checkpoint loading do not pollute the report.
policy = ZeroPolicy(board_size=15)
# The players below run on 'cpu', so load the checkpoint onto the CPU as well.
policy.load_state_dict(
    torch.load(
        'models/gomoku_zero_15_continue/policy_step_6000.pth',
        map_location='cpu',
    )
)
player1 = ZeroMCTSPlayer(policy, itermax=200, device='cpu', eager=True)
player2 = ZeroMCTSPlayer(policy, itermax=200, device='cpu', eager=True)

game = GomokuEnv(15)

# 2. Create the profiler and run the game under it.
profiler = cProfile.Profile()
profiler.enable()
try:
    # Pass the environment created above; it was previously built but unused.
    info = play_one_game(player1, player2, game=game, board_size=15, render=True)
finally:
    # Always stop profiling, even if the game raises, so the profiler is not
    # left enabled for subsequent cells.
    profiler.disable()

# 3. Capture the report in an in-memory text stream.
s = io.StringIO()

# 4. Format the collected data; 'cumulative' sorts by cumulative time spent in
#    each function, including the functions it calls.
stats = pstats.Stats(profiler, stream=s).sort_stats('cumulative')

# 5. Write the full report into the stream.
#    To limit the report to the first 10 entries, call stats.print_stats(10)
#    instead.
stats.print_stats()

print(s.getvalue())


# 2. ARENA 

In [87]:
from gomoku.player import arena_parallel
from gomoku.player import ZeroMCTSPlayer, WrongZeroMCTSPlayer, IneffectiveZeroMCTSPlayer, play_one_game
from gomoku.gomoku_env import GomokuEnvSimple
from gomoku.policy import ZeroPolicy

In [89]:
# Pit two checkpoints from the same model directory against each other.
policy1 = ZeroPolicy(board_size)
policy2 = ZeroPolicy(board_size)
# map_location='cpu' keeps the checkpoints loadable on machines that lack the
# device they were saved from.
policy1.load_state_dict(
    torch.load(
        'models/gomoku_zero_9_plus_pro/policy_step_129500.pth',
        map_location='cpu',
    )
)
policy2.load_state_dict(
    torch.load(
        'models/gomoku_zero_9_plus_pro/policy_step_199500.pth',
        map_location='cpu',
    )
)

r = arena_parallel(
    policy1,
    policy2,
    # Reuse the size the policies were built with rather than a separate
    # literal, so the arena and the networks cannot drift apart.
    board_size=board_size,
    num_cpus=16,
    games=100,
    itermax=100,
)

Starting parallel arena with 100 games on 16 CPUs...
Arena finished!
Player 1 wins: 50 (50.00%)
Player 2 wins: 50 (50.00%)
Draws: 0 (0.00%)
函数 'arena_parallel' 执行耗时: 27.3081 秒


In [None]:
from worker import gather_selfplay_games


# Run gather_selfplay_games on a 9x9 board with a freshly constructed
# ZeroPolicy (no checkpoint is loaded here).
# NOTE(review): presumably this launches num_workers workers that each play
# games_per_worker games -- confirm against worker.gather_selfplay_games.
gather_selfplay_games(
    policy=ZeroPolicy(board_size=9),
    device='cpu',
    board_size=9,
    num_workers=10,
    games_per_worker=10,
    itermax=100,
)

In [None]:
from player import self_play


# Run a single self_play call on a 9x9 board with a freshly constructed
# ZeroPolicy (no checkpoint is loaded here). The result is bound to `_`
# because it is not inspected in this cell.
_ = self_play(
    policy=ZeroPolicy(board_size=9),
    device='cpu',
    board_size=9,
    itermax=100,
)

In [None]:
# Play one game between two players that each wrap their own freshly
# constructed ZeroPolicy, using the GomokuEnvSimple environment. The result
# is bound to `_` because it is not inspected in this cell.
_ = play_one_game(
    player1=ZeroMCTSPlayer(ZeroPolicy(board_size=9)),
    player2=ZeroMCTSPlayer(ZeroPolicy(board_size=9)),
    board_size=9,
    # render=True,  # re-enable to have play_one_game render the game
    game=GomokuEnvSimple(board_size=9),
    itermax=100
)

# 3. 评价棋局

In [None]:
from gomoku.player import ZeroMCTSPlayer, WrongZeroMCTSPlayer, IneffectiveZeroMCTSPlayer, play_one_game, self_play
from gomoku.gomoku_env import GomokuEnvSimple
from gomoku.policy import ZeroPolicy
import torch

In [None]:
# Load the checkpoint whose games are evaluated in the cells below.
board_size = 9

policy = ZeroPolicy(board_size)
# The following cells run this policy on 'cpu', so load the checkpoint onto
# the CPU regardless of the device it was saved from.
policy.load_state_dict(
    torch.load(
        'models/gomoku_zero_9_plus_pro/policy_step_125000.pth',
        map_location='cpu',
    )
)

In [None]:
# Run self_play with the loaded policy on the CPU. Later cells read
# infos['states'] from the result.
infos = self_play(policy, 'cpu', board_size=board_size, itermax=200)


In [None]:
def render(states, index, model=None):
    """Print the board at ``states[index]`` together with the network's evaluation.

    The network is run on the whole ``states`` sequence that was passed in, so
    the printed value and move probabilities always belong to the board that
    is drawn.

    Args:
        states: Sequence of board encodings, each indexable as
            ``[plane, row, col]``. A cell is drawn as ``X`` where plane 0
            equals 1, as ``O`` where plane 1 equals 1, and as ``.`` otherwise.
            The board dimensions are taken from the last two axes.
        index: Position in ``states`` to display; negative values count from
            the end.
        model: Callable that maps a float32 batch tensor to a
            ``(logits, value)`` pair. Defaults to the notebook-level
            ``policy``.

    Returns:
        None. The board, ``value[index]`` and the highest-probability moves
        (at most ten) are written to stdout.
    """
    import numpy as np

    net = policy if model is None else model

    batch = np.array(states, dtype=np.float32)
    n_rows, n_cols = batch.shape[-2:]

    # Inference only: no autograd graph is needed for display purposes.
    with torch.no_grad():
        logits, winrate = net(torch.from_numpy(batch))

    symbols = {0: '.', 1: 'X', 2: 'O'}
    board = batch[index]

    lines = ["  " + " ".join(f"{col:2d}" for col in range(n_cols))]
    for row in range(n_rows):
        cells = []
        for col in range(n_cols):
            if board[0, row, col] == 1:
                cells.append(1)
            elif board[1, row, col] == 1:
                cells.append(2)
            else:
                cells.append(0)
        lines.append(f"{row:2d} " + " ".join(f"{symbols[cell]} " for cell in cells))
    print("\n".join(lines) + "\n")
    print(winrate[index])

    move_probs = torch.softmax(logits, dim=-1)[index]
    # Never ask for more moves than the network outputs.
    top_k = min(10, move_probs.numel())
    values, indices = torch.topk(move_probs, top_k)

    for prob, action in zip(values.tolist(), indices.tolist()):
        # Flat action id -> (action // n_cols, action % n_cols).
        a, b = divmod(action, n_cols)
        print(f"[{a:2d},{b:2d}]:{prob:.4f}")


In [None]:
# Display entry 13 of the states recorded in `infos`.
render(infos['states'], 13)

In [None]:
# Display the second-to-last entry of the states stored in info[1].
# NOTE(review): `info` is the result of an earlier play_one_game cell;
# presumably index 1 selects the second player's record -- confirm.
render(info[1]['states'], -2)

In [None]:
def action_2_index(action):
    """Split a flat action id into ``(action // 9, action % 9)``."""
    quotient, remainder = divmod(action, 9)
    return quotient, remainder

# Print the ten largest entries of the second-to-last distribution stored in
# info[1]['probs'], each with its decoded board coordinates.
probs = torch.tensor(info[1]['probs'])
values, indices = torch.topk(probs[-2], 10)

for prob, action in zip(values.tolist(), indices.tolist()):
    a, b = action_2_index(action)
    print(f"[{a:2d},{b:2d}]:{prob:.4f}")
