In [3]:
import agent
import environment

def play_round(player1: agent.Player, player2: agent.Player, learn: bool = False) -> tuple:
    """Play a single game between two agents on a fresh board.

    Args:
        player1: Agent that moves whenever ``board.current_player()`` is 1.
        player2: Agent that moves on every other turn.
        learn: When true, ``train`` is called on both agents once the game
            has ended, each receiving its own final reward.

    Returns:
        A ``(state, winner)`` tuple holding ``board.state`` and
        ``board.winner`` as they stand when the game is over.
    """
    board = environment.Board()
    # Rewards are keyed by role throughout. Previously the dict was created
    # with role keys but then written/read with the Player objects as keys,
    # which left stale zero entries and required Player to be hashable.
    rewards = {player1.role: 0, player2.role: 0}

    while board.check_winner() == environment.GameStatus.ONGOING:
        current_player_agent = player1 if board.current_player() == 1 else player2
        # Captured before the move is applied.
        # NOTE(review): if Board.make_move mutates board.state in place, this
        # name aliases the post-move state -- confirm which one train() expects.
        current_state = board.state

        chosen_move = current_player_agent.make_move(
            current_state,
            board.available_moves(),
            current_player_agent.exploration_rate,
        )
        game_status = board.make_move(chosen_move)

        if game_status != environment.GameStatus.ONGOING:
            rewards[player1.role] = board.reward(player1.role)
            rewards[player2.role] = board.reward(player2.role)

            if learn:
                player1.train(current_state, rewards[player1.role])
                player2.train(current_state, rewards[player2.role])

            # Leave immediately on the status reported by make_move rather
            # than waiting for the loop condition to be re-evaluated.
            break

    return board.state, board.winner

In [4]:
# Training-run configuration. The episode count and the reporting interval
# were previously both the bare literal 10000 (and the initial echo count a
# bare 5), which made it easy to change one without the other.
NUM_EPISODES = 10000
REPORT_EVERY = 10000          # print cumulative statistics every N episodes
INITIAL_EPISODES_SHOWN = 5    # always echo the first few games

player1 = agent.Player(role=1)
player2 = agent.Player(role=-1)

# Cumulative outcomes keyed by the winner value returned from play_round:
# 1 and -1 are the two player roles, 0 is reported as a tie below.
wins = {1: 0, -1: 0, 0: 0}

for episode in range(NUM_EPISODES):
    # `board` is the final board state returned by play_round (its first
    # return value), not a Board object.
    board, winner = play_round(player1, player2, learn=True)
    wins[winner] += 1

    if episode < INITIAL_EPISODES_SHOWN:
        print(f"Initial Episode {episode+1}, Board: {board}, Winner: {winner}")
    elif (episode + 1) % REPORT_EVERY == 0:
        print(f"Episode {episode+1}, Board: {board}, Winner: {winner}")
        print(f"Player 1 Wins: {wins[1]}, Player 2 Wins: {wins[-1]}, Ties: {wins[0]}")

Initial Episode 1, Board: [0, -1, 1, 1, 0, 1, -1, -1, 1], Winner: 1
Initial Episode 2, Board: [-1, -1, -1, 1, 1, 0, 1, 1, -1], Winner: -1
Initial Episode 3, Board: [1, 0, -1, 1, 1, 0, 1, -1, -1], Winner: 1
Initial Episode 4, Board: [1, -1, 0, -1, 1, 1, -1, 0, 1], Winner: 1
Initial Episode 5, Board: [-1, 1, 1, 0, -1, -1, 1, 1, -1], Winner: -1
Episode 10000, Board: [-1, 0, 1, 0, 1, 0, 1, 0, -1], Winner: 1
Player 1 Wins: 9215, Player 2 Wins: 708, Ties: 77
Episode 20000, Board: [0, 1, -1, 0, 1, -1, 0, 1, 0], Winner: 1
Player 1 Wins: 15217, Player 2 Wins: 4696, Ties: 87
Episode 30000, Board: [0, 1, 0, 0, 1, 0, -1, 1, -1], Winner: 1
Player 1 Wins: 25093, Player 2 Wins: 4786, Ties: 121
Episode 40000, Board: [1, 1, 1, 0, 1, -1, -1, 0, -1], Winner: 1
Player 1 Wins: 31562, Player 2 Wins: 4877, Ties: 3561
Episode 50000, Board: [1, -1, 1, 1, 1, -1, -1, 1, -1], Winner: 0
Player 1 Wins: 36593, Player 2 Wins: 9066, Ties: 4341
Episode 60000, Board: [1, -1, 1, 1, 1, -1, -1, 1, -1], Winner: 0
Player 1 W