In [1]:
import random
import math
import numpy as np

# Define winning combinations
winning_combos = [
    (0, 1, 2),  # Rows
    (3, 4, 5),
    (6, 7, 8),
    (0, 3, 6),  # Columns
    (1, 4, 7),
    (2, 5, 8),
    (0, 4, 8),  # Diagonals
    (2, 4, 6)
]

def check_winner(board, player):
    """Check if the given player has won."""
    for combo in winning_combos:
        if all(board[i] == player for i in combo):
            return True
    return False

def is_draw(board):
    """Check if the game is a draw."""
    return ' ' not in board

def available_moves(board):
    """Return a list of available moves."""
    return [i for i, spot in enumerate(board) if spot == ' ']

def print_board(board):
    """Print the current board state."""
    print(f"{board[0]} | {board[1]} | {board[2]}")
    print("--+---+--")
    print(f"{board[3]} | {board[4]} | {board[5]}")
    print("--+---+--")
    print(f"{board[6]} | {board[7]} | {board[8]}")

In [2]:
state_statistics = {}

def update_state_statistics(state, winner):
    """Update the statistics for a given game state."""
    if state not in state_statistics:
        state_statistics[state] = [0, 0, 0, 0]  # [games_played, x_wins, o_wins, draws]
    
    state_statistics[state][0] += 1  # Increment games played
    
    if winner == 'X':
        state_statistics[state][1] += 1  # Increment X wins
    elif winner == 'O':
        state_statistics[state][2] += 1  # Increment O wins
    elif winner == 'draw':
        state_statistics[state][3] += 1  # Increment draws

def simulate_game(agent, verbose=False):
    """Simulate a single game and update state statistics."""
    board = [' '] * 9
    current_player = 'X'
    state_action_history = []
    
    while True:
        state = tuple(board)
        action = agent.choose_action(board, current_player)
        board[action] = current_player
        next_state = tuple(board)
        state_action_history.append((state, action, current_player))

        if check_winner(board, current_player):
            winner = current_player
            for state, action, player in state_action_history:
                update_state_statistics(state, winner=winner)
            break
        elif is_draw(board):
            for state, action, player in state_action_history:
                update_state_statistics(state, winner='draw')
            break
        else:
            current_player = 'O' if current_player == 'X' else 'X'
    
    if verbose:
        print_board(board)

def train_agent(agent, episodes=10000):
    """Train the agent by simulating games and updating state statistics."""
    for episode in range(episodes):
        simulate_game(agent)
    print(f"Training complete after {episodes} episodes.")

def calculate_win_draw_rates():
    """Calculate the win and draw rates for all encountered game states."""
    win_draw_rates = {}
    for state, stats in state_statistics.items():
        games_played, x_wins, o_wins, draws = stats
        if games_played > 0:
            x_win_rate = x_wins / games_played
            o_win_rate = o_wins / games_played
            draw_rate = draws / games_played
            win_draw_rates[state] = {
                'X_win_rate': x_win_rate,
                'O_win_rate': o_win_rate,
                'draw_rate': draw_rate
            }
    return win_draw_rates

# Example of an agent with random actions (you can replace this with your agent logic)
class RandomAgent:
    def choose_action(self, board, player):
        """Randomly choose an available action."""
        return random.choice(available_moves(board))

# Train the agent and collect state statistics
agent = RandomAgent()
train_agent(agent, episodes=100000)

# Calculate win and draw rates from the simulations
win_draw_rates = calculate_win_draw_rates()

# Display some sample win and draw rates (you can analyze them further or store them)
for state, rates in list(win_draw_rates.items())[:10]:  # Display first 10 states and their win/draw rates
    print(f"State: {state}, X win rate: {rates['X_win_rate']:.2f}, O win rate: {rates['O_win_rate']:.2f}, Draw rate: {rates['draw_rate']:.2f}")


Training complete after 100000 episodes.
State: (' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '), X win rate: 0.58, O win rate: 0.29, Draw rate: 0.13
State: (' ', ' ', 'X', ' ', ' ', ' ', ' ', ' ', ' '), X win rate: 0.61, O win rate: 0.26, Draw rate: 0.13
State: (' ', 'O', 'X', ' ', ' ', ' ', ' ', ' ', ' '), X win rate: 0.66, O win rate: 0.17, Draw rate: 0.17
State: (' ', 'O', 'X', ' ', ' ', ' ', ' ', 'X', ' '), X win rate: 0.61, O win rate: 0.12, Draw rate: 0.28
State: (' ', 'O', 'X', ' ', ' ', ' ', 'O', 'X', ' '), X win rate: 0.38, O win rate: 0.06, Draw rate: 0.56
State: (' ', 'O', 'X', ' ', ' ', 'X', 'O', 'X', ' '), X win rate: 0.49, O win rate: 0.11, Draw rate: 0.41
State: (' ', 'O', 'X', ' ', ' ', 'X', 'O', 'X', 'O'), X win rate: 0.25, O win rate: 0.00, Draw rate: 0.75
State: (' ', 'O', 'X', 'X', ' ', 'X', 'O', 'X', 'O'), X win rate: 0.54, O win rate: 0.00, Draw rate: 0.46
State: ('O', 'O', 'X', 'X', ' ', 'X', 'O', 'X', 'O'), X win rate: 1.00, O win rate: 0.00, Draw rate: 0.00
State

In [6]:
def combined_heuristic(state, action, current_player, next_player):
    """Calculate the combined value of a move based on win/loss rates."""
    # Get the resulting state after the action is taken
    board = list(state)
    board[action] = current_player
    next_state = tuple(board)
    
    # Get win and loss rates for current player and opponent
    win_rate_current = win_draw_rates.get(next_state, {}).get(f'{current_player}_win_rate', 0)
    win_rate_opponent = win_draw_rates.get(next_state, {}).get(f'{next_player}_win_rate', 0)
    
    # You can also consider draw rate if needed
    draw_rate = win_draw_rates.get(next_state, {}).get('draw_rate', 0)
    
    # Weights for win, opponent win (loss prevention), and draw
    alpha = 1.0  # Weight for maximizing your win
    beta = 0.1  # Weight for minimizing opponent win
    gamma = 0.1  # Weight for considering draws (tune as needed)
    
    # Calculate the combined heuristic value
    value = alpha * win_rate_current - beta * win_rate_opponent + gamma * draw_rate
    return value

def choose_best_action(board, current_player, next_player):
    """Choose the best action based on combined win/loss minimization strategy."""
    available_actions = available_moves(board)
    best_value = -float('inf')
    best_action = None
    
    for action in available_actions:
        value = combined_heuristic(tuple(board), action, current_player, next_player)
        if value > best_value:
            best_value = value
            best_action = action
    
    return best_action

# Example usage in a game
class SmartAgent:
    def choose_action(self, board, player):
        """Choose an action based on the combined heuristic strategy."""
        next_player = 'O' if player == 'X' else 'X'
        return choose_best_action(board, player, next_player)

# Train the smart agent
smart_agent = SmartAgent()
train_agent(smart_agent, episodes=10000)

# Simulate a game
simulate_game(smart_agent, verbose=True)

Training complete after 10000 episodes.
X | O | X
--+---+--
X | X | O
--+---+--
O | X | O


In [7]:
def play_game():
    """Allow a human player to play against the trained SmartAgent."""
    board = [' '] * 9
    human_player = input("Choose your player (X or O): ").upper()
    
    if human_player not in ['X', 'O']:
        print("Invalid choice. Defaulting to X.")
        human_player = 'X'
    
    ai_player = 'O' if human_player == 'X' else 'X'
    current_player = 'X'  # X always starts the game
    smart_agent = SmartAgent()

    print_board(board)

    while True:
        if current_player == human_player:
            # Human's turn
            available = available_moves(board)
            move = None
            while move not in available:
                try:
                    move = int(input(f"Your move (choose from {available}): "))
                    if move not in available:
                        print("Invalid move. Try again.")
                except ValueError:
                    print("Please enter a valid number.")
            board[move] = human_player
        else:
            # Smart agent's turn
            print(f"SmartAgent ({ai_player}) is thinking...")
            move = smart_agent.choose_action(board, ai_player)
            board[move] = ai_player
            print(f"SmartAgent ({ai_player}) played at position {move}")

        print_board(board)

        # Check for winner or draw
        if check_winner(board, current_player):
            print(f"Player {current_player} wins!")
            break
        elif is_draw(board):
            print("It's a draw!")
            break

        # Switch players
        current_player = 'O' if current_player == 'X' else 'X'

# Train the SmartAgent with 10,000 games
agent = SmartAgent()
train_agent(agent, episodes=10000)

# Now play against the trained agent
play_game()

Training complete after 10000 episodes.
  |   |  
--+---+--
  |   |  
--+---+--
  |   |  
SmartAgent (X) is thinking...
SmartAgent (X) played at position 4
  |   |  
--+---+--
  | X |  
--+---+--
  |   |  
  | O |  
--+---+--
  | X |  
--+---+--
  |   |  
SmartAgent (X) is thinking...
SmartAgent (X) played at position 2
  | O | X
--+---+--
  | X |  
--+---+--
  |   |  
O | O | X
--+---+--
  | X |  
--+---+--
  |   |  
SmartAgent (X) is thinking...
SmartAgent (X) played at position 5
O | O | X
--+---+--
  | X | X
--+---+--
  |   |  
O | O | X
--+---+--
  | X | X
--+---+--
O |   |  
SmartAgent (X) is thinking...
SmartAgent (X) played at position 7
O | O | X
--+---+--
  | X | X
--+---+--
O | X |  
O | O | X
--+---+--
O | X | X
--+---+--
O | X |  
Player O wins!
