<a href="https://colab.research.google.com/github/LinuxFan2718/bots/blob/main/Hexapawn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import random
import pickle
from collections import defaultdict

# ===================================================================
# 1. THE HEXAPAWN GAME ENVIRONMENT (No changes here)
# ===================================================================
class Hexapawn:
    """Manages the state and rules of the Hexapawn game."""
    def __init__(self):
        self.board = [['b', 'b', 'b'], ['.', '.', '.'], ['w', 'w', 'w']]
        self.player_turn = 'w'

    def print_board(self):
        print("\n  0 1 2")
        for i, row in enumerate(self.board):
            print(f"{i} {' '.join(row)}")
        print()

    def get_state_string(self):
        return "".join(["".join(row) for row in self.board])

    def get_possible_moves(self, player):
        moves = []
        direction = -1 if player == 'w' else 1
        opponent = 'b' if player == 'w' else 'w'

        for r in range(3):
            for c in range(3):
                if self.board[r][c] == player:
                    if 0 <= r + direction < 3 and self.board[r + direction][c] == '.':
                        moves.append(((r, c), (r + direction, c)))
                    if 0 <= c - 1 < 3 and 0 <= r + direction < 3 and self.board[r + direction][c - 1] == opponent:
                        moves.append(((r, c), (r + direction, c - 1)))
                    if 0 <= c + 1 < 3 and 0 <= r + direction < 3 and self.board[r + direction][c + 1] == opponent:
                        moves.append(((r, c), (r + direction, c + 1)))
        return moves

    def make_move(self, move):
        start, end = move
        r1, c1 = start
        r2, c2 = end
        self.board[r2][c2] = self.board[r1][c1]
        self.board[r1][c1] = '.'

    def switch_player(self):
        self.player_turn = 'b' if self.player_turn == 'w' else 'w'

# ===================================================================
# 2. THE Q-LEARNING AGENT (No changes here)
# ===================================================================
class QLearningAgent:
    """The AI agent that learns to play Hexapawn using Q-learning."""
    def __init__(self, learning_rate=0.1, discount_factor=0.9, exploration_rate=1.0, exploration_decay=0.999):
        self.q_table = defaultdict(lambda: defaultdict(float))
        self.lr = learning_rate
        self.gamma = discount_factor
        self.epsilon = exploration_rate
        self.epsilon_decay = exploration_decay
        self.epsilon_min = 0.01

    def get_q_value(self, state, action):
        return self.q_table[state][action]

    def choose_action(self, state, possible_moves):
        if random.random() < self.epsilon or not possible_moves:
            return random.choice(possible_moves) if possible_moves else None
        else:
            q_values = [self.get_q_value(state, move) for move in possible_moves]
            max_q = max(q_values)
            best_moves = [move for i, move in enumerate(possible_moves) if q_values[i] == max_q]
            return random.choice(best_moves)

    def update(self, state, action, reward, next_state, next_possible_moves):
        best_next_q = 0
        if next_possible_moves:
            best_next_q = max(self.get_q_value(next_state, move) for move in next_possible_moves)

        old_q = self.get_q_value(state, action)
        new_q = old_q + self.lr * (reward + self.gamma * best_next_q - old_q)
        self.q_table[state][action] = new_q

    def decay_epsilon(self):
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# ===================================================================
# 3. THE TRAINING LOOP (REVISED LOGIC)
# ===================================================================
def train(episodes):
    """Trains the agent using self-play with corrected win/loss logic."""
    agent = QLearningAgent()

    for i in range(episodes):
        game = Hexapawn()
        history = []
        winner = None

        while True:
            current_player = game.player_turn
            opponent = 'b' if current_player == 'w' else 'w'
            state = game.get_state_string()
            possible_moves = game.get_possible_moves(current_player)

            # LOSS Condition: Current player has no moves, opponent wins.
            if not possible_moves:
                winner = opponent
                break

            action = agent.choose_action(state, possible_moves)
            history.append({'state': state, 'action': action, 'player': current_player})
            game.make_move(action)

            # WIN Conditions (created by the move just made)
            # 1. Reached the opposite end
            if (current_player == 'w' and 'w' in game.board[0]) or \
               (current_player == 'b' and 'b' in game.board[2]):
                winner = current_player
                break

            # 2. Opponent is now trapped (will be caught at the start of their turn)

            game.switch_player()

        # Propagate rewards back through the game history
        for record in history:
            reward = 0
            if winner == record['player']:
                reward = 1
            elif winner is not None:
                reward = -1

            # Simplified update for this classic problem
            old_q = agent.get_q_value(record['state'], record['action'])
            agent.q_table[record['state']][record['action']] = old_q + agent.lr * (reward - old_q)

        agent.decay_epsilon()
        if (i + 1) % 1000 == 0:
            print(f"Episode {i+1}/{episodes} completed. Epsilon: {agent.epsilon:.4f}")

    print("\nTraining complete!")
    return agent

# ===================================================================
# 4. PLAY AGAINST THE TRAINED BOT (REVISED LOGIC)
# ===================================================================
def play_with_trained_bot(agent):
    """Allows a human to play against the trained AI with corrected win/loss logic."""
    agent.epsilon = 0 # Set to pure exploitation mode
    game = Hexapawn()

    while True:
        game.print_board()

        current_player = game.player_turn
        opponent = 'b' if current_player == 'w' else 'w'
        possible_moves = game.get_possible_moves(current_player)

        # Check for LOSS (current player has no moves)
        if not possible_moves:
            print(f"Player {current_player.upper()} has no legal moves!")
            print(f"--- Player {opponent.upper()} wins! ---")
            break

        move = None
        if current_player == 'w': # Human player
            print("Your possible moves:")
            for i, move_option in enumerate(possible_moves):
                print(f"{i}: From {move_option[0]} to {move_option[1]}")

            while True:
                try:
                    choice = int(input(f"Choose your move (0-{len(possible_moves)-1}): "))
                    if 0 <= choice < len(possible_moves):
                        move = possible_moves[choice]
                        break
                    else:
                        print("Invalid choice.")
                except ValueError:
                    print("Invalid input.")
        else: # AI player
            print("AI is thinking...")
            state = game.get_state_string()
            move = agent.choose_action(state, possible_moves)
            print(f"AI chose to move from {move[0]} to {move[1]}")

        game.make_move(move)

        # Check for WIN (move just made wins the game)
        # 1. Win by reaching the end
        if (current_player == 'w' and 'w' in game.board[0]) or \
           (current_player == 'b' and 'b' in game.board[2]):
            game.print_board()
            print(f"--- Player {current_player.upper()} wins by reaching the end! ---")
            break

        # 2. Win by trapping opponent is handled by the check at the start of the loop

        game.switch_player()

# ===================================================================
# 5. EXECUTION
# ===================================================================
print("Starting training... (this may take a minute)")
trained_agent = train(20000)

print("\nNow, it's your turn to play against the trained AI.")
print("You are Player 'w' (White). Good luck.")
play_with_trained_bot(trained_agent)

Starting training... (this may take a minute)
Episode 1000/20000 completed. Epsilon: 0.3677
Episode 2000/20000 completed. Epsilon: 0.1352
Episode 3000/20000 completed. Epsilon: 0.0497
Episode 4000/20000 completed. Epsilon: 0.0183
Episode 5000/20000 completed. Epsilon: 0.0100
Episode 6000/20000 completed. Epsilon: 0.0100
Episode 7000/20000 completed. Epsilon: 0.0100
Episode 8000/20000 completed. Epsilon: 0.0100
Episode 9000/20000 completed. Epsilon: 0.0100
Episode 10000/20000 completed. Epsilon: 0.0100
Episode 11000/20000 completed. Epsilon: 0.0100
Episode 12000/20000 completed. Epsilon: 0.0100
Episode 13000/20000 completed. Epsilon: 0.0100
Episode 14000/20000 completed. Epsilon: 0.0100
Episode 15000/20000 completed. Epsilon: 0.0100
Episode 16000/20000 completed. Epsilon: 0.0100
Episode 17000/20000 completed. Epsilon: 0.0100
Episode 18000/20000 completed. Epsilon: 0.0100
Episode 19000/20000 completed. Epsilon: 0.0100
Episode 20000/20000 completed. Epsilon: 0.0100

Training complete!

No

In [10]:
play_with_trained_bot(trained_agent)


  0 1 2
0 b b b
1 . . .
2 w w w

Your possible moves:
0: From (2, 0) to (1, 0)
1: From (2, 1) to (1, 1)
2: From (2, 2) to (1, 2)
Choose your move (0-2): 2

  0 1 2
0 b b b
1 . . w
2 w w .

AI is thinking...
AI chose to move from (0, 1) to (1, 2)

  0 1 2
0 b . b
1 . . b
2 w w .

Your possible moves:
0: From (2, 0) to (1, 0)
1: From (2, 1) to (1, 1)
2: From (2, 1) to (1, 2)
Choose your move (0-2): 2

  0 1 2
0 b . b
1 . . w
2 w . .

AI is thinking...
AI chose to move from (0, 0) to (1, 0)

  0 1 2
0 . . b
1 b . w
2 w . .

Player W has no legal moves!
--- Player B wins! ---


In [12]:
play_with_trained_bot(trained_agent)


  0 1 2
0 b b b
1 . . .
2 w w w

Your possible moves:
0: From (2, 0) to (1, 0)
1: From (2, 1) to (1, 1)
2: From (2, 2) to (1, 2)
Choose your move (0-2): 1

  0 1 2
0 b b b
1 . w .
2 w . w

AI is thinking...
AI chose to move from (0, 0) to (1, 1)

  0 1 2
0 . b b
1 . b .
2 w . w

Your possible moves:
0: From (2, 0) to (1, 0)
1: From (2, 0) to (1, 1)
2: From (2, 2) to (1, 2)
3: From (2, 2) to (1, 1)
Choose your move (0-3): 0

  0 1 2
0 . b b
1 w b .
2 . . w

AI is thinking...
AI chose to move from (1, 1) to (2, 2)

  0 1 2
0 . b b
1 w . .
2 . . b

--- Player B wins by reaching the end! ---


In [16]:
trained_agent.__dict__

{'q_table': defaultdict(<function __main__.QLearningAgent.__init__.<locals>.<lambda>()>,
             {'bbb...www': defaultdict(float,
                          {((2, 1), (1, 1)): -0.8842757511918685,
                           ((2, 0), (1, 0)): -0.9999999999999996,
                           ((2, 2), (1, 2)): -0.9999999999999996}),
              'bbb.w.w.w': defaultdict(float,
                          {((0, 2), (1, 2)): -0.999826454988237,
                           ((0, 0), (1, 1)): 0.9999952189415017,
                           ((0, 2), (1, 1)): 0.7605158876500508,
                           ((0, 0), (1, 0)): -0.9999515710039909}),
              'bb..wbw.w': defaultdict(float,
                          {((2, 0), (1, 0)): 0.7949868905149684,
                           ((1, 1), (0, 0)): 0.9999973843107253}),
              'bb.wwb..w': defaultdict(float,
                          {((0, 0), (1, 1)): -0.6256008079172641,
                           ((0, 1), (1, 0)): -0.7941088679053511})