In [1]:
import numpy as np
import random
import pygame


class TicTacToe:
    """Tabular Q-learning agent for 3x3 tic-tac-toe.

    A state is the board flattened to a 9-character string (" " marks an
    empty cell).  States are registered lazily: ``state_space`` lists every
    state seen so far and row ``i`` of ``q_table`` holds the nine action
    values of ``state_space[i]``.
    """

    def __init__(self):
        self.state_space = self.create_state_space()
        self.q_table = np.zeros((len(self.state_space), 9))
        self.learning_rate = 0.5  # step size of each Q update
        self.discount_factor = 0.5  # weight of the bootstrapped next-state value
        self.epsilon = 0.1  # probability of playing a random (exploratory) move

    def create_state_space(self):
        """Return the initial list of known states: only the empty board."""
        return [" " * 9]

    def get_available_actions(self, state):
        """Return the indices (0-8) of the empty cells of ``state``."""
        return [i for i in range(9) if state[i] == " "]

    def _state_index(self, state):
        """Return the q_table row of ``state``, registering it if unseen."""
        if state not in self.state_space:
            self.state_space.append(state)
            self.q_table = np.vstack((self.q_table, np.zeros(9)))
        return self.state_space.index(state)

    def choose_action(self, state):
        """Pick a legal move for ``state`` with an epsilon-greedy policy.

        The greedy branch only considers empty cells.  Taking the argmax over
        all nine cells could return an occupied one (an all-zero row always
        yields cell 0), which the game would silently reject.

        Returns:
            int: index of an empty cell.

        Raises:
            IndexError: if ``state`` has no empty cell.
        """
        state_index = self._state_index(state)
        available = self.get_available_actions(state)
        if not available:
            raise IndexError("choose_action() called on a full board")

        if random.uniform(0, 1) < self.epsilon:
            return random.choice(available)

        values = self.q_table[state_index]
        # max() keeps the first maximum, i.e. the lowest-index legal cell on ties.
        return max(available, key=lambda a: values[a])

    def update_q_value(self, state, action, reward, next_state):
        """Apply one Q-learning update for the transition state -> next_state.

        Both states are registered on demand.  The bootstrap term is the best
        value among the *legal* actions of ``next_state`` (0 when the board is
        full), so untouched entries of occupied cells cannot leak into it.
        """
        state_index = self._state_index(state)
        next_state_index = self._state_index(next_state)

        next_values = self.q_table[next_state_index]
        best_next_value = max(
            (next_values[a] for a in self.get_available_actions(next_state)),
            default=0.0,
        )
        td_target = reward + self.discount_factor * best_next_value
        current = self.q_table[state_index][action]
        self.q_table[state_index][action] = current + self.learning_rate * (td_target - current)


class Game:
    """Rules and state of a 3x3 tic-tac-toe game.

    ``board`` is a flat list of nine cells (" ", "X" or "O"),
    ``current_player`` is the mark that moves next and ``winner`` is the
    winning mark once a line has been completed.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the board and give the first move back to "X"."""
        self.winner = None
        self.current_player = "X"
        self.board = [" " for _ in range(9)]

    def make_move(self, position):
        """Place the current player's mark on ``position``.

        Returns 1 when the move wins the game, 0 when it fills the board
        without a winner, and None otherwise (including when the cell is
        already taken, in which case nothing changes).
        """
        if self.board[position] != " ":
            return None

        mover = self.current_player
        self.board[position] = mover

        if self.check_winner():
            self.winner = mover
            return 1
        if self.board.count(" ") == 0:
            return 0

        # "X" hands over to "O"; anything else hands over to "X".
        self.current_player = {"X": "O"}.get(mover, "X")
        return None

    def check_winner(self):
        """Return True when any row, column or diagonal holds three equal marks."""
        lines = (
            (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows
            (0, 3, 6), (1, 4, 7), (2, 5, 8),  # columns
            (0, 4, 8), (2, 4, 6),             # diagonals
        )
        cells = self.board
        return any(
            cells[c] != " " and cells[a] == cells[b] and cells[b] == cells[c]
            for a, b, c in lines
        )


def draw_board(board):
    """Draw the 3x3 grid and every mark of ``board`` onto the global ``screen``.

    Each cell is a 100x100 pixel square: "X" is drawn as two red diagonals
    and "O" as a blue ring.
    """
    for index in range(9):
        i, j = divmod(index, 3)
        left, top = j * 100, i * 100
        right, bottom = (j + 1) * 100, (i + 1) * 100

        # Cell outline.
        pygame.draw.rect(screen, (255, 255, 255), (left, top, 100, 100), 1)

        mark = board[index]
        if mark == 'X':
            pygame.draw.line(screen, (255, 0, 0), (left, top), (right, bottom), 5)
            pygame.draw.line(screen, (255, 0, 0), (right, top), (left, bottom), 5)
        elif mark == 'O':
            pygame.draw.circle(screen, (0, 0, 255), (left + 50, top + 50), 45, 5)


# Interactive tic-tac-toe: the human plays "X" with the mouse, the Q-learning
# agent plays "O" and keeps learning across games until the window is closed.
pygame.init()
screen = pygame.display.set_mode((300, 300))

game = Game()
q_learning = TicTacToe()

# (state, action) of the agent's latest move whose outcome is still unknown.
# The outcome depends on the human's reply, so the update is deferred until
# that reply has been played.  Updating right after the agent's own move can
# never deliver the -1 "loss" reward, because "X" cannot have won yet.
pending = None
clock = pygame.time.Clock()

running = True
try:
    while running:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
            elif event.type == pygame.MOUSEBUTTONDOWN and game.winner is None and game.current_player == "X":
                x, y = event.pos
                row = y // 100
                col = x // 100
                position = row * 3 + col
                result = game.make_move(position)
                if result is not None:
                    # The human's move ended the game: settle the agent's last move.
                    if pending is not None:
                        reward = -1 if game.winner == "X" else 0
                        q_learning.update_q_value(pending[0], pending[1], reward, "".join(game.board))
                        pending = None
                    if game.winner:
                        print(f"{game.winner} 获胜！")
                    else:
                        print("平局！")
                    game.reset()

        if game.current_player == "O":
            state = "".join(game.board)
            if pending is not None:
                # The human replied without ending the game: no reward yet,
                # bootstrap from the position the agent faces now.
                q_learning.update_q_value(pending[0], pending[1], 0, state)

            action = q_learning.choose_action(state)
            if game.board[action] != " ":
                # Guard against a greedy pick that lands on an occupied cell.
                action = random.choice(q_learning.get_available_actions(state))
            result = game.make_move(action)

            if result is None:
                pending = (state, action)
            else:
                # The agent's own move ended the game (win or full board).
                reward = 1 if game.winner == "O" else 0
                q_learning.update_q_value(state, action, reward, "".join(game.board))
                pending = None
                if game.winner:
                    print(f"{game.winner} 获胜！")
                else:
                    print("平局！")
                game.reset()

        screen.fill((0, 0, 0))
        draw_board(game.board)
        pygame.display.flip()
        clock.tick(60)  # cap the frame rate instead of spinning a CPU core
finally:
    # Close the window even if the loop is interrupted by an exception.
    pygame.quit()


pygame 2.3.0 (SDL 2.24.2, Python 3.9.13)
Hello from the pygame community. https://www.pygame.org/contribute.html
O 获胜！


In [2]:
import pygame

class Gomoku:
    """State and rules of a 15x15 Gomoku (five-in-a-row) game.

    The board is a flat list of 225 cells; cell (row, col) is stored at
    index ``row * 15 + col``.  "X" is black and moves first, "O" is white.
    """

    def __init__(self):
        self.board = [" " for _ in range(225)]  # 15x15 board
        self.current_player = "X"
        self.winner = None
        self.grid_size = 30  # pixel size of one cell
        self.board_size = self.grid_size * 15  # pixel size of the whole board

    def reset(self):
        """Start a new game: empty board, black to move, no winner."""
        self.winner = None
        self.current_player = "X"
        self.board = [" " for _ in range(225)]

    def make_move(self, position):
        """Place the current player's stone on ``position``.

        Returns False when the cell is occupied, 1 when the move wins,
        0 when it fills the board without a winner, and True otherwise
        (the turn then passes to the other player).
        """
        if self.board[position] != " ":
            return False

        self.board[position] = self.current_player
        if self.check_winner(position):
            self.winner = self.current_player
            return 1
        if " " not in self.board:
            return 0  # draw

        self.current_player = "O" if self.current_player == "X" else "X"
        return True

    def check_winner(self, pos):
        """Return True when the stone at ``pos`` is part of a run of five or more."""
        stone = self.board[pos]
        row0, col0 = divmod(pos, 15)

        def run(dr, dc):
            # Number of consecutive matching stones walking away from pos.
            length = 0
            r, c = row0 + dr, col0 + dc
            while 0 <= r < 15 and 0 <= c < 15 and self.board[r * 15 + c] == stone:
                length += 1
                r += dr
                c += dc
            return length

        # horizontal, vertical, main diagonal, anti-diagonal
        axes = ((0, 1), (1, 0), (1, 1), (1, -1))
        return any(1 + run(dr, dc) + run(-dr, -dc) >= 5 for dr, dc in axes)

def draw_board(board, screen, grid_size):
    """Paint the 15x15 Gomoku board and all stones onto ``screen``.

    ``board`` is the flat 225-cell list; ``grid_size`` is the pixel size of
    one cell.  Lines and stones are centred in their cells.
    """
    black = (0, 0, 0)
    white = (255, 255, 255)
    half = grid_size // 2
    far_edge = half + 14 * grid_size

    screen.fill((200, 160, 60))  # wood-coloured background

    # Grid lines: one horizontal and one vertical per index.
    for i in range(15):
        offset = half + i * grid_size
        pygame.draw.line(screen, black, (half, offset), (far_edge, offset))
        pygame.draw.line(screen, black, (offset, half), (offset, far_edge))

    # Star points.
    for r, c in ((3, 3), (3, 11), (11, 3), (11, 11), (7, 7)):
        pygame.draw.circle(screen, black, (half + c * grid_size, half + r * grid_size), 5)

    # Stones: black discs for "X", white discs with a black rim for "O".
    radius = half - 2
    for i in range(225):
        stone = board[i]
        if stone != "X" and stone != "O":
            continue
        row, col = divmod(i, 15)
        center = (col * grid_size + half, row * grid_size + half)
        if stone == "X":
            pygame.draw.circle(screen, black, center, radius)
        else:
            pygame.draw.circle(screen, white, center, radius)
            pygame.draw.circle(screen, black, center, radius, 2)

def main():
    """Run a two-player (hot-seat) Gomoku game in a pygame window.

    Players alternate by clicking cells.  Once a player wins, or the board
    fills up, the result is shown in the middle of the board and further
    clicks no longer place stones.  Closing the window ends the program.
    """
    pygame.init()
    grid_size = 30
    screen = pygame.display.set_mode((grid_size*15, grid_size*15))
    pygame.display.set_caption("五子棋")

    game = Gomoku()
    font = pygame.font.Font(None, 36)
    clock = pygame.time.Clock()

    running = True
    try:
        while running:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
                elif event.type == pygame.MOUSEBUTTONDOWN and not game.winner:
                    # Use the position recorded with the click; the live mouse
                    # position may have moved by the time the event is handled.
                    x, y = event.pos
                    col = x // grid_size
                    row = y // grid_size
                    if 0 <= row < 15 and 0 <= col < 15:
                        game.make_move(row*15 + col)

            # draw_board() repaints the whole surface, so no separate clear is needed.
            draw_board(game.board, screen, grid_size)

            # The default pygame font has no CJK glyphs, so the on-screen
            # message is kept in ASCII.
            message = None
            if game.winner:
                message = f"Player {game.winner} Wins!"
            elif " " not in game.board:
                message = "Draw!"
            if message:
                text = font.render(message, True, (255, 0, 0))
                screen.blit(text, (grid_size*15//2 - text.get_width()//2,
                                   grid_size*15//2 - text.get_height()//2))

            pygame.display.flip()
            clock.tick(60)  # cap the frame rate instead of spinning a CPU core
    finally:
        # Close the window even if the loop is interrupted by an exception.
        pygame.quit()

if __name__ == "__main__":
    main()

In [8]:
import pygame
import numpy as np
import pickle
import random

class Gomoku:
    """Game state and rules for 15x15 Gomoku (five-in-a-row).

    Cells are kept in a flat list of 225 entries (index ``row * 15 + col``):
    " " is empty, "X" is black (moves first) and "O" is white.
    """

    def __init__(self):
        self.board = list(" " * 225)  # 15x15 board
        self.current_player = "X"
        self.winner = None
        self.grid_size = 30  # pixel size of one cell
        self.board_size = self.grid_size * 15  # pixel size of the whole board

    def reset(self):
        """Empty the board and hand the first move back to black."""
        self.board = list(" " * 225)
        self.current_player = "X"
        self.winner = None

    def make_move(self, position):
        """Play the current player's stone on ``position``.

        Returns:
            False if the cell is already occupied (nothing changes),
            1 if the move wins the game,
            0 if the move fills the board without a winner,
            True otherwise, after passing the turn to the other player.
        """
        if self.board[position] != " ":
            return False

        self.board[position] = self.current_player
        if self.check_winner(position):
            self.winner = self.current_player
            return 1  # win
        if self.board.count(" ") == 0:
            return 0  # draw

        self.current_player = "O" if self.current_player == "X" else "X"
        return True

    def check_winner(self, pos):
        """Return True if the stone at ``pos`` belongs to a line of at least five."""
        origin_row, origin_col = pos // 15, pos % 15
        mark = self.board[pos]

        # horizontal, vertical, main diagonal, anti-diagonal
        for d_row, d_col in ((0, 1), (1, 0), (1, 1), (1, -1)):
            streak = 1
            # Walk forwards (sign = 1), then backwards (sign = -1), along the axis.
            for sign in (1, -1):
                r = origin_row + sign * d_row
                c = origin_col + sign * d_col
                while 0 <= r < 15 and 0 <= c < 15 and self.board[r * 15 + c] == mark:
                    streak += 1
                    r += sign * d_row
                    c += sign * d_col
            if streak >= 5:
                return True
        return False

    def get_available_moves(self):
        """Return the indices of all empty cells, in ascending order."""
        return list(filter(lambda i: self.board[i] == " ", range(225)))

    def get_state(self):
        """Return the board as a single 225-character string."""
        return "".join(cell for cell in self.board)

# Q-learning parameters
class QLearningAgent:
    """Tabular Q-learning agent whose table is a dict keyed by (state, action).

    Missing entries count as 0.0, so the table only stores pairs that have
    been updated at least once.
    """

    def __init__(self, alpha=0.1, gamma=0.9, epsilon=0.2):
        self.q_table = {}  # (state, action) -> Q value
        self.alpha = alpha  # learning rate
        self.gamma = gamma  # discount factor
        self.epsilon = epsilon  # exploration probability

    def get_q_value(self, state, action):
        """Return Q(state, action), or 0.0 if the pair has never been updated."""
        return self.q_table.get((state, action), 0.0)

    def update_q_value(self, state, action, reward, next_state, available_actions=None):
        """Apply one Q-learning update for the transition state -> next_state.

        Args:
            state: state in which ``action`` was taken.
            action: the action taken (a board index).
            reward: immediate reward of the transition.
            next_state: state reached after the action.
            available_actions: legal actions in ``next_state``.  When given,
                only these are used for the bootstrap term (an empty sequence
                marks a terminal state and contributes 0).  When omitted, all
                225 board indices are used as before; occupied cells then
                contribute their default 0.0, which floors the bootstrap
                value at zero, so callers should pass the legal moves.
        """
        if available_actions is None:
            available_actions = range(225)
        max_next_q = max((self.get_q_value(next_state, a) for a in available_actions), default=0)
        old_q = self.get_q_value(state, action)
        self.q_table[(state, action)] = old_q + self.alpha * (reward + self.gamma * max_next_q - old_q)

    def choose_action(self, state, available_moves):
        """Pick a move from ``available_moves`` with an epsilon-greedy policy.

        Ties between equally valued moves are broken at random.

        Raises:
            IndexError: if ``available_moves`` is empty.
        """
        if not available_moves:
            raise IndexError("choose_action() requires at least one available move")

        if np.random.rand() < self.epsilon:  # explore
            return random.choice(available_moves)

        # exploit
        q_values = {action: self.get_q_value(state, action) for action in available_moves}
        max_q = max(q_values.values())
        return random.choice([action for action, q in q_values.items() if q == max_q])

# 绘制棋盘
def draw_board(board, screen, grid_size):
    """Render the 15x15 board: background, grid, star points and stones.

    ``board`` is the flat 225-cell list and ``grid_size`` the pixel size of
    one cell; everything is drawn onto ``screen``.
    """
    ink = (0, 0, 0)
    margin = grid_size // 2

    def center_of(row, col):
        # Pixel centre of the cell at (row, col).
        return (col * grid_size + margin, row * grid_size + margin)

    screen.fill((200, 160, 60))  # wood-coloured background

    # Grid lines.
    last = margin + 14 * grid_size
    for i in range(15):
        along = margin + i * grid_size
        pygame.draw.line(screen, ink, (margin, along), (last, along))
        pygame.draw.line(screen, ink, (along, margin), (along, last))

    # Star points.
    for r, c in [(3, 3), (3, 11), (11, 3), (11, 11), (7, 7)]:
        pygame.draw.circle(screen, ink, (margin + c * grid_size, margin + r * grid_size), 5)

    # Stones.
    stone_radius = margin - 2
    for i in range(225):
        cell = board[i]
        if cell == "X":
            pygame.draw.circle(screen, ink, center_of(i // 15, i % 15), stone_radius)
        elif cell == "O":
            spot = center_of(i // 15, i % 15)
            pygame.draw.circle(screen, (255, 255, 255), spot, stone_radius)
            pygame.draw.circle(screen, ink, spot, stone_radius, 2)  # black rim

# 主函数
def main():
    """Run a human ("X") versus agent ("O") Gomoku game in a pygame window.

    Each valid click places a black stone and, unless that move ends the
    game, the agent answers immediately.  The result is shown in the middle
    of the board once a player wins or the board is full.

    NOTE: the agent's Q-table is never updated in this loop, so every move
    has value 0.0 and the agent effectively plays uniformly at random.
    """
    pygame.init()
    grid_size = 30
    screen = pygame.display.set_mode((grid_size * 15, grid_size * 15))
    pygame.display.set_caption("五子棋 Q-learning 人机对战")

    game = Gomoku()
    agent = QLearningAgent()
    font = pygame.font.Font(None, 36)
    clock = pygame.time.Clock()
    running = True

    try:
        while running:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
                elif event.type == pygame.MOUSEBUTTONDOWN and not game.winner:
                    # Use the position recorded with the click; the live mouse
                    # position may have moved by the time the event is handled.
                    x, y = event.pos
                    col = x // grid_size
                    row = y // grid_size
                    if 0 <= row < 15 and 0 <= col < 15:
                        position = row * 15 + col
                        if game.board[position] == " ":
                            game.make_move(position)
                            available_moves = game.get_available_moves()
                            # The human places the 225th stone, so the board can be
                            # full here; the agent must not be asked to move then.
                            if not game.winner and available_moves:  # agent's turn
                                state = game.get_state()
                                ai_move = agent.choose_action(state, available_moves)
                                game.make_move(ai_move)

            # draw_board() repaints the whole surface, so no separate clear is needed.
            draw_board(game.board, screen, grid_size)

            message = None
            if game.winner:
                message = f"Player {game.winner} Wins!"
            elif " " not in game.board:
                message = "Draw!"
            if message:
                text = font.render(message, True, (255, 0, 0))
                screen.blit(text, (grid_size * 15 // 2 - text.get_width() // 2,
                                   grid_size * 15 // 2 - text.get_height() // 2))

            pygame.display.flip()
            clock.tick(60)  # cap the frame rate instead of spinning a CPU core
    finally:
        # Close the window even if the loop is interrupted by an exception.
        pygame.quit()


if __name__ == "__main__":
    main()

In [2]:
import numpy as np
import random
import pygame

class Gomoku:
    """Gomoku (five-in-a-row) rules on a square board stored as a flat list.

    Cell (row, col) lives at index ``row * size + col``; " " is empty,
    "X" is black (moves first) and "O" is white.

    Args:
        size: number of rows and columns of the board (default 15).
    """

    def __init__(self, size=15):
        self.size = size  # board side length
        self.board = [" "] * (size * size)
        self.current_player = "X"
        self.winner = None

    def reset(self):
        """Start a new game on an empty board of the same size."""
        self.board = [" "] * (self.size * self.size)
        self.current_player = "X"
        self.winner = None

    def make_move(self, position):
        """Place the current player's stone on ``position``.

        Returns:
            1 if the move wins the game, 0 if it fills the board without a
            winner, None otherwise.  None is also returned, with the game
            left untouched, when ``position`` is off the board or occupied.
        """
        # Reject off-board indices explicitly; a negative index would
        # otherwise wrap around and play a cell at the end of the list.
        if not 0 <= position < len(self.board):
            return None
        if self.board[position] != " ":
            return None

        self.board[position] = self.current_player
        if self.check_winner(position):
            self.winner = self.current_player
            return 1  # current player wins
        if " " not in self.board:
            return 0  # draw
        self.current_player = "O" if self.current_player == "X" else "X"
        return None

    def check_winner(self, pos):
        """Return True if the stone at ``pos`` is part of a run of five or more."""
        row = pos // self.size
        col = pos % self.size
        player = self.board[pos]
        directions = [
            (0, 1),   # horizontal
            (1, 0),   # vertical
            (1, 1),   # main diagonal
            (1, -1),  # anti-diagonal
        ]

        for dr, dc in directions:
            count = 1
            # Walk forwards, then backwards, from the stone along this axis.
            for sign in (1, -1):
                r, c = row + sign * dr, col + sign * dc
                while (0 <= r < self.size and 0 <= c < self.size
                       and self.board[r * self.size + c] == player):
                    count += 1
                    r += sign * dr
                    c += sign * dc
            if count >= 5:
                return True
        return False

    def get_available_moves(self):
        """Return the indices of all empty cells, in ascending order."""
        return [i for i, cell in enumerate(self.board) if cell == " "]

    def get_state(self):
        """Return the board as a single string, one character per cell."""
        return "".join(self.board)


class QLearningAgent:
    """Epsilon-greedy tabular Q-learning agent backed by a dictionary.

    Q values are stored under (state, action) keys; pairs that were never
    updated are treated as 0.0.
    """

    def __init__(self, alpha=0.5, gamma=0.9, epsilon=0.1):
        self.alpha = alpha  # learning rate
        self.gamma = gamma  # discount factor
        self.epsilon = epsilon  # exploration probability
        self.q_table = {}  # (state, action) -> Q value

    def get_q_value(self, state, action):
        """Look up Q(state, action); unknown pairs yield 0.0."""
        key = (state, action)
        return self.q_table.get(key, 0.0)

    def update_q_value(self, state, action, reward, next_state, available_actions):
        """Move Q(state, action) towards reward + gamma * max Q(next_state, a).

        The maximum runs over ``available_actions``; an empty collection
        contributes 0.
        """
        best_next = max(
            (self.get_q_value(next_state, candidate) for candidate in available_actions),
            default=0,
        )
        current = self.get_q_value(state, action)
        td_error = reward + self.gamma * best_next - current
        self.q_table[(state, action)] = current + self.alpha * td_error

    def choose_action(self, state, available_moves):
        """Return a move from ``available_moves``.

        With probability ``epsilon`` the move is drawn at random; otherwise
        one of the highest-valued moves is chosen, ties broken at random.
        """
        exploring = random.uniform(0, 1) < self.epsilon
        if exploring:
            return random.choice(available_moves)

        scores = {}
        for move in available_moves:
            scores[move] = self.get_q_value(state, move)
        top = max(scores.values(), default=0)
        candidates = [move for move in scores if scores[move] == top]
        return random.choice(candidates)


def draw_board(board, screen, grid_size, size):
    """Draw a ``size`` x ``size`` Gomoku board and its stones onto ``screen``.

    ``board`` is the flat cell list (index ``row * size + col``) and
    ``grid_size`` the pixel size of one cell.
    """
    line_color = (0, 0, 0)
    inset = grid_size // 2
    far_side = inset + (size - 1) * grid_size

    screen.fill((200, 160, 60))  # wood-coloured background

    # Grid lines.
    for k in range(size):
        pos = inset + k * grid_size
        pygame.draw.line(screen, line_color, (inset, pos), (far_side, pos))
        pygame.draw.line(screen, line_color, (pos, inset), (pos, far_side))

    # Stones: black discs for "X", white discs with a black rim for "O".
    radius = inset - 2
    for index in range(size * size):
        mark = board[index]
        if mark not in ("X", "O"):
            continue
        row, col = divmod(index, size)
        center = (col * grid_size + inset, row * grid_size + inset)
        if mark == "X":
            pygame.draw.circle(screen, (0, 0, 0), center, radius)
        else:
            pygame.draw.circle(screen, (255, 255, 255), center, radius)
            pygame.draw.circle(screen, (0, 0, 0), center, radius, 2)


def main():
    """Play Gomoku against a Q-learning agent that keeps learning across games.

    The human plays "X" with the mouse and the agent plays "O".  Each result
    is printed to the console and a new game starts immediately; closing the
    window ends the program.

    The agent's update for a move is deferred until the human has replied,
    because only then is the outcome of that move known: -1 if the reply
    wins the game, 0 otherwise, bootstrapping from the position the agent
    faces next.  Updating right after the agent's own move could never
    deliver the loss penalty, since "X" cannot have won at that point.
    """
    pygame.init()
    grid_size = 30
    size = 15
    screen = pygame.display.set_mode((grid_size * size, grid_size * size))
    pygame.display.set_caption("五子棋 Q-learning 人机对战")

    game = Gomoku()
    agent = QLearningAgent()
    clock = pygame.time.Clock()
    pending = None  # (state, action) of the agent's last move, outcome unknown
    running = True

    try:
        while running:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
                elif (event.type == pygame.MOUSEBUTTONDOWN and game.winner is None
                      and game.current_player == "X"):
                    x, y = event.pos
                    col = x // grid_size
                    row = y // grid_size
                    if not (0 <= row < size and 0 <= col < size):
                        continue  # click outside the board
                    position = row * size + col
                    result = game.make_move(position)
                    if result is not None:
                        # The human's move ended the game: settle the agent's last move.
                        if pending is not None:
                            reward = -1 if game.winner == "X" else 0
                            agent.update_q_value(pending[0], pending[1], reward,
                                                 game.get_state(), [])
                            pending = None
                        if game.winner:
                            print(f"Player {game.winner} Wins!")
                        else:
                            print("Draw!")
                        game.reset()

            if game.current_player == "O" and game.winner is None:
                state = game.get_state()
                available_moves = game.get_available_moves()
                if pending is not None:
                    # The human replied without ending the game: no reward yet.
                    agent.update_q_value(pending[0], pending[1], 0, state, available_moves)

                action = agent.choose_action(state, available_moves)
                result = game.make_move(action)

                if result is None:
                    pending = (state, action)
                else:
                    # The agent's own move ended the game (win or full board);
                    # the empty action list marks the next state as terminal.
                    reward = 1 if game.winner == "O" else 0
                    agent.update_q_value(state, action, reward, game.get_state(), [])
                    pending = None
                    if game.winner:
                        print(f"Player {game.winner} Wins!")
                    else:
                        print("Draw!")
                    game.reset()

            draw_board(game.board, screen, grid_size, size)
            pygame.display.flip()
            clock.tick(60)  # cap the frame rate instead of spinning a CPU core
    finally:
        # Close the window even if the loop is interrupted by an exception.
        pygame.quit()


if __name__ == "__main__":
    main()

Player X Wins!
Player X Wins!
Player X Wins!
Player X Wins!
Player X Wins!
Player X Wins!
Player X Wins!
Player X Wins!
Player X Wins!
Player X Wins!
Player X Wins!
Player X Wins!
Player X Wins!
Player X Wins!
Player X Wins!
Player X Wins!
Player X Wins!
