In [1]:
import numpy as np

class GomokuEnv:
    def __init__(self, board_size=5):
        self.board_size = board_size
        self.reset()

    def reset(self):
        self.board = np.zeros((self.board_size, self.board_size), dtype=int)
        self.current_player = 1
        return self.board.flatten()

    def step(self, action):
        row, col = divmod(action, self.board_size)
        if self.board[row, col] != 0:
            return self.board.flatten(), -10, True, {}  # illegal move penalty

        self.board[row, col] = self.current_player
        if self.check_win(row, col):
            return self.board.flatten(), 10, True, {}  # win reward

        if np.all(self.board != 0):
            return self.board.flatten(), 0, True, {}  # draw

        self.current_player = 3 - self.current_player  # switch player
        return self.board.flatten(), 0, False, {}

    def check_win(self, row, col):
        player = self.board[row, col]
        directions = [(1, 0), (0, 1), (1, 1), (1, -1)]
        for dr, dc in directions:
            count = 1
            for i in range(1, 5):
                r, c = row + dr * i, col + dc * i
                if 0 <= r < self.board_size and 0 <= c < self.board_size and self.board[r, c] == player:
                    count += 1
                else:
                    break
            for i in range(1, 5):
                r, c = row - dr * i, col - dc * i
                if 0 <= r < self.board_size and 0 <= c < self.board_size and self.board[r, c] == player:
                    count += 1
                else:
                    break
            if count >= 5:
                return True
        return False

    def available_actions(self):
        return [i for i in range(self.board_size * self.board_size) if self.board.flat[i] == 0]

class QLearningAgent:
    def __init__(self, state_size, action_size, learning_rate=0.1, discount_factor=0.9, exploration_rate=1.0, exploration_decay=0.995):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.exploration_decay = exploration_decay
        self.q_table = {}

    def get_state(self, board):
        return tuple(board)

    def choose_action(self, state, available_actions):
        if np.random.rand() < self.exploration_rate:
            return np.random.choice(available_actions)
        if state not in self.q_table:
            self.q_table[state] = np.zeros(self.action_size)
        return available_actions[np.argmax([self.q_table[state][a] for a in available_actions])]

    def learn(self, state, action, reward, next_state, done):
        if state not in self.q_table:
            self.q_table[state] = np.zeros(self.action_size)
        if next_state not in self.q_table:
            self.q_table[next_state] = np.zeros(self.action_size)
        q_predict = self.q_table[state][action]
        q_target = reward + self.discount_factor * np.max(self.q_table[next_state]) * (1 - done)
        self.q_table[state][action] += self.learning_rate * (q_target - q_predict)

    def update_exploration_rate(self):
        self.exploration_rate *= self.exploration_decay

def train_gomoku():
    board_size = 5
    env = GomokuEnv(board_size)
    agent = QLearningAgent(state_size=board_size * board_size, action_size=board_size * board_size)
    
    num_episodes = 1000
    max_steps_per_episode = board_size * board_size

    for episode in range(num_episodes):
        state = env.reset()
        done = False
        while not done:
            available_actions = env.available_actions()
            action = agent.choose_action(agent.get_state(state), available_actions)
            next_state, reward, done, _ = env.step(action)
            agent.learn(agent.get_state(state), action, reward, agent.get_state(next_state), done)
            state = next_state
        agent.update_exploration_rate()

    return agent, env


In [3]:
pip install pygame

Could not fetch URL https://pypi.org/simple/pygame/: There was a problem confirming the ssl certificate: HTTPSConnectionPool(host='pypi.org', port=443): Max retries exceeded with url: /simple/pygame/ (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:1125)'))) - skipping
Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement pygame (from versions: none)
ERROR: No matching distribution found for pygame


In [2]:
import pygame
import sys

# Pygame初始化
pygame.init()

# 定义颜色
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
RED = (255, 0, 0)
BLUE = (0, 0, 255)

# 屏幕尺寸
SCREEN_SIZE = 500
GRID_SIZE = 5
CELL_SIZE = SCREEN_SIZE // GRID_SIZE

# 创建屏幕对象
screen = pygame.display.set_mode((SCREEN_SIZE, SCREEN_SIZE))
pygame.display.set_caption('Gomoku')

def draw_board(board):
    screen.fill(WHITE)
    for x in range(GRID_SIZE):
        for y in range(GRID_SIZE):
            rect = pygame.Rect(x * CELL_SIZE, y * CELL_SIZE, CELL_SIZE, CELL_SIZE)
            pygame.draw.rect(screen, BLACK, rect, 1)
            if board[y][x] == 1:
                pygame.draw.circle(screen, RED, rect.center, CELL_SIZE // 3)
            elif board[y][x] == 2:
                pygame.draw.circle(screen, BLUE, rect.center, CELL_SIZE // 3)
    pygame.display.flip()

def get_grid_position(mouse_pos):
    x, y = mouse_pos
    return y // CELL_SIZE, x // CELL_SIZE

def main():
    agent, env = train_gomoku()
    board = env.board
    draw_board(board)
    running = True
    human_turn = True

    while running:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()

            if event.type == pygame.MOUSEBUTTONDOWN and human_turn:
                mouse_pos = pygame.mouse.get_pos()
                row, col = get_grid_position(mouse_pos)
                action = row * GRID_SIZE + col
                if board[row][col] == 0:
                    state, reward, done, _ = env.step(action)
                    draw_board(env.board)
                    human_turn = False

                    if done:
                        print("Game Over!")
                        pygame.time.wait(2000)
                        board = env.reset()
                        draw_board(board)
                        human_turn = True

        if not human_turn:
            state = env.board.flatten()
            available_actions = env.available_actions()
            action = agent.choose_action(agent.get_state(state), available_actions)
            state, reward, done, _ = env.step(action)
            draw_board(env.board)
            human_turn = True

            if done:
                print("Game Over!")
                pygame.time.wait(2000)
                board = env.reset()
                draw_board(board)
                human_turn = True

if __name__ == '__main__':
    main()


ModuleNotFoundError: No module named 'pygame'

In [4]:
import numpy as np

class GomokuEnv:
    def __init__(self, board_size=5):
        self.board_size = board_size
        self.reset()

    def reset(self):
        self.board = np.zeros((self.board_size, self.board_size), dtype=int)
        self.current_player = 1
        return self.board.flatten()

    def step(self, action):
        row, col = divmod(action, self.board_size)
        if self.board[row, col] != 0:
            return self.board.flatten(), -10, True, {}  # illegal move penalty

        self.board[row, col] = self.current_player
        if self.check_win(row, col):
            return self.board.flatten(), 10, True, {}  # win reward

        if np.all(self.board != 0):
            return self.board.flatten(), 0, True, {}  # draw

        self.current_player = 3 - self.current_player  # switch player
        return self.board.flatten(), 0, False, {}

    def check_win(self, row, col):
        player = self.board[row, col]
        directions = [(1, 0), (0, 1), (1, 1), (1, -1)]
        for dr, dc in directions:
            count = 1
            for i in range(1, 5):
                r, c = row + dr * i, col + dc * i
                if 0 <= r < self.board_size and 0 <= c < self.board_size and self.board[r, c] == player:
                    count += 1
                else:
                    break
            for i in range(1, 5):
                r, c = row - dr * i, col - dc * i
                if 0 <= r < self.board_size and 0 <= c < self.board_size and self.board[r, c] == player:
                    count += 1
                else:
                    break
            if count >= 5:
                return True
        return False

    def available_actions(self):
        return [i for i in range(self.board_size * self.board_size) if self.board.flat[i] == 0]

class QLearningAgent:
    def __init__(self, state_size, action_size, learning_rate=0.1, discount_factor=0.9, exploration_rate=1.0, exploration_decay=0.995):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.exploration_decay = exploration_decay
        self.q_table = {}

    def get_state(self, board):
        return tuple(board)

    def choose_action(self, state, available_actions):
        if np.random.rand() < self.exploration_rate:
            return np.random.choice(available_actions)
        if state not in self.q_table:
            self.q_table[state] = np.zeros(self.action_size)
        return available_actions[np.argmax([self.q_table[state][a] for a in available_actions])]

    def learn(self, state, action, reward, next_state, done):
        if state not in self.q_table:
            self.q_table[state] = np.zeros(self.action_size)
        if next_state not in self.q_table:
            self.q_table[next_state] = np.zeros(self.action_size)
        q_predict = self.q_table[state][action]
        q_target = reward + self.discount_factor * np.max(self.q_table[next_state]) * (1 - done)
        self.q_table[state][action] += self.learning_rate * (q_target - q_predict)

    def update_exploration_rate(self):
        self.exploration_rate *= self.exploration_decay

def train_gomoku():
    board_size = 5
    env = GomokuEnv(board_size)
    agent = QLearningAgent(state_size=board_size * board_size, action_size=board_size * board_size)
    
    num_episodes = 1000
    max_steps_per_episode = board_size * board_size

    for episode in range(num_episodes):
        state = env.reset()
        done = False
        while not done:
            available_actions = env.available_actions()
            action = agent.choose_action(agent.get_state(state), available_actions)
            next_state, reward, done, _ = env.step(action)
            agent.learn(agent.get_state(state), action, reward, agent.get_state(next_state), done)
            state = next_state
        agent.update_exploration_rate()

    return agent, env


In [5]:
import tkinter as tk
from tkinter import messagebox

class GomokuGUI:
    def __init__(self, master, agent, env):
        self.master = master
        self.agent = agent
        self.env = env
        self.board_size = env.board_size
        self.board = env.board
        self.current_player = 1
        self.buttons = [[None for _ in range(self.board_size)] for _ in range(self.board_size)]
        self.create_widgets()

    def create_widgets(self):
        for row in range(self.board_size):
            for col in range(self.board_size):
                button = tk.Button(self.master, width=4, height=2, command=lambda r=row, c=col: self.player_move(r, c))
                button.grid(row=row, column=col)
                self.buttons[row][col] = button

    def player_move(self, row, col):
        action = row * self.board_size + col
        if self.board[row, col] == 0:
            self.update_board(action, self.current_player)
            state, reward, done, _ = self.env.step(action)
            self.update_buttons()
            if done:
                messagebox.showinfo("Game Over", f"Player {self.current_player} wins!")
                self.reset_game()
                return
            self.current_player = 3 - self.current_player  # switch player
            self.agent_move()

    def agent_move(self):
        state = self.env.board.flatten()
        available_actions = self.env.available_actions()
        action = self.agent.choose_action(self.agent.get_state(state), available_actions)
        self.update_board(action, self.current_player)
        state, reward, done, _ = self.env.step(action)
        self.update_buttons()
        if done:
            messagebox.showinfo("Game Over", f"Player {self.current_player} wins!")
            self.reset_game()
        self.current_player = 3 - self.current_player  # switch player

    def update_board(self, action, player):
        row, col = divmod(action, self.board_size)
        self.board[row, col] = player

    def update_buttons(self):
        symbols = {0: " ", 1: "X", 2: "O"}
        for row in range(self.board_size):
            for col in range(self.board_size):
                self.buttons[row][col].config(text=symbols[self.board[row, col]])

    def reset_game(self):
        self.board = self.env.reset().reshape(self.board_size, self.board_size)
        self.current_player = 1
        self.update_buttons()

if __name__ == "__main__":
    agent, env = train_gomoku()

    root = tk.Tk()
    root.title("Gomoku")
    app = GomokuGUI(root, agent, env)
    root.mainloop()


In [6]:
import numpy as np

class GomokuEnv:
    def __init__(self, board_size=5):
        self.board_size = board_size
        self.reset()

    def reset(self):
        self.board = np.zeros((self.board_size, self.board_size), dtype=int)
        self.current_player = 1
        return self.board.flatten()

    def step(self, action):
        row, col = divmod(action, self.board_size)
        if self.board[row, col] != 0:
            return self.board.flatten(), -10, True, {}  # illegal move penalty

        self.board[row, col] = self.current_player
        if self.check_win(row, col):
            return self.board.flatten(), 10, True, {}  # win reward

        if np.all(self.board != 0):
            return self.board.flatten(), 0, True, {}  # draw

        self.current_player = 3 - self.current_player  # switch player
        return self.board.flatten(), 0, False, {}

    def check_win(self, row, col):
        player = self.board[row, col]
        directions = [(1, 0), (0, 1), (1, 1), (1, -1)]
        for dr, dc in directions:
            count = 1
            for i in range(1, 5):
                r, c = row + dr * i, col + dc * i
                if 0 <= r < self.board_size and 0 <= c < self.board_size and self.board[r, c] == player:
                    count += 1
                else:
                    break
            for i in range(1, 5):
                r, c = row - dr * i, col - dc * i
                if 0 <= r < self.board_size and 0 <= c < self.board_size and self.board[r, c] == player:
                    count += 1
                else:
                    break
            if count >= 5:
                return True
        return False

    def available_actions(self):
        return [i for i in range(self.board_size * self.board_size) if self.board.flat[i] == 0]


In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque

class DQN(nn.Module):
    def __init__(self, state_size, action_size):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_size, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, action_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = DQN(state_size, action_size)
        self.target_model = DQN(state_size, action_size)
        self.update_target_model()
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.criterion = nn.MSELoss()

    def update_target_model(self):
        self.target_model.load_state_dict(self.model.state_dict())

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.choice(np.flatnonzero(state == 0))
        state = torch.FloatTensor(state).unsqueeze(0)
        act_values = self.model(state)
        available_actions = np.flatnonzero(state.numpy().flatten() == 0)
        act_values = act_values[0][available_actions]
        return available_actions[torch.argmax(act_values).item()]

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            state = torch.FloatTensor(state)
            next_state = torch.FloatTensor(next_state)
            target = reward
            if not done:
                target += self.gamma * torch.max(self.target_model(next_state)).item()
            target_f = self.model(state).detach().numpy()
            target_f[action] = target
            target_f = torch.FloatTensor(target_f)
            self.optimizer.zero_grad()
            output = self.model(state)
            loss = self.criterion(output, target_f.unsqueeze(0))
            loss.backward()
            self.optimizer.step()
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_state_dict(torch.load(name))

    def save(self, name):
        torch.save(self.model.state_dict(), name)

def train_dqn(board_size=5, episodes=1000):
    env = GomokuEnv(board_size)
    state_size = board_size * board_size
    action_size = state_size
    agent = DQNAgent(state_size, action_size)
    done = False
    batch_size = 32

    for e in range(episodes):
        state = env.reset()
        state = np.reshape(state, [1, state_size])
        for time in range(500):
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            if done:
                agent.update_target_model()
                break
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)
        print(f"Episode {e+1}/{episodes}")

    return agent, env


In [11]:
import tkinter as tk
from tkinter import messagebox

class GomokuGUI:
    def __init__(self, master, agent, env):
        self.master = master
        self.agent = agent
        self.env = env
        self.board_size = env.board_size
        self.board = env.board
        self.current_player = 1
        self.buttons = [[None for _ in range(self.board_size)] for _ in range(self.board_size)]
        self.create_widgets()

    def create_widgets(self):
        for row in range(self.board_size):
            for col in range(self.board_size):
                button = tk.Button(self.master, width=4, height=2, command=lambda r=row, c=col: self.player_move(r, c))
                button.grid(row=row, column=col)
                self.buttons[row][col] = button

    def player_move(self, row, col):
        action = row * self.board_size + col
        if self.board[row, col] == 0:
            self.update_board(action, self.current_player)
            state, reward, done, _ = self.env.step(action)
            self.update_buttons()
            if done:
                messagebox.showinfo("Game Over", f"Player {self.current_player} wins!")
                self.reset_game()
                return
            self.current_player = 3 - self.current_player  # switch player
            self.agent_move()

    def agent_move(self):
        state = self.env.board.flatten()
        available_actions = self.env.available_actions()
        action = self.agent.act(state)
        self.update_board(action, self.current_player)
        state, reward, done, _ = self.env.step(action)
        self.update_buttons()
        if done:
            messagebox.showinfo("Game Over", f"Player {self.current_player} wins!")
            self.reset_game()
        self.current_player = 3 - self.current_player  # switch player

    def update_board(self, action, player):
        row, col = divmod(action, self.board_size)
        self.board[row, col] = player

    def update_buttons(self):
        symbols = {0: " ", 1: "X", 2: "O"}
        for row in range(self.board_size):
            for col in range(self.board_size):
                self.buttons[row][col].config(text=symbols[self.board[row, col]])

    def reset_game(self):
        self.board = self.env.reset().reshape(self.board_size, self.board_size)
        self.current_player = 1
        self.update_buttons()

if __name__ == "__main__":
    agent, env = train_dqn()

    root = tk.Tk()
    root.title("Gomoku")
    app = GomokuGUI(root, agent, env)
    root.mainloop()


Episode 1/1000


IndexError: index 16 is out of bounds for axis 0 with size 1