In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque

class Gomoku:
    """Two-player Gomoku (five in a row) on a square board.

    Board cells hold 0 (empty), 1 (player 1) or 2 (player 2). Player 1
    moves first and ``current_player`` alternates after every legal move.
    """

    # One representative per line orientation; each line is walked both ways.
    DIRECTIONS = ((1, 0), (0, 1), (1, 1), (1, -1))
    # Number of stones in a row needed to win.
    WIN_LENGTH = 5
    # Heuristic value of a run with both ends empty, keyed by run length.
    OPEN_RUN_SCORES = {4: 100, 3: 10, 2: 1}
    # Heuristic value of a completed (winning) run.
    WIN_SCORE = 10000

    def __init__(self, size=15):
        """Create an empty ``size`` x ``size`` board with player 1 to move."""
        self.size = size
        self.board = np.zeros((size, size), dtype=int)
        self.current_player = 1

    def reset(self):
        """Clear the board, give the move to player 1 and return the board."""
        self.board = np.zeros((self.size, self.size), dtype=int)
        self.current_player = 1
        return self.board

    def is_valid_move(self, x, y):
        """Return True when (x, y) is on the board and currently empty."""
        return bool(self._in_bounds(x, y) and self.board[x, y] == 0)

    def make_move(self, x, y):
        """Place the current player's stone at (x, y).

        Returns True and passes the turn on success; returns False and
        leaves the game untouched when the move is illegal.
        """
        if not self.is_valid_move(x, y):
            return False
        self.board[x, y] = self.current_player
        self.current_player = 3 - self.current_player
        return True

    def check_winner(self):
        """Return the id (1 or 2) of a player with five in a row, else 0."""
        # Only occupied cells can be part of a winning line.
        for x, y in np.argwhere(self.board != 0):
            if self.check_direction(x, y):
                return int(self.board[x, y])
        return 0

    def check_direction(self, x, y):
        """Return True if the stone at (x, y) lies in a run of five or more.

        An empty cell never forms a winning run.
        """
        player = self.board[x, y]
        if player == 0:
            return False
        for dx, dy in self.DIRECTIONS:
            forward, _ = self._count_run(x, y, dx, dy, player)
            backward, _ = self._count_run(x, y, -dx, -dy, player)
            if 1 + forward + backward >= self.WIN_LENGTH:
                return True
        return False

    def evaluate_position(self, player):
        """Evaluate the board from the perspective of the given player.

        Sums ``evaluate_point`` over the player's stones and subtracts the
        same sum over the opponent's stones. Every stone of a run
        contributes, so longer runs are weighted by their length.
        """
        opponent = 3 - player
        score = 0
        for x, y in np.argwhere(self.board != 0):
            stone = self.board[x, y]
            if stone == player:
                score += self.evaluate_point(x, y, player)
            elif stone == opponent:
                score -= self.evaluate_point(x, y, opponent)
        return score

    def evaluate_point(self, x, y, player):
        """Evaluate a single point from the perspective of the given player.

        For each line orientation the contiguous run of ``player`` stones
        passing through (x, y) is measured. A run of five or more scores
        ``WIN_SCORE``; a shorter run scores according to
        ``OPEN_RUN_SCORES`` only when the cells just past both ends are
        empty board cells. A point that does not hold one of ``player``'s
        stones scores 0.
        """
        if not self._in_bounds(x, y) or self.board[x, y] != player:
            return 0
        score = 0
        for dx, dy in self.DIRECTIONS:
            forward, forward_open = self._count_run(x, y, dx, dy, player)
            backward, backward_open = self._count_run(x, y, -dx, -dy, player)
            run = 1 + forward + backward
            if run >= self.WIN_LENGTH:
                score += self.WIN_SCORE  # win
            elif forward_open and backward_open:
                # Board edges and opposing stones both close an end.
                score += self.OPEN_RUN_SCORES.get(run, 0)
        return score

    def _in_bounds(self, x, y):
        """Return True when (x, y) is a cell of the board."""
        return 0 <= x < self.size and 0 <= y < self.size

    def _count_run(self, x, y, dx, dy, player):
        """Walk from (x, y) along (dx, dy), excluding (x, y) itself.

        Returns ``(length, open_end)``: the number of consecutive
        ``player`` stones met, and whether the first cell after them is an
        empty cell on the board.
        """
        length = 0
        nx, ny = x + dx, y + dy
        while self._in_bounds(nx, ny) and self.board[nx, ny] == player:
            length += 1
            nx, ny = nx + dx, ny + dy
        open_end = bool(self._in_bounds(nx, ny) and self.board[nx, ny] == 0)
        return length, open_end

class DQN(nn.Module):
    """Fully connected Q-network: two ReLU hidden layers and a linear head."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Build the layers input_dim -> hidden_dim -> hidden_dim -> output_dim."""
        super().__init__()
        # Attribute names are part of the saved state_dict keys; keep them.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return one output value per action for the input batch ``x``."""
        first_hidden = self.fc1(x).relu()
        second_hidden = self.fc2(first_hidden).relu()
        return self.fc3(second_hidden)

class DQNAgent:
    """Epsilon-greedy DQN agent with a replay memory and a target network.

    States are flat board encodings in which 0 marks an empty cell; the
    index of a cell is also the index of the action that plays there.
    """

    def __init__(self, state_size, action_size, hidden_size=128, gamma=0.99, lr=0.001, batch_size=64, epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.995):
        """Create the online/target networks, optimizer and replay memory.

        Args:
            state_size: Number of input features of the network.
            action_size: Number of actions (network outputs).
            hidden_size: Width of both hidden layers.
            gamma: Discount factor applied to the bootstrapped next value.
            lr: Learning rate of the Adam optimizer.
            batch_size: Transitions sampled per ``replay`` call; also the
                minimum memory size before any learning happens.
            epsilon: Initial probability of choosing a random action.
            epsilon_min: Lower bound for ``epsilon``.
            epsilon_decay: Factor applied to ``epsilon`` after each replay.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.hidden_size = hidden_size
        self.gamma = gamma
        self.lr = lr
        self.batch_size = batch_size
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay

        self.memory = deque(maxlen=2000)
        self.model = DQN(state_size, hidden_size, action_size)
        self.target_model = DQN(state_size, hidden_size, action_size)
        self.update_target_model()  # start with identical weights
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)
        self.criterion = nn.MSELoss()

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.load_state_dict(self.model.state_dict())

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action for ``state`` using an epsilon-greedy policy.

        Only actions whose cell is empty are ever returned.

        Raises:
            ValueError: If ``state`` has no empty cell left.
        """
        valid_actions = self.get_valid_actions(state)
        if not valid_actions:
            raise ValueError("no valid actions: the board is full")
        if random.uniform(0, 1) < self.epsilon:
            return random.choice(valid_actions)
        # Inference only: no autograd graph is needed to pick a move.
        with torch.no_grad():
            q_values = self.model(self._as_batch(state))[0]
        best = torch.argmax(q_values[valid_actions]).item()
        return valid_actions[best]

    def get_valid_actions(self, state):
        """Return the indices (as Python ints) of the empty cells of ``state``.

        Accepts a list, a NumPy array or a tensor of any shape; the input
        is flattened and only the first ``action_size`` cells are examined.
        """
        if isinstance(state, torch.Tensor):
            # detach/cpu so tensors that track gradients or live on another
            # device can be converted as well.
            state = state.detach().cpu().numpy()
        cells = np.asarray(state).reshape(-1)[:self.action_size]
        return np.flatnonzero(cells == 0).tolist()

    def replay(self):
        """Run one optimizer step per transition of a random mini-batch.

        Does nothing until the memory holds ``batch_size`` transitions.
        The target for the taken action is ``reward`` plus, for
        non-terminal transitions, ``gamma`` times the best target-network
        value among the actions still legal in ``next_state``. All other
        outputs are trained towards their current value (zero error).
        ``epsilon`` is decayed once per call and never drops below
        ``epsilon_min``.
        """
        if len(self.memory) < self.batch_size:
            return
        batch = random.sample(self.memory, self.batch_size)
        for state, action, reward, next_state, done in batch:
            target = float(reward)
            if not done:
                target += self.gamma * self._best_next_value(next_state)
            prediction = self.model(self._as_batch(state))
            # Detached copy: the target must be a constant, not part of the
            # graph that is being differentiated.
            target_q = prediction.detach().clone()
            target_q[0, action] = target
            self.optimizer.zero_grad()
            loss = self.criterion(prediction, target_q)
            loss.backward()
            self.optimizer.step()
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def load_model(self, path):
        """Load online-network weights from ``path`` and sync the target net.

        NOTE: ``torch.load`` unpickles the file, so only load checkpoints
        from a trusted source.
        """
        # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
        self.model.load_state_dict(torch.load(path, map_location="cpu"))
        self.update_target_model()

    def save_model(self, path):
        """Write the online network's weights to ``path``."""
        torch.save(self.model.state_dict(), path)

    def _as_batch(self, state):
        """Convert a state to a float32 tensor of shape (1, n_features)."""
        return torch.as_tensor(state, dtype=torch.float32).reshape(1, -1)

    def _best_next_value(self, next_state):
        """Return the highest target-network value over legal next actions."""
        legal = self.get_valid_actions(next_state)
        if not legal:
            return 0.0  # nothing left to play, so no future value
        with torch.no_grad():
            next_q = self.target_model(self._as_batch(next_state))[0]
        return next_q[legal].max().item()

def train_dqn(agent, env, episodes=100):
    """Train ``agent`` as player 1 against an opponent that moves at random.

    One agent step consists of the agent's move followed, unless the game
    already ended, by the opponent's reply. The stored transition runs
    from the board the agent saw to the board it will see next, so the
    opponent's stones are always part of the observed state and a loss
    caused by the opponent's reply is recorded with its penalty.

    Args:
        agent: Object providing ``act``, ``remember``, ``replay``,
            ``get_valid_actions``, ``update_target_model``, ``save_model``
            and an ``epsilon`` attribute.
        env: Game providing ``reset``, ``make_move``, ``check_winner``,
            ``evaluate_position``, ``size`` and ``board``.
        episodes: Number of games to play.

    The trained weights are written to ``dqn_gomoku.pth``.
    """
    for e in range(episodes):
        state = env.reset().flatten()
        done = False
        while not done:
            action = agent.act(state)
            x, y = divmod(action, env.size)
            if env.make_move(x, y):
                reward = env.evaluate_position(1)
                done = env.check_winner() > 0 or bool((env.board != 0).all())
                if not done:
                    # Opponent's turn (uniformly random legal move)
                    opp_action = random.choice(agent.get_valid_actions(env.board.flatten()))
                    opp_x, opp_y = divmod(opp_action, env.size)
                    env.make_move(opp_x, opp_y)
                    if env.check_winner():
                        reward = -10000  # Large negative reward for losing
                        done = True
                    elif (env.board != 0).all():
                        done = True  # board filled by the opponent: draw
                next_state = env.board.flatten()
            else:
                # Defensive: act() only proposes empty cells, but an
                # illegal move still ends the episode with a penalty.
                reward = -100  # Penalty for invalid move
                next_state = state
                done = True
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            agent.replay()
        agent.update_target_model()  # sync the target network once per episode
        print(f"Episode {e+1}/{episodes}, epsilon: {agent.epsilon:.2f}")
    agent.save_model('dqn_gomoku.pth')

# Build a board with the default size and an agent that has one input
# feature and one action per board cell, then train it for 100 episodes.
gomoku_env = Gomoku()
dqn_agent = DQNAgent(state_size=gomoku_env.size*gomoku_env.size, action_size=gomoku_env.size*gomoku_env.size)
train_dqn(dqn_agent, gomoku_env, episodes=100)


Episode 1/100, epsilon: 1.00
Episode 2/100, epsilon: 0.80
Episode 3/100, epsilon: 0.61
Episode 4/100, epsilon: 0.47
Episode 5/100, epsilon: 0.34
Episode 6/100, epsilon: 0.30
Episode 7/100, epsilon: 0.28
Episode 8/100, epsilon: 0.21
Episode 9/100, epsilon: 0.16
Episode 10/100, epsilon: 0.12
Episode 11/100, epsilon: 0.08
Episode 12/100, epsilon: 0.07
Episode 13/100, epsilon: 0.05
Episode 14/100, epsilon: 0.04
Episode 15/100, epsilon: 0.04
Episode 16/100, epsilon: 0.03
Episode 17/100, epsilon: 0.02
Episode 18/100, epsilon: 0.02
Episode 19/100, epsilon: 0.01
Episode 20/100, epsilon: 0.01
Episode 21/100, epsilon: 0.01


In [10]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque

class Gomoku:
    """Two-player Gomoku (five in a row) on a square board, 5x5 by default.

    Board cells hold 0 (empty), 1 (player 1) or 2 (player 2). Player 1
    moves first and ``current_player`` alternates after every legal move.
    """

    # One representative per line orientation; each line is walked both ways.
    DIRECTIONS = ((1, 0), (0, 1), (1, 1), (1, -1))
    # Number of stones in a row needed to win.
    WIN_LENGTH = 5
    # Heuristic value of a run with both ends empty, keyed by run length.
    OPEN_RUN_SCORES = {4: 3000, 3: 500, 2: 1}
    # Heuristic value of a completed (winning) run.
    WIN_SCORE = 10000

    def __init__(self, size=5):
        """Create an empty ``size`` x ``size`` board with player 1 to move."""
        self.size = size
        self.board = np.zeros((size, size), dtype=int)
        self.current_player = 1

    def reset(self):
        """Clear the board, give the move to player 1 and return the board."""
        self.board = np.zeros((self.size, self.size), dtype=int)
        self.current_player = 1
        return self.board

    def is_valid_move(self, x, y):
        """Return True when (x, y) is on the board and currently empty."""
        return bool(self._in_bounds(x, y) and self.board[x, y] == 0)

    def make_move(self, x, y):
        """Place the current player's stone at (x, y).

        Returns True and passes the turn on success; returns False and
        leaves the game untouched when the move is illegal.
        """
        if not self.is_valid_move(x, y):
            return False
        self.board[x, y] = self.current_player
        self.current_player = 3 - self.current_player
        return True

    def check_winner(self):
        """Return the id (1 or 2) of a player with five in a row, else 0."""
        # Only occupied cells can be part of a winning line.
        for x, y in np.argwhere(self.board != 0):
            if self.check_direction(x, y):
                return int(self.board[x, y])
        return 0

    def check_direction(self, x, y):
        """Return True if the stone at (x, y) lies in a run of five or more.

        An empty cell never forms a winning run.
        """
        player = self.board[x, y]
        if player == 0:
            return False
        for dx, dy in self.DIRECTIONS:
            forward, _ = self._count_run(x, y, dx, dy, player)
            backward, _ = self._count_run(x, y, -dx, -dy, player)
            if 1 + forward + backward >= self.WIN_LENGTH:
                return True
        return False

    def evaluate_position(self, player):
        """Evaluate the board from the perspective of the given player.

        Sums ``evaluate_point`` over the player's stones and subtracts the
        same sum over the opponent's stones. Every stone of a run
        contributes, so longer runs are weighted by their length.
        """
        opponent = 3 - player
        score = 0
        for x, y in np.argwhere(self.board != 0):
            stone = self.board[x, y]
            if stone == player:
                score += self.evaluate_point(x, y, player)
            elif stone == opponent:
                score -= self.evaluate_point(x, y, opponent)
        return score

    def evaluate_point(self, x, y, player):
        """Evaluate a single point from the perspective of the given player.

        For each line orientation the contiguous run of ``player`` stones
        passing through (x, y) is measured. A run of five or more scores
        ``WIN_SCORE``; a shorter run scores according to
        ``OPEN_RUN_SCORES`` only when the cells just past both ends are
        empty board cells. A point that does not hold one of ``player``'s
        stones scores 0.
        """
        if not self._in_bounds(x, y) or self.board[x, y] != player:
            return 0
        score = 0
        for dx, dy in self.DIRECTIONS:
            forward, forward_open = self._count_run(x, y, dx, dy, player)
            backward, backward_open = self._count_run(x, y, -dx, -dy, player)
            run = 1 + forward + backward
            if run >= self.WIN_LENGTH:
                score += self.WIN_SCORE  # win
            elif forward_open and backward_open:
                # Board edges and opposing stones both close an end.
                score += self.OPEN_RUN_SCORES.get(run, 0)
        return score

    def _in_bounds(self, x, y):
        """Return True when (x, y) is a cell of the board."""
        return 0 <= x < self.size and 0 <= y < self.size

    def _count_run(self, x, y, dx, dy, player):
        """Walk from (x, y) along (dx, dy), excluding (x, y) itself.

        Returns ``(length, open_end)``: the number of consecutive
        ``player`` stones met, and whether the first cell after them is an
        empty cell on the board.
        """
        length = 0
        nx, ny = x + dx, y + dy
        while self._in_bounds(nx, ny) and self.board[nx, ny] == player:
            length += 1
            nx, ny = nx + dx, ny + dy
        open_end = bool(self._in_bounds(nx, ny) and self.board[nx, ny] == 0)
        return length, open_end

class DQN(nn.Module):
    """Multilayer perceptron mapping a flat state to one value per action."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Create two hidden layers of width ``hidden_dim`` and a linear head."""
        super().__init__()
        # Keep these attribute names: they are the keys of saved checkpoints.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Apply fc1 and fc2 with ReLU activations, then the linear fc3."""
        activations = x
        for layer in (self.fc1, self.fc2):
            activations = torch.relu(layer(activations))
        return self.fc3(activations)

class DQNAgent:
    """Epsilon-greedy DQN agent with a replay memory and a target network.

    States are flat board encodings in which 0 marks an empty cell; the
    index of a cell is also the index of the action that plays there.
    """

    def __init__(self, state_size, action_size, hidden_size=128, gamma=0.99, lr=0.001, batch_size=64, epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.995):
        """Create the online/target networks, optimizer and replay memory.

        Args:
            state_size: Number of input features of the network.
            action_size: Number of actions (network outputs).
            hidden_size: Width of both hidden layers.
            gamma: Discount factor applied to the bootstrapped next value.
            lr: Learning rate of the Adam optimizer.
            batch_size: Transitions sampled per ``replay`` call; also the
                minimum memory size before any learning happens.
            epsilon: Initial probability of choosing a random action.
            epsilon_min: Lower bound for ``epsilon``.
            epsilon_decay: Factor applied to ``epsilon`` after each replay.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.hidden_size = hidden_size
        self.gamma = gamma
        self.lr = lr
        self.batch_size = batch_size
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay

        self.memory = deque(maxlen=2000)
        self.model = DQN(state_size, hidden_size, action_size)
        self.target_model = DQN(state_size, hidden_size, action_size)
        self.update_target_model()  # start with identical weights
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)
        self.criterion = nn.MSELoss()

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.load_state_dict(self.model.state_dict())

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action for ``state`` using an epsilon-greedy policy.

        Only actions whose cell is empty are ever returned.

        Raises:
            ValueError: If ``state`` has no empty cell left.
        """
        valid_actions = self.get_valid_actions(state)
        if not valid_actions:
            raise ValueError("no valid actions: the board is full")
        if random.uniform(0, 1) < self.epsilon:
            return random.choice(valid_actions)
        # Inference only: no autograd graph is needed to pick a move.
        with torch.no_grad():
            q_values = self.model(self._as_batch(state))[0]
        best = torch.argmax(q_values[valid_actions]).item()
        return valid_actions[best]

    def get_valid_actions(self, state):
        """Return the indices (as Python ints) of the empty cells of ``state``.

        Accepts a list, a NumPy array or a tensor of any shape; the input
        is flattened and only the first ``action_size`` cells are examined.
        """
        if isinstance(state, torch.Tensor):
            # detach/cpu so tensors that track gradients or live on another
            # device can be converted as well.
            state = state.detach().cpu().numpy()
        cells = np.asarray(state).reshape(-1)[:self.action_size]
        return np.flatnonzero(cells == 0).tolist()

    def replay(self):
        """Run one optimizer step per transition of a random mini-batch.

        Does nothing until the memory holds ``batch_size`` transitions.
        The target for the taken action is ``reward`` plus, for
        non-terminal transitions, ``gamma`` times the best target-network
        value among the actions still legal in ``next_state``. All other
        outputs are trained towards their current value (zero error).
        ``epsilon`` is decayed once per call and never drops below
        ``epsilon_min``.
        """
        if len(self.memory) < self.batch_size:
            return
        batch = random.sample(self.memory, self.batch_size)
        for state, action, reward, next_state, done in batch:
            target = float(reward)
            if not done:
                target += self.gamma * self._best_next_value(next_state)
            prediction = self.model(self._as_batch(state))
            # Detached copy: the target must be a constant, not part of the
            # graph that is being differentiated.
            target_q = prediction.detach().clone()
            target_q[0, action] = target
            self.optimizer.zero_grad()
            loss = self.criterion(prediction, target_q)
            loss.backward()
            self.optimizer.step()
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def load_model(self, path):
        """Load online-network weights from ``path`` and sync the target net.

        NOTE: ``torch.load`` unpickles the file, so only load checkpoints
        from a trusted source.
        """
        # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
        self.model.load_state_dict(torch.load(path, map_location="cpu"))
        self.update_target_model()

    def save_model(self, path):
        """Write the online network's weights to ``path``."""
        torch.save(self.model.state_dict(), path)

    def _as_batch(self, state):
        """Convert a state to a float32 tensor of shape (1, n_features)."""
        return torch.as_tensor(state, dtype=torch.float32).reshape(1, -1)

    def _best_next_value(self, next_state):
        """Return the highest target-network value over legal next actions."""
        legal = self.get_valid_actions(next_state)
        if not legal:
            return 0.0  # nothing left to play, so no future value
        with torch.no_grad():
            next_q = self.target_model(self._as_batch(next_state))[0]
        return next_q[legal].max().item()

def train_dqn(agent, env, episodes=100):
    """Train ``agent`` as player 1 against an opponent that moves at random.

    One agent step consists of the agent's move followed, unless the game
    already ended, by the opponent's reply. The stored transition runs
    from the board the agent saw to the board it will see next, so the
    opponent's stones are always part of the observed state and a loss
    caused by the opponent's reply is recorded with its penalty.

    Args:
        agent: Object providing ``act``, ``remember``, ``replay``,
            ``get_valid_actions``, ``update_target_model``, ``save_model``
            and an ``epsilon`` attribute.
        env: Game providing ``reset``, ``make_move``, ``check_winner``,
            ``evaluate_position``, ``size`` and ``board``.
        episodes: Number of games to play.

    The trained weights are written to ``dqn_gomoku.pth``.
    """
    for e in range(episodes):
        state = env.reset().flatten()
        done = False
        while not done:
            action = agent.act(state)
            x, y = divmod(action, env.size)
            if env.make_move(x, y):
                reward = env.evaluate_position(1)
                done = env.check_winner() > 0 or bool((env.board != 0).all())
                if not done:
                    # Opponent's turn (uniformly random legal move)
                    opp_action = random.choice(agent.get_valid_actions(env.board.flatten()))
                    opp_x, opp_y = divmod(opp_action, env.size)
                    env.make_move(opp_x, opp_y)
                    if env.check_winner():
                        reward = -10000  # Large negative reward for losing
                        done = True
                    elif (env.board != 0).all():
                        done = True  # board filled by the opponent: draw
                next_state = env.board.flatten()
            else:
                # Defensive: act() only proposes empty cells, but an
                # illegal move still ends the episode with a penalty.
                reward = -100  # Penalty for invalid move
                next_state = state
                done = True
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            agent.replay()
        agent.update_target_model()  # sync the target network once per episode
        print(f"Episode {e+1}/{episodes}, epsilon: {agent.epsilon:.2f}")
    agent.save_model('dqn_gomoku.pth')

# Train on a small 5x5 board: the agent gets one input feature and one
# action per board cell and plays 500 training episodes.
gomoku_env = Gomoku(size=5)  # 5x5 board
dqn_agent = DQNAgent(state_size=gomoku_env.size*gomoku_env.size, action_size=gomoku_env.size*gomoku_env.size)
train_dqn(dqn_agent, gomoku_env, episodes=500)


Episode 1/500, epsilon: 1.00
Episode 2/500, epsilon: 1.00
Episode 3/500, epsilon: 1.00
Episode 4/500, epsilon: 1.00
Episode 5/500, epsilon: 1.00
Episode 6/500, epsilon: 1.00
Episode 7/500, epsilon: 1.00
Episode 8/500, epsilon: 1.00
Episode 9/500, epsilon: 0.98
Episode 10/500, epsilon: 0.94
Episode 11/500, epsilon: 0.93
Episode 12/500, epsilon: 0.88
Episode 13/500, epsilon: 0.85
Episode 14/500, epsilon: 0.80
Episode 15/500, epsilon: 0.76
Episode 16/500, epsilon: 0.72
Episode 17/500, epsilon: 0.69
Episode 18/500, epsilon: 0.64
Episode 19/500, epsilon: 0.61
Episode 20/500, epsilon: 0.58
Episode 21/500, epsilon: 0.55
Episode 22/500, epsilon: 0.52
Episode 23/500, epsilon: 0.51
Episode 24/500, epsilon: 0.49
Episode 25/500, epsilon: 0.47
Episode 26/500, epsilon: 0.46
Episode 27/500, epsilon: 0.45
Episode 28/500, epsilon: 0.44
Episode 29/500, epsilon: 0.41
Episode 30/500, epsilon: 0.40
Episode 31/500, epsilon: 0.39
Episode 32/500, epsilon: 0.38
Episode 33/500, epsilon: 0.36
Episode 34/500, eps

Episode 269/500, epsilon: 0.01
Episode 270/500, epsilon: 0.01
Episode 271/500, epsilon: 0.01
Episode 272/500, epsilon: 0.01
Episode 273/500, epsilon: 0.01
Episode 274/500, epsilon: 0.01
Episode 275/500, epsilon: 0.01
Episode 276/500, epsilon: 0.01
Episode 277/500, epsilon: 0.01
Episode 278/500, epsilon: 0.01
Episode 279/500, epsilon: 0.01
Episode 280/500, epsilon: 0.01
Episode 281/500, epsilon: 0.01
Episode 282/500, epsilon: 0.01
Episode 283/500, epsilon: 0.01
Episode 284/500, epsilon: 0.01
Episode 285/500, epsilon: 0.01
Episode 286/500, epsilon: 0.01
Episode 287/500, epsilon: 0.01
Episode 288/500, epsilon: 0.01
Episode 289/500, epsilon: 0.01
Episode 290/500, epsilon: 0.01
Episode 291/500, epsilon: 0.01
Episode 292/500, epsilon: 0.01
Episode 293/500, epsilon: 0.01
Episode 294/500, epsilon: 0.01
Episode 295/500, epsilon: 0.01
Episode 296/500, epsilon: 0.01
Episode 297/500, epsilon: 0.01
Episode 298/500, epsilon: 0.01
Episode 299/500, epsilon: 0.01
Episode 300/500, epsilon: 0.01
Episode 

In [11]:
import tkinter as tk
class GomokuApp:
    """Tkinter front end: a human (player 1, black) against the DQN agent.

    Stones sit on the grid intersections. The human moves by clicking
    near an intersection; the agent (player 2, red) answers immediately.
    """

    MARGIN = 50        # pixels between the canvas edge and the outer grid lines
    CELL = 80          # pixels between neighbouring grid lines
    STONE_RADIUS = 30  # radius of a drawn stone in pixels
    AGENT_PLAYER = 2   # the human always moves first as player 1

    def __init__(self, root, size=5):
        """Build the canvas inside ``root`` and load the trained agent.

        Args:
            root: Tk container the canvas is packed into.
            size: Board side length; must match the saved model.
        """
        self.root = root
        self.size = size
        self.gomoku = Gomoku(size)
        self.agent = DQNAgent(state_size=size*size, action_size=size*size)
        # Requires a checkpoint written by a previous training run.
        self.agent.load_model('dqn_gomoku.pth')
        # A fresh agent starts with full exploration; play greedily instead
        # of choosing random moves.
        self.agent.epsilon = 0.0
        # Size the canvas so the whole grid fits for any board size.
        extent = 2 * self.MARGIN + (size - 1) * self.CELL
        self.canvas = tk.Canvas(root, width=extent, height=extent)
        self.canvas.pack()
        self.canvas.bind("<Button-1>", self.on_click)
        self.draw_board()
        self.reset_game()

    def reset_game(self):
        """Start a new game and clear all stones from the canvas."""
        self.gomoku.reset()
        self.update_canvas()

    def draw_board(self):
        """Draw the grid lines; they end exactly on the outer intersections."""
        near = self.MARGIN
        far = self.MARGIN + (self.size - 1) * self.CELL
        for i in range(self.size):
            pos = self.MARGIN + i * self.CELL
            self.canvas.create_line(pos, near, pos, far)
            self.canvas.create_line(near, pos, far, pos)

    def update_canvas(self):
        """Redraw every stone from the current board state."""
        self.canvas.delete("piece")
        colors = {1: "black", 2: "red"}
        r = self.STONE_RADIUS
        for x in range(self.size):
            for y in range(self.size):
                color = colors.get(int(self.gomoku.board[x, y]))
                if color is None:
                    continue
                cx = self.MARGIN + x * self.CELL
                cy = self.MARGIN + y * self.CELL
                self.canvas.create_oval(cx - r, cy - r, cx + r, cy + r, fill=color, tags="piece")

    def on_click(self, event):
        """Play the human's stone at the intersection nearest to the click."""
        # Stones sit on intersections, so snap to the nearest one instead
        # of flooring to the cell's upper-left corner.
        half = self.CELL // 2
        x = (event.x - self.MARGIN + half) // self.CELL
        y = (event.y - self.MARGIN + half) // self.CELL
        if self.gomoku.make_move(x, y):
            self.update_canvas()
            if not self._finish_if_over():
                self.agent_move()

    def agent_move(self):
        """Let the agent answer with its highest-valued legal move."""
        board = self.gomoku.board.flatten()
        # train_dqn scores positions for player 1, i.e. the network learned
        # with its own stones encoded as 1. Here the agent plays as player
        # 2, so swap the labels (1 <-> 2, 0 stays 0) before asking it.
        state = (3 - board) * (board != 0)
        action = self.agent.act(state)
        x, y = divmod(action, self.size)
        if self.gomoku.make_move(x, y):
            self.update_canvas()
            self._finish_if_over()

    def _finish_if_over(self):
        """Announce a win or draw and restart; return True if the game ended."""
        winner = self.gomoku.check_winner()
        if winner:
            print(f"Player {winner} wins!")
        elif (self.gomoku.board != 0).all():
            print("Draw!")
        else:
            return False
        self.reset_game()
        return True
if __name__ == "__main__":
    # Create the Tk root window, attach the Gomoku UI to it and hand
    # control to the Tk event loop until the window is closed.
    root = tk.Tk()
    app = GomokuApp(root)
    root.mainloop()


In [None]:
if __name__ == "__main__":
    # Launches the UI again; `tk` and `GomokuApp` are not defined in this
    # cell and must already exist from an earlier cell.
    root = tk.Tk()
    app = GomokuApp(root)
    root.mainloop()
