In [6]:
import chess
import torch
def board_to_tensor(board):
    """Encode a board as a flat float32 tensor of 64 signed piece codes.

    Each entry is indexed by the square number yielded by ``chess.SQUARES``.
    Empty squares stay 0, white pieces are stored as ``+piece_type`` and
    black pieces as ``-piece_type``.

    Args:
        board: object exposing ``piece_at(square)`` (e.g. ``chess.Board``).

    Returns:
        torch.Tensor of shape ``(64,)`` and dtype ``float32``.
    """
    encoded = torch.zeros(64, dtype=torch.float32)
    for sq in chess.SQUARES:
        occupant = board.piece_at(sq)
        if not occupant:
            # Nothing on this square: keep the zero already in place.
            continue
        sign = 1 if occupant.color == chess.WHITE else -1
        encoded[sq] = sign * occupant.piece_type
    return encoded

def get_coordinates(str):
    """Convert a two-character square name such as ``"e4"`` to ``(row, col)``.

    The first character is a lowercase file letter (``'a'`` maps to column 0)
    and the second is a rank digit (``'1'`` maps to row 0).

    Note: the parameter name shadows the ``str`` builtin inside this function;
    it is kept because it is part of the existing signature.

    Returns:
        Tuple ``(row, col)`` of zero-based integers.
    """
    file_char, rank_char = str[0], str[1]
    return int(rank_char) - 1, ord(file_char) - ord("a")

def get_row_col(square):
    """Split a 0-63 square index into ``(row, col)``.

    ``row`` is the rank (``square // 8``, 0 is the first row) and ``col`` is
    the file (``square % 8``, 0 is the first column).
    """
    rank, file = divmod(square, 8)
    return rank, file

def encode_move(move):
    """Map a move to a 1-based action id in ``1..4096``.

    Only ``move.from_square`` and ``move.to_square`` are used, so any other
    attribute of the move (e.g. a promotion piece) is not represented in the
    id.

    Returns:
        ``64 * from_square + to_square + 1``.
    """
    # 8*64*row + 64*col of the origin square collapses to 64*from_square, and
    # 8*row + col of the destination square is just to_square.
    origin = move.from_square
    destination = move.to_square
    return 64 * origin + destination + 1

    
# Sanity check: show the starting position and its tensor encoding.
# (The former loop over legal moves only assigned unused locals around
# commented-out debug prints, so it has been removed.)
board = chess.Board()
print(board)
print(board_to_tensor(board))

ModuleNotFoundError: No module named 'chess'

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

class DQN(nn.Module):
    """Three-layer fully connected Q-network.

    Maps a flat board encoding to one Q-value per action id.

    Args:
        input_size: number of input features (default 64, one per square).
        hidden_size: width of both hidden layers (default 128).
        num_actions: number of outputs (default 4096 = 64 * 64 from/to pairs).
    """

    def __init__(self, input_size=64, hidden_size=128, num_actions=4096):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_actions)

    def forward(self, x, turn=None):
        """Return Q-values for ``x``.

        Args:
            x: tensor whose last dimension equals ``input_size``.
            turn: accepted for call-site compatibility but not used by the
                network. It is optional so that callers passing only the
                state (as the rest of this notebook does) no longer raise
                ``TypeError``.

        Returns:
            Tensor with last dimension ``num_actions``.
        """
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

# Online network: the one whose parameters the optimizer updates.
q_network = DQN()
# Target network: a second DQN initialised with the online network's weights.
target_network = DQN()
target_network.load_state_dict(q_network.state_dict())

# Adam with its default hyper-parameters, applied to the online network only.
optimizer = optim.Adam(q_network.parameters())
# Mean squared error loss.
criterion = nn.MSELoss()

# The replay buffer is not created here; ReplayBuffer is defined further down.
#replay_buffer = ReplayBuffer(10000)


ModuleNotFoundError: No module named 'torch'

In [None]:
import random
from collections import deque

# Experience replay buffer
class ReplayBuffer:
    """Fixed-capacity store of transitions for experience replay.

    Transitions are kept in a ``deque`` created with ``maxlen=capacity``, so
    once the buffer is full each new entry pushes out the oldest one.
    """

    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def add(self, turn, state, action, reward, next_state, done):
        """Append one ``(turn, state, action, reward, next_state, done)`` tuple."""
        transition = (turn, state, action, reward, next_state, done)
        self.buffer.append(transition)

    def sample(self, batch_size):
        """Return ``batch_size`` stored transitions chosen by ``random.sample``."""
        picked = random.sample(self.buffer, batch_size)
        return picked

    def size(self):
        """Return the number of transitions currently stored."""
        stored = len(self.buffer)
        return stored


# Shared buffer used by the training loop.
replay_buffer = ReplayBuffer(capacity=10000)


def select_action(board, state, q_network, turn, epsilon):
    """Pick an action id for the current position with an epsilon-greedy policy.

    With probability ``epsilon`` a uniformly random legal move is chosen;
    otherwise the legal move with the highest predicted Q-value is chosen.

    Args:
        board: object exposing ``legal_moves`` (e.g. ``chess.Board``).
        state: encoded position, as produced by ``board_to_tensor``.
        q_network: callable invoked as ``q_network(state_batch, turn)`` and
            returning one Q-value per action id.
        turn: side-to-move indicator, forwarded to ``q_network``.
        epsilon: exploration probability in ``[0, 1]``.

    Returns:
        1-based action id, in the same encoding as ``encode_move``.

    Raises:
        ValueError: if the position has no legal moves.
    """
    legal_moves = list(board.legal_moves)
    if not legal_moves:
        raise ValueError("select_action called on a position with no legal moves")

    if random.random() < epsilon:
        # Exploration
        move = random.choice(legal_moves)
        print("random legal move")
        print(move)
        return encode_move(move)

    # Exploitation
    print("using predicted move")
    # as_tensor avoids the copy-construct warning when state is already a tensor.
    state_tensor = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)
    with torch.no_grad():
        # The network's forward takes (x, turn); omitting turn raised TypeError.
        q_values = q_network(state_tensor, turn).reshape(-1)

    # Restrict the argmax to legal moves. encode_move ids are 1-based while the
    # network outputs are 0-based, hence the -1 here and the +1 on return.
    legal_indices = torch.tensor(
        [encode_move(move) - 1 for move in legal_moves], dtype=torch.long
    )
    best_position = torch.argmax(q_values[legal_indices])
    best_move_index = legal_indices[best_position].item() + 1

    return best_move_index


def train(replay_buffer, batch_size, gamma):
    """Run one Q-learning update on a batch sampled from the replay buffer.

    Uses the module-level ``q_network``, ``target_network``, ``optimizer`` and
    ``criterion``. Does nothing if the buffer holds fewer than ``batch_size``
    transitions.

    Args:
        replay_buffer: object exposing ``size()`` and ``sample(batch_size)``,
            yielding ``(turn, state, action, reward, next_state, done)``
            tuples where ``action`` is a 1-based action id.
        batch_size: number of transitions per update.
        gamma: discount factor applied to the bootstrapped next-state value.
    """
    if replay_buffer.size() < batch_size:
        return
    batch = replay_buffer.sample(batch_size)
    turns, states, actions, rewards, next_states, dones = zip(*batch)

    turns = torch.tensor(turns, dtype=torch.float32)
    states = torch.stack(states)
    # Stored action ids are 1-based (1..4096); gather needs 0-based column
    # indices, otherwise every lookup is shifted by one and id 4096 is out of
    # range.
    actions = torch.tensor(actions, dtype=torch.int64).unsqueeze(1) - 1
    rewards = torch.tensor(rewards, dtype=torch.float32)
    next_states = torch.stack(next_states)
    dones = torch.tensor(dones, dtype=torch.float32)

    # The networks' forward takes (x, turn); the side to move in next_state is
    # the opponent of the side that moved in state.
    q_values = q_network(states, turns).gather(1, actions).squeeze(1)
    with torch.no_grad():
        next_q_values = target_network(next_states, -turns).max(1)[0]
    # (1 - dones) removes the bootstrap term for terminal transitions.
    expected_q_values = rewards + gamma * next_q_values * (1 - dones)

    loss = criterion(q_values, expected_q_values.detach())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Hyper-parameters for the self-play training run.
turn = 1               # +1 when White is to move, -1 when Black is to move
num_episodes = 1000
batch_size = 32
gamma = 0.99
epsilon_start = 1.0
epsilon_end = 0.01
epsilon_decay = 500    # episodes over which epsilon decays by a factor of e

for episode in range(num_episodes):
    board = chess.Board()
    state = board_to_tensor(board)
    total_reward = 0
    done = False

    # Epsilon depends only on the episode number, so compute it once here
    # rather than on every move.
    epsilon = epsilon_end + (epsilon_start - epsilon_end) * torch.exp(torch.tensor(-1. * episode / epsilon_decay)).item()

    while not done:
        reward = 0
        # Derive the mover from the board instead of flipping a global that is
        # never reset: after a game with an odd number of moves the old flag
        # started the next episode with the wrong sign.
        turn = 1 if board.turn == chess.WHITE else -1

        best_move_index = select_action(board, state, q_network, turn, epsilon)

        # Action ids are 1-based: id = 64 * from_square + to_square + 1.
        # The previous decode (id // 64, id % 64 - 1) produced
        # (from_square + 1, -1) whenever to_square was 63.
        start_square, end_square = divmod(best_move_index - 1, 64)
        move = chess.Move(start_square, end_square)
        if move not in board.legal_moves:
            # The id carries no promotion piece; a pawn reaching the last rank
            # is only legal with one, so default to a queen.
            move = chess.Move(start_square, end_square, promotion=chess.QUEEN)
        if move not in board.legal_moves:
            # Never push an illegal move (it would corrupt the position).
            # Fall back to a random legal move and record that action instead.
            move = random.choice(list(board.legal_moves))
            best_move_index = encode_move(move)

        board.push(move)
        print(board)
        action = best_move_index

        next_state = board_to_tensor(board)

        if board.is_game_over():
            result = board.result()
            # Reward is from the mover's point of view: +100 if the side that
            # just moved won the game.
            if result == "1-0":
                reward = 100 * turn
            elif result == "0-1":
                reward = -100 * turn
            done = True

        replay_buffer.add(turn, state, action, reward, next_state, done)
        state = next_state
        total_reward += reward

        train(replay_buffer, batch_size, gamma)
    if episode % 10 == 0:
        # Sync the target network with the online network every 10 episodes.
        target_network.load_state_dict(q_network.state_dict())
        print(f"Episode {episode}, Total Reward: {total_reward}")
