In [1]:
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim

In [115]:
class PolicyNetwork(nn.Module):
    def __init__(self, board_size):
        super(PolicyNetwork, self).__init__()
        self.board_size = board_size
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)    # First convolutional layer with input channels=1 and output channels=32
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)   # Second convolutional layer with input channels=32 and output channels=64
        self.fc1 = nn.Linear(64 * (board_size ** 2), 256)                    # Fully connected layer with input features= flatten output of conv2 which is 64*(board_size^2) and output features=256; original code was (prev)//4
        self.fc2 = nn.Linear(256, board_size ** 2)                           # Fully connected layer with input features=256 and output features=board_size^2

    def forward(self, x):
        x = torch.relu(self.conv1(x))        # Apply ReLU activation to the output of the first convolutional layer
        x = torch.relu(self.conv2(x))        # Apply ReLU activation to the output of the second convolutional layer
        x = x.view(x.size(0), -1)            # Reshape x into a 2D matrix with size (batch_size, -1)
        x = torch.relu(self.fc1(x))          # Apply ReLU activation to the output of the first fully connected layer
        x = self.fc2(x)                      # Output the final logits from the second fully connected layer
        return x

In [46]:
class RenjuGame:
    def __init__(self, board_size):
        self.board_size = board_size
        self.board = np.zeros((board_size, board_size), dtype=np.int32)
        self.current_player = 1                                             # current_player = 1 or 2
        self.winner = None
        self.last_move_col = 0
        self.last_move_row = 0

    def is_valid_move(self, row, col):
        if row < 0 or row >= self.board_size or col < 0 or col >= self.board_size:
            return False
        return self.board[row][col] == 0

    def make_move(self, row, col):
        if self.is_valid_move(row, col):
            self.board[row][col] = self.current_player
            self.check_winner()
            self.current_player = 3 - self.current_player
            self.last_move_row = row
            self.last_move_col = col

    def check_winner(self):
        directions = [(0, 1), (1, 0), (1, 1), (-1, 1)]  # horizontal, vertical, diagonal, anti-diagonal
        for dr, dc in directions:
            for row in range(self.board_size):
                for col in range(self.board_size):
                    if self.board[row][col] != 0:
                        color = self.board[row][col]
                        win = True
                        for i in range(5):
                            if row + i * dr < 0 or row + i * dr >= self.board_size or col + i * dc < 0 or col + i * dc >= self.board_size or self.board[row + i * dr][col + i * dc] != color:
                                win = False
                                break
                        if win:
                            self.winner = color
                            return
 
    

    def is_game_over(self):
        return np.count_nonzero(self.board) == self.board_size * self.board_size or self.winner is not None

    def get_state(self):
        return self.board.copy()

    def get_valid_moves(self):
        valid_moves = []
        for row in range(self.board_size):
            for col in range(self.board_size):
                if self.is_valid_move(row, col):
                    valid_moves.append((row, col))
        return valid_moves

    def print_board(self):
        for row in self.board:
            print(row)
        print()

In [208]:
class BFnode:
    def __init__(self, state, parent=None):
        self.state = state
        self.parent = parent
        self.children = []
        self.player = self.state.current_player
        #self.wins = [0]*self.state.get_valid_moves()
        #self.visits = [0]*self.state.get_valid_moves()
        self.wins = 0
        self.visits = 1
        self.last_move = None

        if self.state.winner == self.player:
            self.wins = 1
        elif self.state.winner == 3 - self.player:
            self.wins = -1

    def tell(self):
        print("wins = %d", self.wins)
        print("self.plyaer = %d", self.player )
        print("self.state.winner = %d", self.state.winner )
        
    def expand(self):
        valid_moves = self.state.get_valid_moves()
        for move in valid_moves:
            new_state = self.state.get_state()
            new_state[move[0]][move[1]] = self.state.current_player
            child = BFnode(RenjuGame(self.state.board_size), self)
            child.state.board = new_state
            child.state.current_player = 3 - self.state.current_player
            child.state.check_winner()
            child.last_move = move
            self.children.append(child)

    def back_propagate(self):
        self.vists=1
        for child in self.children:
            self.wins += child.wins
            self.visits += child.visits


    def select_best_child(self):
        best_child = None
        best_score = float('-inf')
        for child in self.children:
            score = child.wins / child.visits
            if score > best_score:
                best_score = score
                best_child = child
        return best_child
        

In [155]:
def iter_expand(node, num_iter):
        if (num_iter) == 0:
            return
        node.expand()
        for child in node.children:
            iter_expand(child, num_iter-1)
        node.back_propagate()
        return

In [None]:
class BFagent:
    def __init__(self, iterations, board_size):
        self.iterations = iterations
        self.board_size = board_size
        self.policy_network = PolicyNetwork(board_size)
        self.optimizer = optim.Adam(self.policy_network.parameters(), lr=0.001)
    
    

        
    def get_action(self, state):
        root = BFnode(state)
        # Search three moves ahead with brute force and find the best move
        best_child = None
        
        iter_expand(root)
        best_child = root.select_best_child()


        return best_child.last_move



In [209]:
game = RenjuGame(10)
game.make_move(1,1)
for i in range(4):
    game.make_move(1,i+1)
    game.make_move(2,i+1)
testnode = BFnode(game)
testnode.state.print_board()

testnode.expand()
testnode.children[11].tell()
print(testnode.children[11].state.winner)
print(testnode.children[11].player)

print(testnode.children[11].wins)

#iter_expand(testnode, 1)


[0 0 0 0 0 0 0 0 0 0]
[0 1 1 1 1 0 0 0 0 0]
[0 2 2 2 2 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0]

wins = %d 0
self.plyaer = %d 1
self.state.winner = %d 1
1
1
0


In [203]:
bchild = testnode.select_best_child()

#print(bchild.visits)
#print(testnode.visits)
#print(testnode.children[11].state.winner)
#print(testnode.children[11].player)

#print(testnode.children[11].wins)
#testnode.children[11].state.print_board()

testnode.select_best_child().state.print_board()

[1 0 0 0 0 0 0 0 0 0]
[0 1 1 1 1 0 0 0 0 0]
[0 2 2 2 2 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0]

