In [115]:
!nvidia-smi

Wed Oct 25 16:12:51 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.84                 Driver Version: 545.84       CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3060 ...  WDDM  | 00000000:01:00.0 Off |                  N/A |
| N/A   55C    P5              14W /  95W |    278MiB /  6144MiB |     40%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [116]:
import torch

# Report whether this PyTorch build can see a CUDA device.
print(torch.cuda.is_available())

True


# implement game logic

In [117]:

class GoGame:
    """Minimal Go board: places stones and removes captured opponent stones.

    The board is a square ``torch`` tensor in which ``0`` marks an empty point
    and any other value is a stone colour. The opponent of ``color`` is taken
    to be ``-color`` (see :meth:`place_stone`).

    Args:
        board_size (int, optional): number of points per side. (Default: 19)
    """

    # Orthogonal neighbours of a point; diagonals do not connect stones.
    _NEIGHBOUR_OFFSETS = ((1, 0), (-1, 0), (0, 1), (0, -1))
    # One dtype for every board this class allocates, so that __init__ and
    # reset() always produce boards of the same type.
    _BOARD_DTYPE = torch.float32

    def __init__(self, board_size=19):
        self.board_size = board_size
        self.board = self._empty_board()

    def _empty_board(self):
        """Return a fresh all-zero board of shape ``(board_size, board_size)``."""
        return torch.zeros(
            (self.board_size, self.board_size), dtype=self._BOARD_DTYPE)

    def place_stone(self, x, y, color):
        """Put a ``color`` stone at ``board[x][y]`` and capture opponent stones.

        Only stones of colour ``-color`` are checked for capture; the stone
        just placed is never removed by this call.
        """
        self.board[x][y] = color
        self.remove_dead_stones(-color)

    def has_liberties(self, board, x, y, color, visited=None):
        """Tell whether the group containing ``(x, y)`` touches an empty point.

        Starting at ``(x, y)`` the search walks through orthogonally connected
        points holding ``color`` and succeeds as soon as it meets a point whose
        value is ``0``.

        Args:
            board: 2-D indexable (tensor or nested sequence) of point values.
            x, y (int): starting coordinates, indexed as ``board[x][y]``.
            color: stone value of the group being examined.
            visited (set, optional): coordinates already examined; it is
                updated in place. A new set is created when omitted.

        Returns:
            bool: ``True`` if an empty point is reached, ``False`` otherwise
            (including when the start is off-board or holds another colour).
        """
        if visited is None:
            visited = set()

        n_rows = len(board)
        n_cols = len(board[0]) if n_rows else 0

        # Explicit stack instead of recursion: a large connected group would
        # otherwise need one Python call frame per stone.
        pending = [(x, y)]
        while pending:
            cx, cy = pending.pop()
            if (cx, cy) in visited:
                continue
            visited.add((cx, cy))

            if not (0 <= cx < n_rows and 0 <= cy < n_cols):
                continue  # off the board: neither a liberty nor a stone
            value = board[cx][cy]
            if value == 0:
                return True  # found a liberty
            if value != color:
                continue  # opponent stone blocks this direction
            pending.extend(
                (cx + dx, cy + dy) for dx, dy in self._NEIGHBOUR_OFFSETS)
        return False

    def remove_dead_stones(self, color):
        """Clear every ``color`` stone whose group has no liberties."""
        # Collect first, clear afterwards: removing stones while scanning would
        # hand liberties to the not-yet-visited rest of the same dead group.
        dead_stones = [
            (x, y)
            for x in range(self.board_size)
            for y in range(self.board_size)
            if self.board[x][y] == color
            and not self.has_liberties(self.board, x, y, color)
        ]
        for x, y in dead_stones:
            self.board[x][y] = 0

    def get_board(self):
        """Return the live board tensor (not a copy)."""
        return self.board

    def reset(self):
        """Replace the board with a new empty one of the same size and dtype."""
        self.board = self._empty_board()


In [118]:
import csv
from torch.utils.data import Dataset

class GoDataset(Dataset):
    """Dataset of Go games read from a CSV file, one game per row.

    Each item is the sequence of board positions obtained by replaying the
    row's moves on a :class:`GoGame`, one position per move.

    Row layout as consumed by this class: the first two fields are skipped
    (presumably game metadata such as an id and a colour -- TODO confirm
    against the data file) and every remaining field is a move token. In a
    token, character 0 is ``'B'`` for black (anything else is treated as
    white) and characters 2 and 3 are coordinate letters ``'a'``..``'s'``,
    e.g. ``B[pd]``.

    Note: all items are replayed on one shared ``GoGame`` instance, so
    ``__getitem__`` must not be called concurrently on the same object.

    Args:
        path (str): path of the CSV file; it is read fully into memory.
    """

    def __init__(self, path):
        super().__init__()
        self.path = path
        self.goGame = GoGame()
        # Coordinate letter -> zero-based board index.
        self.char2idx = {c: i for i, c in enumerate('abcdefghijklmnopqrs')}

        # Load data from CSV file
        with open(self.path, newline='') as csvfile:
            reader = csv.reader(csvfile, delimiter=',')
            self.data = list(reader)

    def _step(self, step):
        """Play one move token on the shared game (black = -1, white = 1)."""
        current_player = -1 if step[0] == 'B' else 1
        # NOTE(review): the first letter is used as the first board index;
        # confirm this matches the row/column convention used when drawing.
        x = self.char2idx[step[2]]
        y = self.char2idx[step[3]]
        self.goGame.place_stone(x, y, current_player)

    def _transform(self, data):
        """Replay a CSV row and return the board after every move.

        Returns:
            torch.Tensor: float32 tensor of shape ``(num_moves, N, N)`` where
            ``N`` is the game's board size. A row without any move yields an
            empty ``(0, N, N)`` tensor.
        """
        moves = data[2:]
        if not moves:
            # torch.stack() rejects an empty list; return an empty sequence
            # with the right trailing shape instead of raising.
            size = self.goGame.board_size
            return torch.zeros((0, size, size), dtype=torch.float32)

        transformed_data = []
        for move in moves:
            self._step(move)
            # Clone: get_board() returns the live tensor, which the next move
            # will modify in place.
            transformed_data.append(self.goGame.get_board().clone())
        return torch.stack(transformed_data).to(torch.float32)

    def __len__(self):
        """Number of rows (games) in the CSV file."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the board sequence of game ``idx`` (see :meth:`_transform`)."""
        row = self.data[idx]

        # Start from an empty board so earlier items do not leak into this one.
        self.goGame.reset()
        return self._transform(row)


# Build the training dataset; the constructor reads the whole CSV into memory.
# The path is relative to the notebook's working directory.
goDataset = GoDataset('data/train/dan_train.csv')


In [119]:
# Number of games (CSV rows) in the dataset.
print(len(goDataset))

100160


# draw board

In [120]:
import numpy as np
import cv2


def draw_board(board, cell_size=20):
    """Render a Go position as an image.

    Args:
        board: 2-D indexable (tensor, array or nested sequence) read as
            ``board[row][col]``; ``-1`` is drawn as a black stone, ``1`` as a
            white stone and every other value is left empty.
        cell_size (int, optional): distance in pixels between adjacent grid
            lines, also used as the outer margin. (Default: 20)

    Returns:
        numpy.ndarray: ``uint8`` image of shape
        ``((rows + 1) * cell_size, (cols + 1) * cell_size, 3)``.
    """
    rows = len(board)
    cols = len(board[0]) if rows else 0
    # 8 px for the default 20 px cell, scaled proportionally otherwise.
    stone_radius = cell_size * 2 // 5

    black = (0, 0, 0)
    white = (255, 255, 255)

    # Uniform light-grey background (173 in all three channels).
    image = np.full(
        ((rows + 1) * cell_size, (cols + 1) * cell_size, 3),
        173, dtype=np.uint8)

    # Grid: one vertical line per column, one horizontal line per row, each
    # spanning from the first to the last intersection.
    for i in range(1, cols + 1):
        x = i * cell_size
        cv2.line(image, (x, cell_size), (x, rows * cell_size),
                 color=black, thickness=1)
    for j in range(1, rows + 1):
        y = j * cell_size
        cv2.line(image, (cell_size, y), (cols * cell_size, y),
                 color=black, thickness=1)

    # Stones: filled circles centred on the intersections.
    for row in range(rows):
        for col in range(cols):
            value = board[row][col]
            if value == -1:
                color = black
            elif value == 1:
                color = white
            else:
                continue
            centre = (col * cell_size + cell_size, row * cell_size + cell_size)
            cv2.circle(image, centre, stone_radius, color, -1)

    return image


# model

In [121]:
import torch
import torch.nn as nn

import torch
import torch.nn as nn


class ConvModule(torch.nn.Module):
    r"""Convolution sub-block of a Conformer layer.

    Applies, in order: LayerNorm, a pointwise ``Conv1d`` that doubles the
    channel count, ``GLU`` over the channel axis, a depthwise ``Conv1d``,
    ``BatchNorm1d`` (or a single-group ``GroupNorm``), ``SiLU``, a pointwise
    ``Conv1d`` back to ``input_dim`` and ``Dropout``.

    Args:
        input_dim (int): size of the feature axis of the input and output.
        num_channels (int): channel count seen by the depthwise convolution.
        depthwise_kernel_size (int): kernel size of the depthwise convolution;
            must be odd so the sequence length is preserved.
        dropout (float, optional): dropout probability. (Default: 0.0)
        bias (bool, optional): whether the convolutions carry a bias term.
            (Default: ``False``)
        use_group_norm (bool, optional): normalise with ``GroupNorm`` instead
            of ``BatchNorm1d``. (Default: ``False``)

    Raises:
        ValueError: if ``depthwise_kernel_size`` is not odd.
    """

    def __init__(
        self,
        input_dim: int,
        num_channels: int,
        depthwise_kernel_size: int,
        dropout: float = 0.0,
        bias: bool = False,
        use_group_norm: bool = False,
    ) -> None:
        super().__init__()
        if depthwise_kernel_size % 2 != 1:
            raise ValueError(
                "depthwise_kernel_size must be odd to achieve 'SAME' padding.")
        same_padding = (depthwise_kernel_size - 1) // 2

        self.layer_norm = torch.nn.LayerNorm(input_dim)

        # Layers are created in pipeline order so parameter initialisation
        # consumes the random generator in a fixed sequence.
        expand = torch.nn.Conv1d(
            input_dim, 2 * num_channels, 1, stride=1, padding=0, bias=bias)
        gate = torch.nn.GLU(dim=1)  # halves the channels back to num_channels
        depthwise = torch.nn.Conv1d(
            num_channels,
            num_channels,
            depthwise_kernel_size,
            stride=1,
            padding=same_padding,
            groups=num_channels,  # one filter per channel
            bias=bias,
        )
        if use_group_norm:
            norm = torch.nn.GroupNorm(num_groups=1, num_channels=num_channels)
        else:
            norm = torch.nn.BatchNorm1d(num_channels)
        activation = torch.nn.SiLU()
        project = torch.nn.Conv1d(
            num_channels, input_dim, kernel_size=1, stride=1, padding=0,
            bias=bias)
        drop = torch.nn.Dropout(dropout)

        self.sequential = torch.nn.Sequential(
            expand, gate, depthwise, norm, activation, project, drop)

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        r"""Run the convolution block.

        Args:
            input (torch.Tensor): with shape `(B, T, D)`.

        Returns:
            torch.Tensor: output, with shape `(B, T, D)`.
        """
        # Conv1d wants channels on axis 1, so swap (B, T, D) -> (B, D, T)
        # around the convolution stack and swap back afterwards.
        channels_first = self.layer_norm(input).transpose(1, 2)
        return self.sequential(channels_first).transpose(1, 2)


class FeedForwardModule(nn.Module):
    """Position-wise feed-forward block.

    Applies LayerNorm, a Linear expansion to ``hidden_dim``, SiLU, Dropout,
    a Linear projection back to ``input_dim`` and a final Dropout. The
    residual connection is left to the caller.

    Args:
        input_dim (int): size of the last axis of the input and output.
        hidden_dim (int): width of the hidden layer.
        dropout (float, optional): dropout probability. (Default: 0.1)
    """

    def __init__(self, input_dim, hidden_dim, dropout=0.1):
        super().__init__()
        layers = [
            nn.LayerNorm(input_dim),
            nn.Linear(input_dim, hidden_dim),
            nn.SiLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, input_dim),
            nn.Dropout(dropout),
        ]
        self.module = nn.Sequential(*layers)

    def forward(self, x):
        """Return the block output; the shape is the same as ``x``."""
        out = self.module(x)
        return out


class ConformerBlock(nn.Module):
    r"""Single Conformer layer.

    Structure: half-step feed-forward, self-attention and convolution (in the
    order selected by ``convolution_first``), a second half-step feed-forward
    and a final LayerNorm. Every sub-module is wrapped in a residual
    connection.

    Args:
        input_dim (int): input dimension.
        ffn_dim (int): hidden layer dimension of the feed-forward networks.
        num_attention_heads (int): number of attention heads.
        depthwise_conv_kernel_size (int): kernel size of the depthwise
            convolution layer.
        dropout (float, optional): dropout probability. (Default: 0.1)
        use_group_norm (bool, optional): use ``GroupNorm`` rather than
            ``BatchNorm1d`` in the convolution module. (Default: ``False``)
        convolution_first (bool, optional): apply the convolution module ahead
            of the attention module. (Default: ``False``)
    """

    def __init__(
            self,
            input_dim,
            ffn_dim,
            num_attention_heads,
            depthwise_conv_kernel_size,
            dropout=0.1,
            use_group_norm=False,
            convolution_first=False,
    ):
        super().__init__()
        self.ffn1 = FeedForwardModule(input_dim, ffn_dim, dropout)
        self.ffn2 = FeedForwardModule(input_dim, ffn_dim, dropout)
        # The convolution keeps the channel count equal to input_dim.
        self.conv = ConvModule(
            input_dim,
            input_dim,
            depthwise_conv_kernel_size,
            dropout,
            use_group_norm=use_group_norm,
        )
        self.self_attn = nn.MultiheadAttention(
            input_dim, num_attention_heads, dropout=dropout)
        self.self_attn_dropout = nn.Dropout(dropout)
        # NOTE(review): this one LayerNorm (one set of weights) serves both as
        # the pre-attention norm and as the final norm -- confirm that sharing
        # is intended.
        self.layer_norm = nn.LayerNorm(input_dim)
        self.convolution_first = convolution_first

    def apply_conv(self, x):
        """Convolution module with residual connection.

        ``x`` has the sequence on axis 0 and the batch on axis 1; the two are
        swapped around the convolution module, which expects `(B, T, D)`.
        """
        conv_out = self.conv(x.transpose(0, 1)).transpose(0, 1)
        return conv_out + x

    def _apply_attention(self, x):
        """Pre-norm self-attention with dropout and residual connection."""
        normed = self.layer_norm(x)
        attended, _ = self.self_attn(normed, normed, normed)
        return self.self_attn_dropout(attended) + x

    def forward(self, x):
        """Run the layer.

        Args:
            x (torch.Tensor): input with shape `(T, B, input_dim)`.

        Returns:
            torch.Tensor: output with the same shape as ``x``.
        """
        # First feed-forward, scaled by one half before the residual add.
        x = 0.5 * self.ffn1(x) + x

        if self.convolution_first:
            x = self.apply_conv(x)
            x = self._apply_attention(x)
        else:
            x = self._apply_attention(x)
            x = self.apply_conv(x)

        # Second half-step feed-forward, then the closing normalisation.
        x = 0.5 * self.ffn2(x) + x
        return self.layer_norm(x)


class Generator(nn.Module):
    r"""
    Model: a stack of Conformer layers followed by a linear output layer.

    Args:
        input_dim (int): feature dimension of each sequence step (for board
            input, the number of board points).
        num_heads (int): number of attention heads in each Conformer layer.
        ffn_dim (int): hidden layer dimension of feedforward networks.
        num_layers (int): number of Conformer layers to instantiate.
        depthwise_conv_kernel_size (int): kernel size of each Conformer layer's depthwise convolution layer.
        dropout (float, optional): dropout probability. (Default: 0.1)
        use_group_norm (bool, optional): use ``GroupNorm`` rather than ``BatchNorm1d``
            in the convolution module. (Default: ``False``)
        convolution_first (bool, optional): apply the convolution module ahead of
            the attention module. (Default: ``False``)"""

    def __init__(
            self,
            input_dim,
            num_heads,
            ffn_dim,
            num_layers,
            depthwise_conv_kernel_size,
            dropout=0.1,
            use_group_norm=False,
            convolution_first=False,
    ):
        super().__init__()
        self.conformer_blocks = nn.ModuleList(
            [
                ConformerBlock(
                    input_dim,
                    ffn_dim,
                    num_heads,
                    depthwise_conv_kernel_size,
                    dropout,
                    use_group_norm,
                    convolution_first,
                )
                for _ in range(num_layers)
            ]
        )
        # The output layer maps back to the input feature size.
        output_dim = input_dim
        self.generator_output = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        r"""
        Args:
            x (torch.Tensor): input with shape `(B, T, H, W)` where
                `H * W == input_dim`, or already flattened with shape
                `(B, T, input_dim)`.

        Returns:
            torch.Tensor: output with shape `(B, T, input_dim)`.

        Raises:
            ValueError: if ``x`` has fewer than three dimensions.
        """
        if x.dim() < 3:
            raise ValueError(
                f"expected input of shape (B, T, ...), got {tuple(x.shape)}")
        # Merge everything after (B, T) into one feature axis. Unlike view(),
        # flatten() also accepts non-contiguous tensors and leaves an
        # already flat (B, T, D) input unchanged.
        x = x.flatten(start_dim=2)

        # The Conformer layers take the sequence axis first: (T, B, D).
        x = x.transpose(0, 1)

        for layer in self.conformer_blocks:
            x = layer(x)
        x = x.transpose(0, 1)

        return self.generator_output(x)


# Board geometry; each board is flattened to height * width features.
height = width = 19

# Small Conformer used for the shape checks below.
gen = Generator(
    input_dim=height * width,
    ffn_dim=32,
    num_heads=1,
    num_layers=2,
    depthwise_conv_kernel_size=3,
    convolution_first=False,
    use_group_norm=False,
    dropout=0.1,
)

# test

In [124]:
from torch.utils.data import DataLoader

test_loader = DataLoader(goDataset, batch_size=1, shuffle=True)

# Shape smoke test on a single random game.
# The model is switched to eval mode and run without autograd so the check
# neither applies dropout nor updates BatchNorm running statistics; the
# previous train/eval mode is restored afterwards.
_was_training = gen.training
gen.eval()
try:
    with torch.no_grad():
        for batch_idx, data in enumerate(test_loader):
            channel_len = data.shape[1]  # number of moves in this game
            print(batch_idx)
            print(f'data shape: {data.shape}')
            output = gen(data)
            print(f'output shape: {output.shape}')
            # Use the actual batch dimension instead of a hard-coded 1.
            print(f'reshape output shape: {output.reshape(data.shape[0], channel_len, height, width).shape}')
            break
finally:
    gen.train(_was_training)


0
data shape: torch.Size([1, 230, 19, 19])
torch.Size([230, 1, 361])
torch.Size([230, 1, 361])
output shape: torch.Size([1, 230, 361])
