In [1]:
import numpy as np
import torch

DATASET_PATH = "dataset/riichi_ds_v01.npz"

loads = np.load(DATASET_PATH)
x, y = torch.tensor(loads["x"].astype(np.float32)), torch.tensor(loads["y"].astype(np.float32))

x_test, x_train = x[:20000], x[20000:]
y_test, y_train = y[:20000], y[20000:]

test_ds = torch.utils.data.TensorDataset(x_test, y_test)
train_ds = torch.utils.data.TensorDataset(x_train, y_train)

test_dl = torch.utils.data.DataLoader(test_ds, batch_size=64, shuffle=True)
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=64, shuffle=True)

### Models

#### Model V1
Basic model with structure suggested by <br>
https://arxiv.org/pdf/2202.12847

In [2]:
from torchsummary import summary

# https://arxiv.org/pdf/2202.12847
# ~ No padding or pooling to be used, loses accuracy

class ModelV1(torch.nn.Module):
    def __init__(self):
        super(ModelV1, self).__init__()
        self.dropout = torch.nn.Dropout(0.5)
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()
        
        self.conv2d_1 = torch.nn.Conv2d(1, 64, (4, 5))
        self.bn_1 = torch.nn.BatchNorm2d(64)
        self.conv2d_2 = torch.nn.Conv2d(64, 64, (4, 5))
        self.bn_2 = torch.nn.BatchNorm2d(64)
        self.conv2d_3 = torch.nn.Conv2d(64, 64, (4, 5))
        self.bn_3 = torch.nn.BatchNorm2d(64)
        self.conv2d_4 = torch.nn.Conv2d(64, 32, (4, 5))
        self.bn_4 = torch.nn.BatchNorm2d(32)
        
        self.fc_1 = torch.nn.Linear(6912, 256)
        self.bn_5 = torch.nn.BatchNorm1d(256)
        self.fc_2 = torch.nn.Linear(256, 34)

    def forward(self, x):
        x = torch.unsqueeze(x, -3)
        x = self.dropout(self.bn_1(self.relu(self.conv2d_1(x))))
        x = self.dropout(self.bn_2(self.relu(self.conv2d_2(x))))
        x = self.dropout(self.bn_3(self.relu(self.conv2d_3(x))))
        x = self.dropout(self.bn_4(self.relu(self.conv2d_4(x))))
        x = torch.flatten(x, 1, -1)
        x = self.dropout(self.bn_5(self.relu(self.fc_1(x))))
        x = self.sigmoid(self.fc_2(x))
        return x

summary(ModelV1(), (24, 34), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 21, 30]           1,344
              ReLU-2           [-1, 64, 21, 30]               0
       BatchNorm2d-3           [-1, 64, 21, 30]             128
           Dropout-4           [-1, 64, 21, 30]               0
            Conv2d-5           [-1, 64, 18, 26]          81,984
              ReLU-6           [-1, 64, 18, 26]               0
       BatchNorm2d-7           [-1, 64, 18, 26]             128
           Dropout-8           [-1, 64, 18, 26]               0
            Conv2d-9           [-1, 64, 15, 22]          81,984
             ReLU-10           [-1, 64, 15, 22]               0
      BatchNorm2d-11           [-1, 64, 15, 22]             128
          Dropout-12           [-1, 64, 15, 22]               0
           Conv2d-13           [-1, 32, 12, 18]          40,992
             ReLU-14           [-1, 32,

#### Model V2
Model stacking the type of tiles to form a 4 layer input<br>
of manzu, pinzu, soozu, jihai

In [59]:
from torchsummary import summary


class ModelV2(torch.nn.Module):
    def __init__(self):
        super(ModelV2, self).__init__()
        self.dropout = torch.nn.Dropout(0.5)
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()
        
        self.bn_1 = torch.nn.BatchNorm2d(64)
        self.bn_2 = torch.nn.BatchNorm2d(64)
        self.bn_3 = torch.nn.BatchNorm2d(64)
        self.bn_4 = torch.nn.BatchNorm2d(32)
        
        self.conv2d_1 = torch.nn.Conv2d(4, 64, (6, 7), padding=(0, 3))
        self.conv2d_2 = torch.nn.Conv2d(64, 64, (6, 7), padding=(0, 3))
        self.conv2d_3 = torch.nn.Conv2d(64, 64, (6, 7), padding=(0, 3))
        self.conv2d_4 = torch.nn.Conv2d(64, 32, (6, 7), padding=(0, 3))
        self.conv2d_5 = torch.nn.Conv2d(32, 4, (4, 7), padding=(0, 3))

    def forward(self, x):
        x_manzu = x[..., 0:9]
        x_pinzu = x[..., 9:18]
        x_soozu = x[..., 18:27]
        x_jihai = torch.nn.functional.pad(x[..., 27:34], (1, 1, 0, 0))
        x = torch.stack([x_manzu, x_pinzu, x_soozu, x_jihai], dim=-3)
        x = self.dropout(self.bn_1(self.relu(self.conv2d_1(x))))
        x = self.dropout(self.bn_2(self.relu(self.conv2d_2(x))))
        x = self.dropout(self.bn_3(self.relu(self.conv2d_3(x))))
        x = self.dropout(self.bn_4(self.relu(self.conv2d_4(x))))
        x = self.sigmoid(self.conv2d_5(x))
        x = torch.squeeze(x, dim=-2)
        x = torch.concat([x[..., 0, :], x[..., 1, :], x[..., 2, :], x[..., 3, 1:8]], dim=-1)
        return x

summary(ModelV2(), (24, 34), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 64, 19, 9]          10,816
              ReLU-2            [-1, 64, 19, 9]               0
       BatchNorm2d-3            [-1, 64, 19, 9]             128
           Dropout-4            [-1, 64, 19, 9]               0
            Conv2d-5            [-1, 64, 14, 9]         172,096
              ReLU-6            [-1, 64, 14, 9]               0
       BatchNorm2d-7            [-1, 64, 14, 9]             128
           Dropout-8            [-1, 64, 14, 9]               0
            Conv2d-9             [-1, 64, 9, 9]         172,096
             ReLU-10             [-1, 64, 9, 9]               0
      BatchNorm2d-11             [-1, 64, 9, 9]             128
          Dropout-12             [-1, 64, 9, 9]               0
           Conv2d-13             [-1, 32, 4, 9]          86,048
             ReLU-14             [-1, 3

In [4]:
# Testing the importance of big kernels
# Comparing the 6x7 kernels with V2_1's 6x3

from torchsummary import summary


class ModelV2_1(torch.nn.Module):
    def __init__(self):
        super(ModelV2_1, self).__init__()
        self.dropout = torch.nn.Dropout(0.5)
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()
        
        self.bn_1 = torch.nn.BatchNorm2d(64)
        self.bn_2 = torch.nn.BatchNorm2d(64)
        self.bn_3 = torch.nn.BatchNorm2d(64)
        self.bn_4 = torch.nn.BatchNorm2d(32)
        
        self.conv2d_1 = torch.nn.Conv2d(4, 64, (6, 3), padding=(0, 1))
        self.conv2d_2 = torch.nn.Conv2d(64, 64, (6, 3), padding=(0, 1))
        self.conv2d_3 = torch.nn.Conv2d(64, 64, (6, 3), padding=(0, 1))
        self.conv2d_4 = torch.nn.Conv2d(64, 32, (6, 3), padding=(0, 1))
        self.conv2d_5 = torch.nn.Conv2d(32, 4, (4, 3), padding=(0, 1))

    def forward(self, x):
        x_manzu = x[..., 0:9]
        x_pinzu = x[..., 9:18]
        x_soozu = x[..., 18:27]
        x_jihai = torch.nn.functional.pad(x[..., 27:34], (1, 1, 0, 0))
        x = torch.stack([x_manzu, x_pinzu, x_soozu, x_jihai], dim=-3)
        x = self.dropout(self.bn_1(self.relu(self.conv2d_1(x))))
        x = self.dropout(self.bn_2(self.relu(self.conv2d_2(x))))
        x = self.dropout(self.bn_3(self.relu(self.conv2d_3(x))))
        x = self.dropout(self.bn_4(self.relu(self.conv2d_4(x))))
        x = self.sigmoid(self.conv2d_5(x))
        x = torch.squeeze(x, dim=-2)
        x = torch.concat([x[..., 0, :], x[..., 1, :], x[..., 2, :], x[..., 3, 1:8]], dim=-1)
        return x

summary(ModelV2_1(), (24, 34), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 64, 19, 9]           4,672
              ReLU-2            [-1, 64, 19, 9]               0
       BatchNorm2d-3            [-1, 64, 19, 9]             128
           Dropout-4            [-1, 64, 19, 9]               0
            Conv2d-5            [-1, 64, 14, 9]          73,792
              ReLU-6            [-1, 64, 14, 9]               0
       BatchNorm2d-7            [-1, 64, 14, 9]             128
           Dropout-8            [-1, 64, 14, 9]               0
            Conv2d-9             [-1, 64, 9, 9]          73,792
             ReLU-10             [-1, 64, 9, 9]               0
      BatchNorm2d-11             [-1, 64, 9, 9]             128
          Dropout-12             [-1, 64, 9, 9]               0
           Conv2d-13             [-1, 32, 4, 9]          36,896
             ReLU-14             [-1, 3

#### Model V3
A modification of model V2, but using maxpooling as a method of downsampling

In [26]:
from torchsummary import summary


class ModelV3(torch.nn.Module):
    def __init__(self):
        super(ModelV3, self).__init__()
        self.dropout = torch.nn.Dropout(0.5)
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()
        self.pool = torch.nn.MaxPool2d((6, 1), stride=1)
        
        self.bn_1 = torch.nn.BatchNorm2d(64)
        self.bn_2 = torch.nn.BatchNorm2d(64)
        self.bn_3 = torch.nn.BatchNorm2d(64)
        self.bn_4 = torch.nn.BatchNorm2d(32)
        
        self.conv2d_1 = torch.nn.Conv2d(4, 64, (7, 7), padding=(3, 3))
        self.conv2d_2 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))
        self.conv2d_3 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))
        self.conv2d_4 = torch.nn.Conv2d(64, 32, (7, 7), padding=(3, 3))
        self.conv2d_5 = torch.nn.Conv2d(32, 4, (4, 7), padding=(0, 3))

    def forward(self, x):
        x_manzu = x[..., 0:9]
        x_pinzu = x[..., 9:18]
        x_soozu = x[..., 18:27]
        x_jihai = torch.nn.functional.pad(x[..., 27:34], (1, 1, 0, 0))
        x = torch.stack([x_manzu, x_pinzu, x_soozu, x_jihai], dim=-3)
        x = self.pool(self.dropout(self.bn_1(self.relu(self.conv2d_1(x)))))
        x = self.pool(self.dropout(self.bn_2(self.relu(self.conv2d_2(x)))))
        x = self.pool(self.dropout(self.bn_3(self.relu(self.conv2d_3(x)))))
        x = self.pool(self.dropout(self.bn_4(self.relu(self.conv2d_4(x)))))
        x = self.sigmoid(self.conv2d_5(x))
        x = torch.squeeze(x, dim=-2)
        x = torch.concat([x[..., 0, :], x[..., 1, :], x[..., 2, :], x[..., 3, 1:8]], dim=-1)
        return x

summary(ModelV3(), (24, 34), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 64, 24, 9]          12,608
              ReLU-2            [-1, 64, 24, 9]               0
       BatchNorm2d-3            [-1, 64, 24, 9]             128
           Dropout-4            [-1, 64, 24, 9]               0
         MaxPool2d-5            [-1, 64, 19, 9]               0
            Conv2d-6            [-1, 64, 19, 9]         200,768
              ReLU-7            [-1, 64, 19, 9]               0
       BatchNorm2d-8            [-1, 64, 19, 9]             128
           Dropout-9            [-1, 64, 19, 9]               0
        MaxPool2d-10            [-1, 64, 14, 9]               0
           Conv2d-11            [-1, 64, 14, 9]         200,768
             ReLU-12            [-1, 64, 14, 9]               0
      BatchNorm2d-13            [-1, 64, 14, 9]             128
          Dropout-14            [-1, 64

In [3]:
# Trying dropout after pool

from torchsummary import summary


class ModelV3_1(torch.nn.Module):
    def __init__(self):
        super(ModelV3_1, self).__init__()
        self.dropout = torch.nn.Dropout(0.5)
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()
        self.pool = torch.nn.MaxPool2d((6, 1), stride=1)
        
        self.bn_1 = torch.nn.BatchNorm2d(64)
        self.bn_2 = torch.nn.BatchNorm2d(64)
        self.bn_3 = torch.nn.BatchNorm2d(64)
        self.bn_4 = torch.nn.BatchNorm2d(32)
        
        self.conv2d_1 = torch.nn.Conv2d(4, 64, (7, 7), padding=(3, 3))
        self.conv2d_2 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))
        self.conv2d_3 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))
        self.conv2d_4 = torch.nn.Conv2d(64, 32, (7, 7), padding=(3, 3))
        self.conv2d_5 = torch.nn.Conv2d(32, 4, (4, 7), padding=(0, 3))

    def forward(self, x):
        x_manzu = x[..., 0:9]
        x_pinzu = x[..., 9:18]
        x_soozu = x[..., 18:27]
        x_jihai = torch.nn.functional.pad(x[..., 27:34], (1, 1, 0, 0))
        x = torch.stack([x_manzu, x_pinzu, x_soozu, x_jihai], dim=-3)
        x = self.dropout(self.pool(self.bn_1(self.relu(self.conv2d_1(x)))))
        x = self.dropout(self.pool(self.bn_2(self.relu(self.conv2d_2(x)))))
        x = self.dropout(self.pool(self.bn_3(self.relu(self.conv2d_3(x)))))
        x = self.dropout(self.pool(self.bn_4(self.relu(self.conv2d_4(x)))))
        x = self.sigmoid(self.conv2d_5(x))
        x = torch.squeeze(x, dim=-2)
        x = torch.concat([x[..., 0, :], x[..., 1, :], x[..., 2, :], x[..., 3, 1:8]], dim=-1)
        return x

summary(ModelV3_1(), (24, 34), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 64, 24, 9]          12,608
              ReLU-2            [-1, 64, 24, 9]               0
       BatchNorm2d-3            [-1, 64, 24, 9]             128
         MaxPool2d-4            [-1, 64, 19, 9]               0
           Dropout-5            [-1, 64, 19, 9]               0
            Conv2d-6            [-1, 64, 19, 9]         200,768
              ReLU-7            [-1, 64, 19, 9]               0
       BatchNorm2d-8            [-1, 64, 19, 9]             128
         MaxPool2d-9            [-1, 64, 14, 9]               0
          Dropout-10            [-1, 64, 14, 9]               0
           Conv2d-11            [-1, 64, 14, 9]         200,768
             ReLU-12            [-1, 64, 14, 9]               0
      BatchNorm2d-13            [-1, 64, 14, 9]             128
        MaxPool2d-14             [-1, 6

In [2]:
# Increased dropout from 0.5 to 0.75

from torchsummary import summary


class ModelV3_2(torch.nn.Module):
    def __init__(self):
        super(ModelV3_2, self).__init__()
        self.dropout = torch.nn.Dropout(0.75)
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()
        self.pool = torch.nn.MaxPool2d((6, 1), stride=1)
        
        self.bn_1 = torch.nn.BatchNorm2d(64)
        self.bn_2 = torch.nn.BatchNorm2d(64)
        self.bn_3 = torch.nn.BatchNorm2d(64)
        self.bn_4 = torch.nn.BatchNorm2d(32)
        
        self.conv2d_1 = torch.nn.Conv2d(4, 64, (7, 7), padding=(3, 3))
        self.conv2d_2 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))
        self.conv2d_3 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))
        self.conv2d_4 = torch.nn.Conv2d(64, 32, (7, 7), padding=(3, 3))
        self.conv2d_5 = torch.nn.Conv2d(32, 4, (4, 7), padding=(0, 3))

    def forward(self, x):
        x_manzu = x[..., 0:9]
        x_pinzu = x[..., 9:18]
        x_soozu = x[..., 18:27]
        x_jihai = torch.nn.functional.pad(x[..., 27:34], (1, 1, 0, 0))
        x = torch.stack([x_manzu, x_pinzu, x_soozu, x_jihai], dim=-3)
        x = self.dropout(self.pool(self.bn_1(self.relu(self.conv2d_1(x)))))
        x = self.dropout(self.pool(self.bn_2(self.relu(self.conv2d_2(x)))))
        x = self.dropout(self.pool(self.bn_3(self.relu(self.conv2d_3(x)))))
        x = self.dropout(self.pool(self.bn_4(self.relu(self.conv2d_4(x)))))
        x = self.sigmoid(self.conv2d_5(x))
        x = torch.squeeze(x, dim=-2)
        x = torch.concat([x[..., 0, :], x[..., 1, :], x[..., 2, :], x[..., 3, 1:8]], dim=-1)
        return x

summary(ModelV3_2(), (24, 34), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 64, 24, 9]          12,608
              ReLU-2            [-1, 64, 24, 9]               0
       BatchNorm2d-3            [-1, 64, 24, 9]             128
         MaxPool2d-4            [-1, 64, 19, 9]               0
           Dropout-5            [-1, 64, 19, 9]               0
            Conv2d-6            [-1, 64, 19, 9]         200,768
              ReLU-7            [-1, 64, 19, 9]               0
       BatchNorm2d-8            [-1, 64, 19, 9]             128
         MaxPool2d-9            [-1, 64, 14, 9]               0
          Dropout-10            [-1, 64, 14, 9]               0
           Conv2d-11            [-1, 64, 14, 9]         200,768
             ReLU-12            [-1, 64, 14, 9]               0
      BatchNorm2d-13            [-1, 64, 14, 9]             128
        MaxPool2d-14             [-1, 6

In [6]:
# Changed pooling size

from torchsummary import summary


class ModelV3_3(torch.nn.Module):
    def __init__(self):
        super(ModelV3_3, self).__init__()
        self.dropout = torch.nn.Dropout(0.5)
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()
        self.pool = torch.nn.MaxPool2d((2, 1))
        
        self.bn_1 = torch.nn.BatchNorm2d(64)
        self.bn_2 = torch.nn.BatchNorm2d(64)
        self.bn_3 = torch.nn.BatchNorm2d(64)
        self.bn_4 = torch.nn.BatchNorm2d(64)
        self.bn_5 = torch.nn.BatchNorm2d(32)
        
        self.conv2d_1 = torch.nn.Conv2d(4, 64, (7, 7), padding=(3, 3))
        self.conv2d_2 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))
        self.conv2d_3 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))
        self.conv2d_4 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))
        self.conv2d_5 = torch.nn.Conv2d(64, 32, (7, 7), padding=(3, 3))
        self.conv2d_6 = torch.nn.Conv2d(32, 4, (6, 7), padding=(0, 3))

    def forward(self, x):
        x_manzu = x[..., 0:9]
        x_pinzu = x[..., 9:18]
        x_soozu = x[..., 18:27]
        x_jihai = torch.nn.functional.pad(x[..., 27:34], (1, 1, 0, 0))
        x = torch.stack([x_manzu, x_pinzu, x_soozu, x_jihai], dim=-3)
        x = self.dropout(self.bn_1(self.relu(self.conv2d_1(x))))
        x = self.dropout(self.pool(self.bn_2(self.relu(self.conv2d_2(x)))))
        x = self.dropout(self.bn_3(self.relu(self.conv2d_3(x))))
        x = self.dropout(self.pool(self.bn_4(self.relu(self.conv2d_4(x)))))
        x = self.dropout(self.bn_5(self.relu(self.conv2d_5(x))))
        x = self.sigmoid(self.conv2d_6(x))
        x = torch.squeeze(x, dim=-2)
        x = torch.concat([x[..., 0, :], x[..., 1, :], x[..., 2, :], x[..., 3, 1:8]], dim=-1)
        return x

summary(ModelV3_3(), (24, 34), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 64, 24, 9]          12,608
              ReLU-2            [-1, 64, 24, 9]               0
       BatchNorm2d-3            [-1, 64, 24, 9]             128
           Dropout-4            [-1, 64, 24, 9]               0
            Conv2d-5            [-1, 64, 24, 9]         200,768
              ReLU-6            [-1, 64, 24, 9]               0
       BatchNorm2d-7            [-1, 64, 24, 9]             128
         MaxPool2d-8            [-1, 64, 12, 9]               0
           Dropout-9            [-1, 64, 12, 9]               0
           Conv2d-10            [-1, 64, 12, 9]         200,768
             ReLU-11            [-1, 64, 12, 9]               0
      BatchNorm2d-12            [-1, 64, 12, 9]             128
          Dropout-13            [-1, 64, 12, 9]               0
           Conv2d-14            [-1, 64

#### Model V4
A modification of model V3_3, using residual networks like Resnet

In [2]:
from torchsummary import summary


class ModelV4(torch.nn.Module):
    def __init__(self):
        super(ModelV4, self).__init__()
        self.dropout = torch.nn.Dropout(0.5)
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()
        self.pool = torch.nn.MaxPool2d((2, 1))
        
        self.bn_1 = torch.nn.BatchNorm2d(64)
        self.bn_2 = torch.nn.BatchNorm2d(64)
        self.bn_3 = torch.nn.BatchNorm2d(64)
        self.bn_4 = torch.nn.BatchNorm2d(64)
        self.bn_5 = torch.nn.BatchNorm2d(32)
        
        self.conv2d_1 = torch.nn.Conv2d(4, 64, (7, 7), padding=(3, 3))
        self.conv2d_2 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))
        self.conv2d_3 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))
        self.conv2d_4 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))
        self.conv2d_5 = torch.nn.Conv2d(64, 32, (7, 7), padding=(3, 3))
        self.conv2d_6 = torch.nn.Conv2d(32, 4, (6, 7), padding=(0, 3))

    def forward(self, x):
        x_manzu = x[..., 0:9]
        x_pinzu = x[..., 9:18]
        x_soozu = x[..., 18:27]
        x_jihai = torch.nn.functional.pad(x[..., 27:34], (1, 1, 0, 0))
        x = torch.stack([x_manzu, x_pinzu, x_soozu, x_jihai], dim=-3)
        x = self.dropout(self.bn_1(self.relu(self.conv2d_1(x))))
        x = torch.add(x, self.bn_2(self.relu(self.conv2d_2(x))))  # Residual
        x = self.dropout(self.pool(x))
        x = torch.add(x, self.bn_3(self.relu(self.conv2d_3(x))))  # Residual
        x = self.dropout(x)
        x = torch.add(x, self.bn_4(self.relu(self.conv2d_4(x))))  # Residual
        x = self.dropout(self.pool(x))
        x = self.dropout(self.bn_5(self.relu(self.conv2d_5(x))))
        x = self.sigmoid(self.conv2d_6(x))
        x = torch.squeeze(x, dim=-2)
        x = torch.concat([x[..., 0, :], x[..., 1, :], x[..., 2, :], x[..., 3, 1:8]], dim=-1)
        return x

summary(ModelV4(), (24, 34), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 64, 24, 9]          12,608
              ReLU-2            [-1, 64, 24, 9]               0
       BatchNorm2d-3            [-1, 64, 24, 9]             128
           Dropout-4            [-1, 64, 24, 9]               0
            Conv2d-5            [-1, 64, 24, 9]         200,768
              ReLU-6            [-1, 64, 24, 9]               0
       BatchNorm2d-7            [-1, 64, 24, 9]             128
         MaxPool2d-8            [-1, 64, 12, 9]               0
           Dropout-9            [-1, 64, 12, 9]               0
           Conv2d-10            [-1, 64, 12, 9]         200,768
             ReLU-11            [-1, 64, 12, 9]               0
      BatchNorm2d-12            [-1, 64, 12, 9]             128
          Dropout-13            [-1, 64, 12, 9]               0
           Conv2d-14            [-1, 64

In [42]:
# Trying BN -> ReLU instead of ReLU -> BN

from torchsummary import summary


class ModelV4_1(torch.nn.Module):
    def __init__(self):
        super(ModelV4_1, self).__init__()
        self.dropout = torch.nn.Dropout(0.5)
        self.pool = torch.nn.MaxPool2d((2, 1))
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()
        
        self.bn1_1 = torch.nn.BatchNorm2d(64)
        self.bn1_2 = torch.nn.BatchNorm2d(64)
        
        self.bn2_1 = torch.nn.BatchNorm2d(64)
        self.bn2_2 = torch.nn.BatchNorm2d(64)

        self.conv2d1_1 = torch.nn.Conv2d(4, 64, (1, 1))
        self.conv2d1_2 = torch.nn.Conv2d(4, 64, (7, 7), padding=(3, 3))
        self.conv2d1_3 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))

        self.conv2d2_1 = torch.nn.Conv2d(64, 64, (1, 1))
        self.conv2d2_2 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))
        self.conv2d2_3 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))
        
        self.conv2d3 = torch.nn.Conv2d(64, 4, (6, 7), padding=(0, 3))

    def forward(self, x):
        x_manzu = x[..., 0:9]
        x_pinzu = x[..., 9:18]
        x_soozu = x[..., 18:27]
        x_jihai = torch.nn.functional.pad(x[..., 27:34], (1, 1, 0, 0))
        x = torch.stack([x_manzu, x_pinzu, x_soozu, x_jihai], dim=-3)
        
        x = torch.add(
            self.conv2d1_1(x),
            self.conv2d1_3(self.dropout(self.relu(self.bn1_1(self.conv2d1_2(x)))))
        )
        x = self.dropout(self.pool(self.relu(self.bn1_2(x))))
        
        x = torch.add(
            self.conv2d2_1(x),
            self.conv2d2_3(self.dropout(self.relu(self.bn2_1(self.conv2d2_2(x)))))
        )
        x = self.dropout(self.pool(self.relu(self.bn2_2(x))))
        
        x = self.sigmoid(self.conv2d3(x))
        x = torch.squeeze(x, dim=-2)
        x = torch.concat([x[..., 0, :], x[..., 1, :], x[..., 2, :], x[..., 3, 1:8]], dim=-1)
        return x

summary(ModelV4_1(), (24, 34), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 64, 24, 9]             320
            Conv2d-2            [-1, 64, 24, 9]          12,608
       BatchNorm2d-3            [-1, 64, 24, 9]             128
              ReLU-4            [-1, 64, 24, 9]               0
           Dropout-5            [-1, 64, 24, 9]               0
            Conv2d-6            [-1, 64, 24, 9]         200,768
       BatchNorm2d-7            [-1, 64, 24, 9]             128
              ReLU-8            [-1, 64, 24, 9]               0
         MaxPool2d-9            [-1, 64, 12, 9]               0
          Dropout-10            [-1, 64, 12, 9]               0
           Conv2d-11            [-1, 64, 12, 9]           4,160
           Conv2d-12            [-1, 64, 12, 9]         200,768
      BatchNorm2d-13            [-1, 64, 12, 9]             128
             ReLU-14            [-1, 64

In [2]:
# Increasing kernel from 64 to 128

from torchsummary import summary


class ModelV4_2(torch.nn.Module):
    def __init__(self):
        super(ModelV4_2, self).__init__()
        self.dropout = torch.nn.Dropout(0.5)
        self.pool = torch.nn.MaxPool2d((2, 1))
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()
        
        self.bn1_1 = torch.nn.BatchNorm2d(128)
        self.bn1_2 = torch.nn.BatchNorm2d(128)
        
        self.bn2_1 = torch.nn.BatchNorm2d(128)
        self.bn2_2 = torch.nn.BatchNorm2d(128)

        self.conv2d1_1 = torch.nn.Conv2d(4, 128, (1, 1))
        self.conv2d1_2 = torch.nn.Conv2d(4, 128, (7, 7), padding=(3, 3))
        self.conv2d1_3 = torch.nn.Conv2d(128, 128, (7, 7), padding=(3, 3))

        self.conv2d2_1 = torch.nn.Conv2d(128, 128, (1, 1))
        self.conv2d2_2 = torch.nn.Conv2d(128, 128, (7, 7), padding=(3, 3))
        self.conv2d2_3 = torch.nn.Conv2d(128, 128, (7, 7), padding=(3, 3))
        
        self.conv2d3 = torch.nn.Conv2d(128, 4, (6, 7), padding=(0, 3))

    def forward(self, x):
        x_manzu = x[..., 0:9]
        x_pinzu = x[..., 9:18]
        x_soozu = x[..., 18:27]
        x_jihai = torch.nn.functional.pad(x[..., 27:34], (1, 1, 0, 0))
        x = torch.stack([x_manzu, x_pinzu, x_soozu, x_jihai], dim=-3)
        
        x = torch.add(
            self.conv2d1_1(x),
            self.conv2d1_3(self.dropout(self.relu(self.bn1_1(self.conv2d1_2(x)))))
        )
        x = self.dropout(self.pool(self.relu(self.bn1_2(x))))
        
        x = torch.add(
            self.conv2d2_1(x),
            self.conv2d2_3(self.dropout(self.relu(self.bn2_1(self.conv2d2_2(x)))))
        )
        x = self.dropout(self.pool(self.relu(self.bn2_2(x))))
        
        x = self.sigmoid(self.conv2d3(x))
        x = torch.squeeze(x, dim=-2)
        x = torch.concat([x[..., 0, :], x[..., 1, :], x[..., 2, :], x[..., 3, 1:8]], dim=-1)
        return x

summary(ModelV4_2(), (24, 34), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 128, 24, 9]             640
            Conv2d-2           [-1, 128, 24, 9]          25,216
       BatchNorm2d-3           [-1, 128, 24, 9]             256
              ReLU-4           [-1, 128, 24, 9]               0
           Dropout-5           [-1, 128, 24, 9]               0
            Conv2d-6           [-1, 128, 24, 9]         802,944
       BatchNorm2d-7           [-1, 128, 24, 9]             256
              ReLU-8           [-1, 128, 24, 9]               0
         MaxPool2d-9           [-1, 128, 12, 9]               0
          Dropout-10           [-1, 128, 12, 9]               0
           Conv2d-11           [-1, 128, 12, 9]          16,512
           Conv2d-12           [-1, 128, 12, 9]         802,944
      BatchNorm2d-13           [-1, 128, 12, 9]             256
             ReLU-14           [-1, 128

In [2]:
# Increasing model depth from v4_1

from torchsummary import summary


class ModelV4_3(torch.nn.Module):
    def __init__(self):
        super(ModelV4_3, self).__init__()
        self.dropout = torch.nn.Dropout(0.5)
        self.pool = torch.nn.MaxPool2d((2, 1))
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()
        
        self.bn1_1 = torch.nn.BatchNorm2d(64)
        self.bn1_2 = torch.nn.BatchNorm2d(64)
        
        self.bn2_1 = torch.nn.BatchNorm2d(64)
        self.bn2_2 = torch.nn.BatchNorm2d(64)
        
        self.bn3_1 = torch.nn.BatchNorm2d(64)
        self.bn3_2 = torch.nn.BatchNorm2d(64)

        self.conv2d1_1 = torch.nn.Conv2d(4, 64, (1, 1))
        self.conv2d1_2 = torch.nn.Conv2d(4, 64, (7, 7), padding=(3, 3))
        self.conv2d1_3 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))

        self.conv2d2_1 = torch.nn.Conv2d(64, 64, (1, 1))
        self.conv2d2_2 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))
        self.conv2d2_3 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))

        self.conv2d3_1 = torch.nn.Conv2d(64, 64, (1, 1))
        self.conv2d3_2 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))
        self.conv2d3_3 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))
        
        self.conv2d4 = torch.nn.Conv2d(64, 4, (6, 7), padding=(0, 3))

    def forward(self, x):
        x_manzu = x[..., 0:9]
        x_pinzu = x[..., 9:18]
        x_soozu = x[..., 18:27]
        x_jihai = torch.nn.functional.pad(x[..., 27:34], (1, 1, 0, 0))
        x = torch.stack([x_manzu, x_pinzu, x_soozu, x_jihai], dim=-3)
        
        x = torch.add(
            self.conv2d1_1(x),
            self.conv2d1_3(self.dropout(self.relu(self.bn1_1(self.conv2d1_2(x)))))
        )
        x = self.dropout(self.pool(self.relu(self.bn1_2(x))))
        
        x = torch.add(
            self.conv2d2_1(x),
            self.conv2d2_3(self.dropout(self.relu(self.bn2_1(self.conv2d2_2(x)))))
        )
        x = self.dropout(self.relu(self.bn2_2(x)))
        x = torch.add(
            self.conv2d3_1(x),
            self.conv2d3_3(self.dropout(self.relu(self.bn3_1(self.conv2d3_2(x)))))
        )
        x = self.dropout(self.pool(self.relu(self.bn3_2(x))))
        
        x = self.sigmoid(self.conv2d4(x))
        x = torch.squeeze(x, dim=-2)
        x = torch.concat([x[..., 0, :], x[..., 1, :], x[..., 2, :], x[..., 3, 1:8]], dim=-1)
        return x

summary(ModelV4_3(), (24, 34), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 64, 24, 9]             320
            Conv2d-2            [-1, 64, 24, 9]          12,608
       BatchNorm2d-3            [-1, 64, 24, 9]             128
              ReLU-4            [-1, 64, 24, 9]               0
           Dropout-5            [-1, 64, 24, 9]               0
            Conv2d-6            [-1, 64, 24, 9]         200,768
       BatchNorm2d-7            [-1, 64, 24, 9]             128
              ReLU-8            [-1, 64, 24, 9]               0
         MaxPool2d-9            [-1, 64, 12, 9]               0
          Dropout-10            [-1, 64, 12, 9]               0
           Conv2d-11            [-1, 64, 12, 9]           4,160
           Conv2d-12            [-1, 64, 12, 9]         200,768
      BatchNorm2d-13            [-1, 64, 12, 9]             128
             ReLU-14            [-1, 64

#### Model V5
A modification of model V4 and V3, using inception networks instead

In [2]:
from torchsummary import summary


class ModelV5(torch.nn.Module):
    def __init__(self):
        super(ModelV5, self).__init__()
        self.dropout = torch.nn.Dropout(0.5)
        self.pool = torch.nn.MaxPool2d((2, 1))
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()
        
        self.bn1 = torch.nn.BatchNorm2d(128)
        self.bn2 = torch.nn.BatchNorm2d(184)

        self.conv2d1a1 = torch.nn.Conv2d(4, 16, (1, 1))
        self.conv2d1b1 = torch.nn.Conv2d(4, 32, (1, 1))
        self.conv2d1b2 = torch.nn.Conv2d(32, 32, (3, 7), padding=(1, 3))
        self.conv2d1c1 = torch.nn.Conv2d(4, 32, (1, 1))
        self.conv2d1c2 = torch.nn.Conv2d(32, 32, (5, 7), padding=(2, 3))
        self.conv2d1d1 = torch.nn.Conv2d(4, 48, (1, 1))
        self.conv2d1d2 = torch.nn.Conv2d(48, 48, (7, 7), padding=(3, 3))

        self.conv2d2a1 = torch.nn.Conv2d(128, 24, (1, 1))
        self.conv2d2b1 = torch.nn.Conv2d(128, 48, (1, 1))
        self.conv2d2b2 = torch.nn.Conv2d(48, 48, (3, 7), padding=(1, 3))
        self.conv2d2c1 = torch.nn.Conv2d(128, 48, (1, 1))
        self.conv2d2c2 = torch.nn.Conv2d(48, 48, (5, 7), padding=(2, 3))
        self.conv2d2d1 = torch.nn.Conv2d(128, 64, (1, 1))
        self.conv2d2d2 = torch.nn.Conv2d(64, 64, (7, 7), padding=(3, 3))
        
        self.conv2d3 = torch.nn.Conv2d(184, 4, (6, 7), padding=(0, 3))

    def forward(self, x):
        x_manzu = x[..., 0:9]
        x_pinzu = x[..., 9:18]
        x_soozu = x[..., 18:27]
        x_jihai = torch.nn.functional.pad(x[..., 27:34], (1, 1, 0, 0))
        x = torch.stack([x_manzu, x_pinzu, x_soozu, x_jihai], dim=-3)
        x = torch.cat([
            self.conv2d1a1(x), 
            self.conv2d1b2(self.relu(self.conv2d1b1(x))),
            self.conv2d1c2(self.relu(self.conv2d1c1(x))),
            self.conv2d1d2(self.relu(self.conv2d1d1(x))),
        ], dim=-3)
        x = self.dropout(self.pool(self.relu(self.bn1(x))))
        x = torch.cat([
            self.conv2d2a1(x), 
            self.conv2d2b2(self.relu(self.conv2d2b1(x))),
            self.conv2d2c2(self.relu(self.conv2d2c1(x))),
            self.conv2d2d2(self.relu(self.conv2d2d1(x))),
        ], dim=-3)
        x = self.dropout(self.pool(self.relu(self.bn2(x))))
        x = self.sigmoid(self.conv2d3(x))
        x = torch.squeeze(x, dim=-2)
        x = torch.concat([x[..., 0, :], x[..., 1, :], x[..., 2, :], x[..., 3, 1:8]], dim=-1)
        return x

summary(ModelV5(), (24, 34), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 16, 24, 9]              80
            Conv2d-2            [-1, 32, 24, 9]             160
              ReLU-3            [-1, 32, 24, 9]               0
            Conv2d-4            [-1, 32, 24, 9]          21,536
            Conv2d-5            [-1, 32, 24, 9]             160
              ReLU-6            [-1, 32, 24, 9]               0
            Conv2d-7            [-1, 32, 24, 9]          35,872
            Conv2d-8            [-1, 48, 24, 9]             240
              ReLU-9            [-1, 48, 24, 9]               0
           Conv2d-10            [-1, 48, 24, 9]         112,944
      BatchNorm2d-11           [-1, 128, 24, 9]             256
             ReLU-12           [-1, 128, 24, 9]               0
        MaxPool2d-13           [-1, 128, 12, 9]               0
          Dropout-14           [-1, 128

#### Model V6
Recoding the model from V4_3

In [2]:
from torchsummary import summary


class ModelV6(torch.nn.Module):
    def __init__(self):
        super(ModelV6, self).__init__()
        self.dropout = torch.nn.Dropout(0.5)
        self.pool = torch.nn.MaxPool2d((2, 1))
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()

        self.blocks = (
            ("block1", 4, 64, 2, (7, 7), True),
            ("block2", 64, 64, 2, (7, 7), False),
            ("block3", 64, 64, 2, (7, 7), True),
        )
        for block_args in self.blocks:
            self._create_block(*block_args)
        
        self.conv_final = torch.nn.Conv2d(64, 4, (6, 7), padding=(0, 3))

    def _create_block(self, prefix, in_layers, out_layers, depth, kernel_size, downsize):
        padding = tuple(i // 2 for i in kernel_size)
        setattr(self, f"{prefix}_conv_res", torch.nn.Conv2d(in_layers, out_layers, (1, 1)))
        for sublayer in range(depth):
            setattr(self, f"{prefix}_conv_{sublayer}", 
                    torch.nn.Conv2d(
                        in_layers if sublayer == 0 else out_layers, 
                        out_layers, kernel_size, padding=padding))
            setattr(self, f"{prefix}_bn_{sublayer}", torch.nn.BatchNorm2d(out_layers))

    def forward(self, x):
        x_manzu = x[..., 0:9]
        x_pinzu = x[..., 9:18]
        x_soozu = x[..., 18:27]
        x_jihai = torch.nn.functional.pad(x[..., 27:34], (1, 1, 0, 0))
        x = torch.stack([x_manzu, x_pinzu, x_soozu, x_jihai], dim=-3)
        for prefix, depth, downsize in map(lambda t: (t[0], t[3], t[5]), self.blocks):
            x_res = getattr(self, f"{prefix}_conv_res")(x)
            for sublayer in range(depth):
                if sublayer == (depth - 1):  # Final layer
                    x = getattr(self, f"{prefix}_conv_{sublayer}")(x)
                    x = torch.add(x, x_res)
                    x = getattr(self, f"{prefix}_bn_{sublayer}")(x)
                    x = self.relu(x)
                    if downsize:
                        x = self.pool(x)
                    x = self.dropout(x)
                else:
                    x = getattr(self, f"{prefix}_conv_{sublayer}")(x)
                    x = getattr(self, f"{prefix}_bn_{sublayer}")(x)
                    x = self.relu(x)
                    x = self.dropout(x)
        x = self.sigmoid(self.conv_final(x))
        x = torch.squeeze(x, dim=-2)
        x = torch.concat([x[..., 0, :], x[..., 1, :], x[..., 2, :], x[..., 3, 1:8]], dim=-1)
        return x

summary(ModelV6(), (24, 34), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 64, 24, 9]             320
            Conv2d-2            [-1, 64, 24, 9]          12,608
       BatchNorm2d-3            [-1, 64, 24, 9]             128
              ReLU-4            [-1, 64, 24, 9]               0
           Dropout-5            [-1, 64, 24, 9]               0
            Conv2d-6            [-1, 64, 24, 9]         200,768
       BatchNorm2d-7            [-1, 64, 24, 9]             128
              ReLU-8            [-1, 64, 24, 9]               0
         MaxPool2d-9            [-1, 64, 12, 9]               0
          Dropout-10            [-1, 64, 12, 9]               0
           Conv2d-11            [-1, 64, 12, 9]           4,160
           Conv2d-12            [-1, 64, 12, 9]         200,768
      BatchNorm2d-13            [-1, 64, 12, 9]             128
             ReLU-14            [-1, 64

In [2]:
# Increasing layers

from torchsummary import summary


class ModelV6_1(torch.nn.Module):
    def __init__(self):
        super(ModelV6_1, self).__init__()
        self.dropout = torch.nn.Dropout(0.5)
        self.pool = torch.nn.MaxPool2d((2, 1))
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()

        self.blocks = (
            ("block1a", 4, 64, 2, (7, 7), False),
            ("block1b", 64, 64, 2, (7, 7), True),
            ("block2a", 64, 64, 2, (7, 7), False),
            ("block2b", 64, 64, 2, (7, 7), True),
            ("block3a", 64, 64, 2, (7, 7), False),
        )
        for block_args in self.blocks:
            self._create_block(*block_args)
        
        self.conv_final = torch.nn.Conv2d(64, 4, (6, 7), padding=(0, 3))

    def _create_block(self, prefix, in_layers, out_layers, depth, kernel_size, downsize):
        padding = tuple(i // 2 for i in kernel_size)
        setattr(self, f"{prefix}_conv_res", torch.nn.Conv2d(in_layers, out_layers, (1, 1)))
        for sublayer in range(depth):
            setattr(self, f"{prefix}_conv_{sublayer}", 
                    torch.nn.Conv2d(
                        in_layers if sublayer == 0 else out_layers, 
                        out_layers, kernel_size, padding=padding))
            setattr(self, f"{prefix}_bn_{sublayer}", torch.nn.BatchNorm2d(out_layers))

    def forward(self, x):
        x_manzu = x[..., 0:9]
        x_pinzu = x[..., 9:18]
        x_soozu = x[..., 18:27]
        x_jihai = torch.nn.functional.pad(x[..., 27:34], (1, 1, 0, 0))
        x = torch.stack([x_manzu, x_pinzu, x_soozu, x_jihai], dim=-3)
        for prefix, depth, downsize in map(lambda t: (t[0], t[3], t[5]), self.blocks):
            x_res = getattr(self, f"{prefix}_conv_res")(x)
            for sublayer in range(depth):
                if sublayer == (depth - 1):  # Final layer
                    x = getattr(self, f"{prefix}_conv_{sublayer}")(x)
                    x = torch.add(x, x_res)
                    x = getattr(self, f"{prefix}_bn_{sublayer}")(x)
                    x = self.relu(x)
                    if downsize:
                        x = self.pool(x)
                    x = self.dropout(x)
                else:
                    x = getattr(self, f"{prefix}_conv_{sublayer}")(x)
                    x = getattr(self, f"{prefix}_bn_{sublayer}")(x)
                    x = self.relu(x)
                    x = self.dropout(x)
        x = self.sigmoid(self.conv_final(x))
        x = torch.squeeze(x, dim=-2)
        x = torch.concat([x[..., 0, :], x[..., 1, :], x[..., 2, :], x[..., 3, 1:8]], dim=-1)
        return x

summary(ModelV6_1(), (24, 34), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 64, 24, 9]             320
            Conv2d-2            [-1, 64, 24, 9]          12,608
       BatchNorm2d-3            [-1, 64, 24, 9]             128
              ReLU-4            [-1, 64, 24, 9]               0
           Dropout-5            [-1, 64, 24, 9]               0
            Conv2d-6            [-1, 64, 24, 9]         200,768
       BatchNorm2d-7            [-1, 64, 24, 9]             128
              ReLU-8            [-1, 64, 24, 9]               0
           Dropout-9            [-1, 64, 24, 9]               0
           Conv2d-10            [-1, 64, 24, 9]           4,160
           Conv2d-11            [-1, 64, 24, 9]         200,768
      BatchNorm2d-12            [-1, 64, 24, 9]             128
             ReLU-13            [-1, 64, 24, 9]               0
          Dropout-14            [-1, 64

### Training

In [None]:
from torch.utils.tensorboard import SummaryWriter
from collections import deque
import datetime
import tqdm

EPOCHS = 100
RUN_PATH = "runs/riichi-dsv01-mv06_1"

timestamp = datetime.datetime.today().strftime("%Y%m%d_%H%M%S")
train_writer = SummaryWriter(f"{RUN_PATH}/{timestamp}/train")
test_writer = SummaryWriter(f"{RUN_PATH}/{timestamp}/test")

model = ModelV6_1()
model = torch.nn.DataParallel(model)
model.to("cuda")

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.5)
loss_deque = deque(maxlen=len(train_dl))

for epoch in range(EPOCHS):
    pbar = tqdm.tqdm(
        range(len(train_dl)), 
        desc=f"Epoch {epoch+1}/{EPOCHS}", 
        position=0, leave=True, ncols=90)
    
    model.train(True)
    for i, (x, y) in enumerate(train_dl):
        x, y = x.to("cuda"), y.to("cuda")
        optimizer.zero_grad()
        pred = model(x)
        loss = criterion(pred, y)
        loss.backward()
        optimizer.step()
        loss_deque.append(loss.item())
        train_writer.add_scalar("loss", loss.item(), epoch * len(train_dl) + i)
        pbar.set_postfix({"loss": f"{sum(loss_deque) / len(loss_deque):.3f}"})
        pbar.update()
    scheduler.step()
    
    running_loss = 0.0
    model.eval()
    pbar = tqdm.tqdm(
        range(len(test_dl)), desc="Test", 
        position=0, leave=True, ncols=90
    )
    with torch.no_grad():
        for i, (x, y) in enumerate(test_dl):
            x, y = x.to("cuda"), y.to("cuda")
            pred = model(x)
            loss = criterion(pred, y)
            running_loss += loss.item()
            pbar.set_postfix({"loss": f"{running_loss / (i + 1):.3f}"})
            pbar.update()
        test_writer.add_scalar("loss", running_loss / len(test_dl), (epoch + 1) * len(train_dl))
pbar.close()

2024-06-22 19:38:52.340627: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-22 19:38:52.340660: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-22 19:38:52.341327: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-06-22 19:38:52.346030: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Epoch 1/100: 100%|████████████████████████| 1808/1808

### Evaluation

In [4]:
from IPython.display import Markdown

def arr2mjutf8(x, pred, y=None):
    mj_list_utf8 = [
        "🀇","🀈","🀉","🀊","🀋","🀌","🀍","🀎","🀏",
        "🀙","🀚","🀛","🀜","🀝","🀞","🀟","🀠","🀡",
        "🀐","🀑","🀒","🀓","🀔","🀕","🀖","🀗","🀘",
        "🀀","🀁","🀂","🀃","🀆","🀅","🀄︎"]
    river = list(map((lambda i: mj_list_utf8[i]), np.where(x.cpu().numpy())[1].tolist()))
    if y is not None:
        wait_y = list(map((lambda i: mj_list_utf8[i]), np.where(y.cpu().numpy())[0].tolist()))
    wait_pred = pred.detach().cpu().numpy().tolist()
    wait_pred = list(map((lambda l: f"{mj_list_utf8[l[0]]}{l[1]:.4f}"), enumerate(wait_pred)))
    wait_pred.insert(27, "<br>")
    wait_pred.insert(18, "<br>")
    wait_pred.insert(9, "<br>")
    output = f"<div style='line-height:1em;font-size:30px'>" + \
             f"River (x)<br>{''.join(river[:6])}<br>{''.join(river[6:12])}<br>{''.join(river[12:])}<br>" + \
             f"""{'' if y is None else f'Wait (y)<br>{"".join(wait_y)}<br><br>'}""" + \
             f"Prediction<br>|{'|'.join(wait_pred)}|</div>"
    display(Markdown(output))

In [12]:
torch.set_printoptions(sci_mode=False)
x, y = next(iter(test_dl))
x, y = x.to("cuda"), y.to("cuda")

arr2mjutf8(x[0], model(x)[0], y[0])

<div style='line-height:1em;font-size:30px'>River (x)<br>🀙🀅🀡🀙🀔🀗<br>🀎🀔<br><br>Wait (y)<br>🀉<br><br>Prediction<br>|🀇0.9997|🀈1.0000|🀉1.0000|🀊1.0000|🀋0.9999|🀌1.0000|🀍0.9997|🀎0.0000|🀏0.8968|<br>|🀙0.0000|🀚0.9982|🀛0.9998|🀜0.9982|🀝1.0000|🀞0.9999|🀟0.9997|🀠0.9997|🀡0.0000|<br>|🀐0.9999|🀑0.2273|🀒0.9893|🀓0.9999|🀔0.0000|🀕0.6622|🀖0.0876|🀗0.0000|🀘0.4366|<br>|🀀0.0000|🀁0.0000|🀂0.0000|🀃0.0000|🀆0.0000|🀅0.0000|🀄︎0.0000|</div>

In [20]:
import numpy as np

mj_list_utf8 = "🀇🀈🀉🀊🀋🀌🀍🀎🀏🀙🀚🀛🀜🀝🀞🀟🀠🀡🀐🀑🀒🀓🀔🀕🀖🀗🀘🀀🀁🀂🀃🀆🀅🀄︎"
river_ = "🀃🀚🀂🀐🀡🀊🀗🀀🀌"
river_ = list(map((lambda i: None if mj_list_utf8.index(i) == 34 else mj_list_utf8.index(i)), river_))
river_ = list(filter((lambda i: i is not None), river_))
river = np.zeros((24, 34), dtype=np.int8)
river[np.arange(len(river_)), river_] = 1
river = torch.tensor(np.expand_dims(river, 0).astype(np.float32)).to("cuda")
model.eval()
arr2mjutf8(river[0], model(river)[0])

<div style='line-height:1em;font-size:30px'>River (x)<br>🀃🀚🀂🀐🀡🀊<br>🀗🀀🀌<br><br>Prediction<br>|🀇0.0001|🀈0.9375|🀉0.0001|🀊0.0000|🀋1.0000|🀌0.0000|🀍0.2411|🀎0.9999|🀏0.0060|<br>|🀙0.0001|🀚0.0000|🀛0.9998|🀜0.9999|🀝1.0000|🀞1.0000|🀟0.9999|🀠1.0000|🀡0.0000|<br>|🀐0.0001|🀑0.9998|🀒1.0000|🀓1.0000|🀔1.0000|🀕1.0000|🀖0.9999|🀗0.0000|🀘0.8126|<br>|🀀0.0000|🀁0.0000|🀂0.0000|🀃0.0000|🀆0.0000|🀅0.0000|🀄︎0.0000|</div>

In [21]:
import numpy as np

mj_list_utf8 = "🀇🀈🀉🀊🀋🀌🀍🀎🀏🀙🀚🀛🀜🀝🀞🀟🀠🀡🀐🀑🀒🀓🀔🀕🀖🀗🀘🀀🀁🀂🀃🀆🀅🀄︎"
river_ = "🀃🀚🀂🀐🀡🀗🀀🀌"
river_ = list(map((lambda i: None if mj_list_utf8.index(i) == 34 else mj_list_utf8.index(i)), river_))
river_ = list(filter((lambda i: i is not None), river_))
river = np.zeros((24, 34), dtype=np.int8)
river[np.arange(len(river_)), river_] = 1
river = torch.tensor(np.expand_dims(river, 0).astype(np.float32)).to("cuda")
model.eval()
arr2mjutf8(river[0], model(river)[0])

<div style='line-height:1em;font-size:30px'>River (x)<br>🀃🀚🀂🀐🀡🀗<br>🀀🀌<br><br>Prediction<br>|🀇0.9935|🀈1.0000|🀉0.9935|🀊1.0000|🀋1.0000|🀌0.0000|🀍0.9682|🀎0.9926|🀏0.0319|<br>|🀙0.0001|🀚0.0000|🀛0.9995|🀜0.9991|🀝1.0000|🀞1.0000|🀟0.9999|🀠0.9999|🀡0.0000|<br>|🀐0.0000|🀑0.9988|🀒1.0000|🀓1.0000|🀔0.9998|🀕1.0000|🀖1.0000|🀗0.0000|🀘0.1626|<br>|🀀0.0000|🀁0.0000|🀂0.0000|🀃0.0000|🀆0.0000|🀅0.0000|🀄︎0.0000|</div>