In [20]:
from dlgo.data.parallel_processor import GoDataProcessor
from dlgo.encoders.oneplane import OnePlaneEncoder
from dlgo.networks.small import Small

import torch
import torch.nn as nn
from torch.optim import SGD

In [21]:
go_board_rows, go_board_cols = 19, 19
num_classes = go_board_rows * go_board_cols
num_games = 100

def compute_acc(argmax, y):
    count = 0
    for i in range(len(argmax)):
        if argmax[i] == y[i]:
            count += 1
    return count / len(argmax)

def initialize_weights(m):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_uniform_(m.weight.data,nonlinearity='relu')
        if m.bias is not None:
            nn.init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight.data, 1)
        nn.init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.Linear):
        nn.init.kaiming_uniform_(m.weight.data)
        nn.init.constant_(m.bias.data, 0)

BATCH_SIZE = 128
LEARNING_RATE = 0.001
NUM_EPOCHES = 5

In [22]:
encoder = OnePlaneEncoder((go_board_rows, go_board_cols))  # First we create an encoder of board size.

processor = GoDataProcessor(encoder=encoder.name())  # Then we initialize a Go Data processor with it.

generator = processor.load_go_data('train', num_games, use_generator=True)  # From the processor we create two data generators, for training and testing.
test_generator = processor.load_go_data('test', num_games, use_generator=True)

model = Small(go_board_rows, encoder.num_planes).cuda()

optimizer = SGD(model.parameters())
loss_fn = nn.CrossEntropyLoss()

total_steps = generator.get_num_samples() // BATCH_SIZE
print(generator.num_samples)

>>> Reading cached index page
KGS-2019_04-19-1255-.tar.gz 1255
KGS-2019_03-19-1478-.tar.gz 1478
KGS-2019_02-19-1412-.tar.gz 1412
KGS-2019_01-19-2095-.tar.gz 2095
KGS-2018_12-19-1992-.tar.gz 1992
KGS-2018_11-19-1879-.tar.gz 1879
KGS-2018_10-19-1209-.tar.gz 1209
KGS-2018_09-19-1587-.tar.gz 1587
KGS-2018_08-19-1447-.tar.gz 1447
KGS-2018_07-19-949-.tar.gz 949
KGS-2018_06-19-1002-.tar.gz 1002
KGS-2018_05-19-1590-.tar.gz 1590
KGS-2018_04-19-1612-.tar.gz 1612
KGS-2018_03-19-833-.tar.gz 833
KGS-2018_02-19-1167-.tar.gz 1167
KGS-2018_01-19-1526-.tar.gz 1526
KGS-2017_12-19-1488-.tar.gz 1488
KGS-2017_11-19-945-.tar.gz 945
KGS-2017_10-19-1351-.tar.gz 1351
KGS-2017_09-19-1353-.tar.gz 1353
KGS-2017_08-19-2205-.tar.gz 2205
KGS-2017_07-19-1191-.tar.gz 1191
KGS-2017_06-19-910-.tar.gz 910
KGS-2017_05-19-847-.tar.gz 847
KGS-2017_04-19-913-.tar.gz 913
KGS-2017_03-19-717-.tar.gz 717
KGS-2017_02-19-525-.tar.gz 525
KGS-2017_01-19-733-.tar.gz 733
KGS-2016_12-19-1208-.tar.gz 1208
KGS-2016_11-19-980-.tar.gz 980


In [23]:
model

Small(
  (conv1): Conv2d(1, 48, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
  (conv2): Conv2d(48, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv3): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv4): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (bn1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=11552, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=361, bias=True)
)

In [9]:
import numpy as np

In [11]:

x = torch.Tensor(np.random.rand(32, 1, 19, 19))
x

tensor([[[[0.0309, 0.9091, 0.0691,  ..., 0.9886, 0.0867, 0.8542],
          [0.8647, 0.5342, 0.7568,  ..., 0.4739, 0.2088, 0.1639],
          [0.3123, 0.4012, 0.8679,  ..., 0.6941, 0.2763, 0.1505],
          ...,
          [0.0465, 0.8297, 0.8725,  ..., 0.7844, 0.0643, 0.9094],
          [0.0106, 0.7013, 0.5412,  ..., 0.8544, 0.6170, 0.4582],
          [0.3127, 0.6905, 0.2242,  ..., 0.0474, 0.8159, 0.5017]]],


        [[[0.6942, 0.2762, 0.6338,  ..., 0.1202, 0.2771, 0.5740],
          [0.8529, 0.3737, 0.4984,  ..., 0.8490, 0.6774, 0.7114],
          [0.9966, 0.3081, 0.1232,  ..., 0.1555, 0.7079, 0.4874],
          ...,
          [0.9467, 0.3802, 0.9262,  ..., 0.6556, 0.5944, 0.9155],
          [0.7490, 0.7303, 0.2930,  ..., 0.9914, 0.9121, 0.2489],
          [0.5065, 0.2035, 0.6502,  ..., 0.1183, 0.6967, 0.1514]]],


        [[[0.6305, 0.2060, 0.6713,  ..., 0.3798, 0.9796, 0.7821],
          [0.5201, 0.4192, 0.7338,  ..., 0.9780, 0.1205, 0.4547],
          [0.8291, 0.8268, 0.1669,  ..

In [15]:
y = torch.Tensor(np.random.randint(0, 362, size=(32)))
y = torch.tensor(y, dtype=torch.long)
y

  y = torch.tensor(y, dtype=torch.long)


tensor([ 19,  63, 332, 347,  38, 167, 228, 189, 154,  71, 314, 301, 213, 125,
        151, 197,  54, 300, 250, 338,  66,  20, 336, 321,  22, 195,  48, 299,
        215, 246, 165, 114])

In [16]:
x.shape

torch.Size([32, 1, 19, 19])

In [17]:
model.train()
tot_loss = 0.0
steps = 0

for x, y in generator.generate(BATCH_SIZE, num_classes):
    steps += 1
    optimizer.zero_grad()
    x = x.cuda()
    y_ = model(x)
    loss = loss_fn(y_, y.cuda()) 
    loss.backward()
    tot_loss += loss.item()
    optimizer.step()

    if steps >= total_steps:
        break

In [18]:
y_

tensor([[0.0020, 0.0029, 0.0036,  ..., 0.0015, 0.0033, 0.0029],
        [0.0014, 0.0025, 0.0023,  ..., 0.0018, 0.0030, 0.0021],
        [0.0021, 0.0029, 0.0036,  ..., 0.0016, 0.0032, 0.0030],
        ...,
        [0.0026, 0.0029, 0.0025,  ..., 0.0023, 0.0041, 0.0024],
        [0.0021, 0.0034, 0.0031,  ..., 0.0023, 0.0031, 0.0023],
        [0.0026, 0.0029, 0.0026,  ..., 0.0025, 0.0041, 0.0025]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)

In [19]:
y_.shape

torch.Size([128, 361])

In [24]:
model.train()
tot_loss = 0.0
steps = 0

x, y = next(iter(generator.generate(BATCH_SIZE, num_classes)))

In [25]:
y

tensor([ 73,  71,  50, 288, 249, 211, 290, 268, 250, 270, 267, 286, 269, 289,
        287, 306, 232, 154,  43,  65,  40,  59,  41,  61,  42,  62,  63,  82,
         83,  64,  44, 101,  52, 309, 193, 192, 156, 136, 102,  85, 121, 138,
        123,  87,  67,  86, 125,  69,  51, 127, 165, 145, 107,  70, 108, 109,
        128, 126, 106,  72,  53, 110, 147, 163,  92, 141, 162, 182, 104, 149,
        168, 150, 111, 129, 186, 203, 224, 180, 119, 137,  81, 100,  99,  80,
        120,  81,  98,  77,  96,  39,  20,  19,   1,  97, 116,  78, 115, 117,
        118,  76,  57,  21,  22, 134, 158, 139, 140, 159, 160, 178, 179, 198,
        161, 199, 144, 164, 142, 263, 317, 129, 186,  70,  92, 109, 282, 262,
        224, 260])

In [26]:
x.shape

torch.Size([128, 1, 19, 19])

In [38]:
for x, y in generator.generate(BATCH_SIZE, num_classes):
    steps += 1
    optimizer.zero_grad()
    x = x.cuda()
    y_ = model(x)
    loss = loss_fn(y_, y.cuda()) 
    loss.backward()
    tot_loss += loss.item()
    optimizer.step()

    if steps >= total_steps:
        break

In [39]:
y_

tensor([[0.0026, 0.0027, 0.0031,  ..., 0.0026, 0.0025, 0.0028],
        [0.0025, 0.0027, 0.0030,  ..., 0.0026, 0.0025, 0.0027],
        [0.0026, 0.0026, 0.0031,  ..., 0.0026, 0.0025, 0.0028],
        ...,
        [0.0026, 0.0028, 0.0031,  ..., 0.0025, 0.0025, 0.0029],
        [0.0026, 0.0027, 0.0028,  ..., 0.0027, 0.0026, 0.0027],
        [0.0027, 0.0027, 0.0031,  ..., 0.0026, 0.0026, 0.0030]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)

In [40]:
y_.shape

torch.Size([128, 361])

In [41]:
max_value, argmax = torch.max(y_, dim=1)

In [42]:
max_value

tensor([0.0035, 0.0035, 0.0035, 0.0034, 0.0035, 0.0034, 0.0035, 0.0034, 0.0036,
        0.0034, 0.0037, 0.0034, 0.0037, 0.0035, 0.0038, 0.0037, 0.0039, 0.0036,
        0.0037, 0.0036, 0.0037, 0.0035, 0.0039, 0.0036, 0.0038, 0.0037, 0.0037,
        0.0038, 0.0040, 0.0037, 0.0041, 0.0040, 0.0040, 0.0038, 0.0043, 0.0038,
        0.0042, 0.0037, 0.0043, 0.0039, 0.0042, 0.0039, 0.0044, 0.0039, 0.0043,
        0.0039, 0.0043, 0.0041, 0.0043, 0.0040, 0.0044, 0.0042, 0.0043, 0.0046,
        0.0045, 0.0044, 0.0046, 0.0043, 0.0047, 0.0046, 0.0047, 0.0045, 0.0048,
        0.0047, 0.0053, 0.0047, 0.0050, 0.0049, 0.0051, 0.0047, 0.0050, 0.0048,
        0.0051, 0.0049, 0.0051, 0.0051, 0.0049, 0.0050, 0.0049, 0.0050, 0.0048,
        0.0055, 0.0048, 0.0055, 0.0052, 0.0054, 0.0053, 0.0050, 0.0052, 0.0048,
        0.0051, 0.0046, 0.0051, 0.0048, 0.0053, 0.0051, 0.0056, 0.0052, 0.0052,
        0.0052, 0.0055, 0.0052, 0.0055, 0.0053, 0.0052, 0.0055, 0.0053, 0.0054,
        0.0054, 0.0056, 0.0053, 0.0054, 

In [24]:
for param in model.parameters():
    print(param.requires_grad)

True
True
True
True
True
True
True
True
True
True
True
True


In [26]:
print(optimizer.param_groups[0]['lr'])

0.001


In [40]:
x.shape

torch.Size([128, 1, 19, 19])

In [41]:
x[0][0]

tensor([[ 0.,  0.,  0., -1.,  1.,  0.,  1.,  0.,  1.,  0.,  1., -1., -1.,  1.,
          0.,  1.,  1., -1., -1.],
        [ 0.,  0., -1.,  0., -1.,  1.,  1.,  0.,  0.,  1.,  0.,  1., -1., -1.,
          1.,  0.,  1., -1.,  0.],
        [ 0., -1., -1., -1.,  0., -1.,  1.,  1.,  1.,  1.,  1.,  1.,  0., -1.,
          1.,  1.,  1.,  1., -1.],
        [-1.,  0., -1., -1., -1., -1.,  1., -1., -1., -1., -1.,  1., -1., -1.,
         -1., -1.,  1.,  1.,  1.],
        [-1., -1.,  1., -1.,  1., -1., -1.,  0., -1.,  0., -1., -1.,  1.,  0.,
          1., -1., -1., -1., -1.],
        [ 1.,  1.,  1., -1.,  1., -1.,  0., -1.,  0., -1.,  1., -1.,  1.,  1.,
          0., -1.,  1.,  0.,  0.],
        [ 0.,  0.,  1.,  1.,  1.,  1., -1.,  1., -1., -1.,  1.,  1.,  0.,  1.,
          1., -1., -1.,  0.,  0.],
        [ 1.,  1.,  0.,  0.,  0.,  1.,  1.,  1.,  0., -1., -1.,  1.,  1.,  1.,
         -1.,  0., -1.,  0.,  0.],
        [ 1., -1.,  1.,  0.,  0.,  0., -1.,  1., -1., -1., -1., -1.,  1., -1.,
         