In [2]:
import wandb
import pathlib
import urllib
import zstandard
import chess
import torch
import numpy as np
from torch import nn
import math
import time

from torch.nn import init
from torch.utils.data import DataLoader
from torchinfo import summary

In [3]:
# Global run configuration: mini-batch size, RNG seed and compute device.
BATCH_SIZE = 64

# Seed PyTorch's RNG once so DataLoader shuffling and Xavier initialisation
# draw the same random numbers after every kernel restart.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"DEVICE: {DEVICE}")

DEVICE: cuda


In [4]:
# Authenticate with Weights & Biases before any sweep or run is created.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mmatik[0m ([33mmatik001[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [5]:
# Load the pre-built dataset splits from disk.
# NOTE(review): torch.load unpickles arbitrary Python objects, so these files
# must come from a trusted source.
dataset_train = torch.load('dataset_train.save')
dataset_test = torch.load('dataset_test.save')
dataset_valid = torch.load('dataset_valid.save')

# Only the training loader is shuffled. Accuracy does not depend on sample
# order, so the evaluation loaders keep a fixed order (cheaper and repeatable).
loader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
loader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)
loader_valid = DataLoader(dataset_valid, batch_size=BATCH_SIZE, shuffle=False)

print("Batches train", len(loader_train))
print("Batches valid", len(loader_valid))
print("Batches test", len(loader_test))

Batches train 13126
Batches valid 500
Batches test 500


In [6]:
# Shape of one full mini-batch: the batch dimension followed by the dimensions
# of the first training sample's feature tensor. Passed to torchinfo's summary.
DATA_SHAPE = (BATCH_SIZE, *dataset_train[0][0].shape)

In [8]:
class Model(nn.Module):
    """Thin wrapper around ``nn.Sequential`` with init and evaluation helpers.

    All positional and keyword arguments are forwarded unchanged to
    ``nn.Sequential``; the resulting stack is stored as ``self.classifier``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        self.classifier = nn.Sequential(*args, **kwargs)

    def init_xavier(self):
        """Re-initialise the weights of every Conv2d/Linear layer in place.

        Only ``weight`` tensors are touched (Xavier normal); biases keep
        whatever values they currently hold.
        """
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                init.xavier_normal_(m.weight)

    def calc_accuracy(self, loader):
        """Return the fraction of samples in ``loader`` classified correctly.

        Args:
            loader: iterable yielding ``(features, labels)`` batches.

        Returns:
            ``correct / total`` as a float, or ``0.0`` when the loader yields
            no samples.

        The model is switched to evaluation mode for the pass and its previous
        training/eval mode is restored afterwards. Batches are moved to the
        device the model's parameters live on.
        """
        # Follow the model's own device instead of relying on a global.
        first_param = next(self.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

        was_training = self.training
        self.eval()
        correct = 0
        total = 0
        try:
            with torch.no_grad():
                for features, labels in loader:
                    features = features.to(device)
                    labels = labels.to(device)
                    # Predicted class = index of the largest score per row.
                    predicted = self(features).argmax(dim=1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
        finally:
            # Leave the module in the mode the caller had it in.
            self.train(was_training)
        return correct / total if total else 0.0

    def forward(self, X):
        """Apply the wrapped layer stack to ``X``."""
        # Call the module (not .forward) so hooks registered on the
        # Sequential container are executed as well.
        return self.classifier(X)




In [13]:
# Model 0: four padded convolutions (with one stride-1 max-pool in between)
# followed by a three-layer fully connected head producing 11 class scores.
model0 = Model(
    nn.Conv2d(21, 8 * 8, kernel_size=4, padding=4),
    nn.ReLU(),
    nn.Conv2d(8 * 8, 4 * 4, kernel_size=2, padding=4),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=4, stride=1),
    nn.Conv2d(4 * 4, 8 * 8, kernel_size=2, padding=4),
    nn.ReLU(),
    # Single output channel: Flatten then yields 1 * 29 * 29 = 841 features.
    nn.Conv2d(8 * 8, 1, kernel_size=4, padding=4),
    nn.ReLU(),
    nn.Flatten(),
    nn.Linear(841, 256),
    nn.ReLU(),
    nn.Linear(256, 64),
    nn.ReLU(),
    nn.Linear(64, 11),
)

# Dry-run on one batch-shaped input to check layer shapes and parameter counts.
architecture_summary = summary(model0, DATA_SHAPE)
print(architecture_summary)

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    [64, 11]                  --
├─Sequential: 1-1                        --                        --
│    └─Conv2d: 2-1                       [64, 64, 13, 13]          21,568
│    └─ReLU: 2-2                         [64, 64, 13, 13]          --
│    └─Conv2d: 2-3                       [64, 16, 20, 20]          4,112
│    └─ReLU: 2-4                         [64, 16, 20, 20]          --
│    └─MaxPool2d: 2-5                    [64, 16, 17, 17]          --
│    └─Conv2d: 2-6                       [64, 64, 24, 24]          4,160
│    └─ReLU: 2-7                         [64, 64, 24, 24]          --
│    └─Conv2d: 2-8                       [64, 1, 29, 29]           1,025
│    └─ReLU: 2-9                         [64, 1, 29, 29]           --
│    └─Flatten: 2-10                     [64, 841]                 --
│    └─Linear: 2-11                      [64, 256]                 215,5

In [14]:
# Model 1: one wide convolution, one narrowing convolution and a small
# two-layer fully connected head producing 11 class scores.
model1 = Model(
    nn.Conv2d(21, 300, kernel_size=5, padding=5),
    nn.ReLU(),
    nn.Conv2d(300, 30, kernel_size=2, padding=2),
    nn.ReLU(),
    # 30 channels of 17 x 17 flatten to 8670 features.
    nn.Flatten(),
    nn.Linear(8670, 20),
    nn.ReLU(),
    nn.Linear(20, 11),
)

# Dry-run on one batch-shaped input to check layer shapes and parameter counts.
architecture_summary = summary(model1, DATA_SHAPE)
print(architecture_summary)

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    [64, 11]                  --
├─Sequential: 1-1                        --                        --
│    └─Conv2d: 2-1                       [64, 300, 14, 14]         157,800
│    └─ReLU: 2-2                         [64, 300, 14, 14]         --
│    └─Conv2d: 2-3                       [64, 30, 17, 17]          36,030
│    └─ReLU: 2-4                         [64, 30, 17, 17]          --
│    └─Flatten: 2-5                      [64, 8670]                --
│    └─Linear: 2-6                       [64, 20]                  173,420
│    └─ReLU: 2-7                         [64, 20]                  --
│    └─Linear: 2-8                       [64, 11]                  231
Total params: 367,481
Trainable params: 367,481
Non-trainable params: 0
Total mult-adds (Units.GIGABYTES): 2.66
Input size (MB): 0.34
Forward/backward pass size (MB): 34.56
Params size (MB): 1.47
Estimated Tota

In [15]:
# Model 2: four padded convolutions that end in two channels, followed by a
# three-layer fully connected head producing 11 class scores.
model2 = Model(
    nn.Conv2d(21, 70, kernel_size=4, padding=2),
    nn.ReLU(),
    nn.Conv2d(70, 70, kernel_size=2, padding=2),
    nn.ReLU(),
    nn.Conv2d(70, 50, kernel_size=2, padding=2),
    nn.ReLU(),
    # Two channels of 18 x 18 flatten to 648 features.
    nn.Conv2d(50, 2, kernel_size=2, padding=2),
    nn.ReLU(),
    nn.Flatten(),
    nn.Linear(648, 30),
    nn.ReLU(),
    nn.Linear(30, 30),
    nn.ReLU(),
    nn.Linear(30, 11),
)

# Dry-run on one batch-shaped input to check layer shapes and parameter counts.
architecture_summary = summary(model2, DATA_SHAPE)
print(architecture_summary)

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    [64, 11]                  --
├─Sequential: 1-1                        --                        --
│    └─Conv2d: 2-1                       [64, 70, 9, 9]            23,590
│    └─ReLU: 2-2                         [64, 70, 9, 9]            --
│    └─Conv2d: 2-3                       [64, 70, 12, 12]          19,670
│    └─ReLU: 2-4                         [64, 70, 12, 12]          --
│    └─Conv2d: 2-5                       [64, 50, 15, 15]          14,050
│    └─ReLU: 2-6                         [64, 50, 15, 15]          --
│    └─Conv2d: 2-7                       [64, 2, 18, 18]           402
│    └─ReLU: 2-8                         [64, 2, 18, 18]           --
│    └─Flatten: 2-9                      [64, 648]                 --
│    └─Linear: 2-10                      [64, 30]                  19,470
│    └─ReLU: 2-11                        [64, 30]                  -

In [39]:
# Candidate architectures; the sweep's 'model' parameter is an index into this list.
MODELS_LIST = [model0, model1, model2]

In [40]:
def accuracy(out, truth):
    """Return a boolean tensor marking which predictions equal ``truth``.

    The predicted class for each row of ``out`` is the index of its largest
    value along dimension 1.
    """
    predicted = out.argmax(dim=1)
    return predicted.eq(truth)

In [41]:
def train(model, criterion, optimizer, epoch):
    """Train ``model`` on ``loader_train`` for ``epoch`` epochs, logging to W&B.

    Before training starts the model is moved to ``DEVICE`` and its
    Conv2d/Linear weights are re-initialised via ``model.init_xavier()``.
    After every epoch the mean training loss, the training accuracy and the
    accuracy on ``loader_valid`` are printed and sent to ``wandb.log``.

    Args:
        model: network exposing ``init_xavier`` and ``calc_accuracy``.
        criterion: loss module, called as ``criterion(out, truth)``.
        optimizer: optimizer already bound to the model's parameters.
        epoch: number of passes over ``loader_train``.
    """
    model = model.to(DEVICE)
    model.init_xavier()
    criterion = criterion.to(DEVICE)
    batches_amount = len(loader_train)

    for i in range(epoch):
        # Make sure training mode is active for this epoch's optimisation steps.
        model.train()
        time_started = time.perf_counter()
        loss_sum = 0.0
        correct_samples = 0
        seen_samples = 0
        for batch, truth in loader_train:
            batch = batch.to(DEVICE)
            truth = truth.to(DEVICE)

            optimizer.zero_grad()
            out = model(batch)
            loss = criterion(out, truth)
            loss.backward()
            optimizer.step()

            loss_sum += loss.item()
            # Count samples instead of dividing by BATCH_SIZE: the last batch
            # of an epoch may hold fewer than BATCH_SIZE samples.
            correct_samples += accuracy(out, truth).sum().item()
            seen_samples += truth.size(0)

        # Elapsed time in whole milliseconds, rounded up.
        passed_time = math.ceil((time.perf_counter() - time_started) * 1000)
        loss_average = loss_sum / batches_amount if batches_amount else 0.0
        accuracy_average = correct_samples / seen_samples if seen_samples else 0.0
        validation_accuracy = model.calc_accuracy(loader_valid)

        print(f"Epoch [{i+1}/{epoch}], train_loss: {loss_average}, train_accuracy: {accuracy_average}, validation_accuracy: {validation_accuracy}, time: {passed_time/1000}s")
        wandb.log({
            "epoch": i + 1,
            "train_loss": loss_average,
            "train_accuracy": accuracy_average,
            "validation_accuracy": validation_accuracy,
        })

In [None]:
# Weights & Biases sweep definition: search strategy, objective and value grid.
sweep_config = {
    'method': 'bayes',  # one of: random, grid, bayes
    'name': 'sweep-chess-tactics',
    # Optimise held-out accuracy rather than training loss, so the search does
    # not simply favour whichever configuration fits the training set hardest
    # (e.g. the larger 'epochs' value). The name must match a key passed to
    # wandb.log by the training loop.
    'metric': {
        'goal': 'maximize',
        'name': 'validation_accuracy',
    },
    'parameters': {
        'epochs': {
            'values': [15, 30],  # wider alternative: [5, 10, 30]
        },
        'learning_rate': {
            'values': [0.0003],  # wider alternative: [0.003, 0.001, 0.0003]
        },
        'optimizer': {
            'values': ['adam'],  # wider alternative: ['sgd', 'adam']
        },
        'criterion': {
            'values': ['CrossEntropy'],
        },
        'model': {
            # Each value is an index into MODELS_LIST.
            'values': list(range(len(MODELS_LIST))),
        },
    },
}

def create_optimizer(model, optimizer):
    """Resolve an optimizer name to the matching ``torch.optim`` class.

    Args:
        model: unused; kept so existing call sites keep working.
        optimizer: ``"sgd"`` or ``"adam"``. A non-string callable (for example
            an optimizer class) is returned unchanged.

    Returns:
        The optimizer class (not an instance); the caller instantiates it with
        the parameters and hyperparameters it needs.

    Raises:
        ValueError: if ``optimizer`` is an unknown name or is neither a string
            nor a callable.
    """
    optimizers_by_name = {
        "sgd": torch.optim.SGD,
        "adam": torch.optim.Adam,
    }
    if isinstance(optimizer, str):
        if optimizer in optimizers_by_name:
            return optimizers_by_name[optimizer]
        # Fail here with a clear message instead of handing the raw string
        # back and crashing later when the caller tries to call it.
        raise ValueError(
            f"Unknown optimizer {optimizer!r}; expected one of {sorted(optimizers_by_name)}"
        )
    if callable(optimizer):
        return optimizer
    raise ValueError(f"Unsupported optimizer specification: {optimizer!r}")

def create_criterion(criterion = "CrossEntropy"):
    """Build the loss module identified by ``criterion``.

    Args:
        criterion: name of the loss; only ``"CrossEntropy"`` is supported.

    Returns:
        A new ``nn.CrossEntropyLoss`` instance.

    Raises:
        ValueError: if ``criterion`` is not a supported name (previously the
            function fell through and returned ``None``).
    """
    if criterion == "CrossEntropy":
        return nn.CrossEntropyLoss()
    raise ValueError(f"Unknown criterion {criterion!r}; expected 'CrossEntropy'")

def main():
    """Run one sweep trial: build, train and save the configured model.

    Reads the trial's hyperparameters from ``wandb.config``, trains the
    selected architecture and pickles the trained module to a file whose name
    encodes the model index, epochs, learning rate and optimizer.
    """
    import copy  # local import: only this function needs it

    with wandb.init(config=None):
        config = wandb.config
        # Train a private copy. The entries of MODELS_LIST are shared between
        # trials, and init_xavier() only resets weights, so training the shared
        # object would carry trained biases over into the next trial.
        model = copy.deepcopy(MODELS_LIST[int(config['model'])])
        # Move the model to its device before the optimizer captures its parameters.
        model = model.to(DEVICE)
        opt_fn = create_optimizer(model, config['optimizer'])
        crt_fn = create_criterion(config['criterion'])
        optimizer = opt_fn(model.classifier.parameters(), lr=config['learning_rate'])
        train(model, crt_fn, optimizer, config['epochs'])
        model_path = f"model{config['model']}_epochs{config['epochs']}_learning_rate{config['learning_rate']}_optimizer{config['optimizer']}.save"
        # Saves the whole pickled module; loading it requires the Model class
        # to be importable/defined under the same name.
        torch.save(model, model_path)


# Register the sweep in the "chess-tactics-swp" project, then start an agent
# that invokes main() for each run it is assigned.
sweep_id = wandb.sweep(sweep_config, project="chess-tactics-swp")
wandb.agent(sweep_id, main)

Create sweep with ID: espidqob
Sweep URL: https://wandb.ai/matik001/chess-tactics-swp/sweeps/espidqob


[34m[1mwandb[0m: Agent Starting Run: 6n9ftkir with config:
[34m[1mwandb[0m: 	criterion: CrossEntropy
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	learning_rate: 0.0003
[34m[1mwandb[0m: 	model: 0
[34m[1mwandb[0m: 	optimizer: adam


Epoch [1/30], train_loss: 1.0812824478277288, train_accuracy: 0.63950127609325, validation_accuracy: 0.70159375, time: 147.188s
Epoch [2/30], train_loss: 0.8199746288346678, train_accuracy: 0.7283587917111077, validation_accuracy: 0.7399375, time: 122.31s
Epoch [3/30], train_loss: 0.739751915678861, train_accuracy: 0.7543187185738229, validation_accuracy: 0.75734375, time: 122.702s
Epoch [4/30], train_loss: 0.6936169724225199, train_accuracy: 0.7686795291787293, validation_accuracy: 0.7665, time: 115.357s
Epoch [5/30], train_loss: 0.66210578470791, train_accuracy: 0.7784109305957642, validation_accuracy: 0.77378125, time: 115.567s
Epoch [6/30], train_loss: 0.6378262987825523, train_accuracy: 0.7856508551729392, validation_accuracy: 0.78003125, time: 127.833s
Epoch [7/30], train_loss: 0.6189672924100253, train_accuracy: 0.7922063080908122, validation_accuracy: 0.78321875, time: 124.694s
Epoch [8/30], train_loss: 0.6034148237338048, train_accuracy: 0.796961898141094, validation_accuracy:

VBox(children=(Label(value='0.001 MB of 0.021 MB uploaded\r'), FloatProgress(value=0.057295196782793364, max=1…

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
train_accuracy,▁▄▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇█████████████
train_loss,█▅▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▃▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇███████████

0,1
epoch,30.0
train_accuracy,0.83701
train_loss,0.47819
validation_accuracy,0.80378


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: b1hxypog with config:
[34m[1mwandb[0m: 	criterion: CrossEntropy
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	model: 1
[34m[1mwandb[0m: 	optimizer: sgd


Epoch [1/30], train_loss: 1.8616010130550664, train_accuracy: 0.37662963774188635, validation_accuracy: 0.37634375, time: 145.315s
Epoch [2/30], train_loss: 1.7153217190016374, train_accuracy: 0.3961971944994667, validation_accuracy: 0.433625, time: 138.179s


In [ ]:
# Close any W&B run that is still active.
wandb.finish()

# Testy modelu

In [9]:
# Reload a model saved by a sweep trial for evaluation.
# NOTE(review): torch.load unpickles arbitrary Python objects; only load
# checkpoints from a trusted source.
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model = torch.load('model0_epochs30_learning_rate0.0003_optimizeradam.save', map_location=DEVICE)
# eval() returns the module itself, so the cell still displays the architecture.
model.to(DEVICE).eval()

Model(
  (classifier): Sequential(
    (0): Conv2d(21, 64, kernel_size=(4, 4), stride=(1, 1), padding=(4, 4))
    (1): ReLU()
    (2): Conv2d(64, 16, kernel_size=(2, 2), stride=(1, 1), padding=(4, 4))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=4, stride=1, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(16, 64, kernel_size=(2, 2), stride=(1, 1), padding=(4, 4))
    (6): ReLU()
    (7): Conv2d(64, 1, kernel_size=(4, 4), stride=(1, 1), padding=(4, 4))
    (8): ReLU()
    (9): Flatten(start_dim=1, end_dim=-1)
    (10): Linear(in_features=841, out_features=256, bias=True)
    (11): ReLU()
    (12): Linear(in_features=256, out_features=64, bias=True)
    (13): ReLU()
    (14): Linear(in_features=64, out_features=11, bias=True)
    (15): LogSoftmax(dim=None)
  )
)

In [10]:
# Accuracy of the reloaded model on the training split.
model.calc_accuracy(loader_train)

  return self._call_impl(*args, **kwargs)


0.8402088977986448

In [11]:
# Accuracy of the reloaded model on the test split.
model.calc_accuracy(loader_test)

0.80359375

In [12]:
# Accuracy of the reloaded model on the validation split.
model.calc_accuracy(loader_valid)

0.80378125