In [1]:
import wandb
import pathlib
import urllib
import zstandard
import chess
import torch
import numpy as np
from torch import nn
import math
import time

from torch.nn import init
from torch.utils.data import DataLoader
from torchinfo import summary

In [2]:
# Mini-batch size used by every DataLoader and by the model summaries.
BATCH_SIZE = 64
# Run on the GPU when CUDA is available, otherwise on the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"DEVICE: {DEVICE}")

DEVICE: cuda


In [3]:
# Authenticate with Weights & Biases before any sweep or run is created below.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mmatik[0m ([33mmatik001[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
# NOTE(review): torch.load unpickles arbitrary Python objects; only load
# dataset files that come from a trusted source.
dataset_train = torch.load('dataset_train.save')
dataset_test = torch.load('dataset_test.save')
dataset_valid = torch.load('dataset_valid.save')

# Only the training loader is shuffled. The test/validation loaders are used
# solely to count correct predictions over the whole set, so shuffling them
# changes nothing except making the evaluation order non-deterministic.
loader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
loader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)
loader_valid = DataLoader(dataset_valid, batch_size=BATCH_SIZE, shuffle=False)

print("Batches train", len(loader_train))
print("Batches valid", len(loader_valid))
print("Batches test", len(loader_test))

Batches train 12847
Batches valid 500
Batches test 500


In [5]:
# Input shape handed to torchinfo's summary(): the batch dimension followed by
# the shape of the feature tensor of the first training sample.
DATA_SHAPE = (BATCH_SIZE, *dataset_train[0][0].shape)

In [6]:
class Model(nn.Module):
    """Wrapper that runs a stack of layers as a single ``nn.Sequential``.

    All positional and keyword arguments are forwarded unchanged to
    ``nn.Sequential``; the resulting stack is stored as ``self.classifier``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        self.classifier = nn.Sequential(*args, **kwargs)

    def init_xavier(self):
        """Re-initialise the weights of every Conv2d and Linear layer in place.

        Uses Xavier normal initialisation. Only ``weight`` is touched: biases
        and every other module type keep their current values.
        """
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                init.xavier_normal_(m.weight)

    def calc_accuracy(self, loader):
        """Return the fraction of samples in ``loader`` classified correctly.

        Args:
            loader: Iterable yielding ``(features, labels)`` pairs of tensors.

        Returns:
            ``correct / total`` as a float, or ``0.0`` when the loader yields
            no samples.

        The model is switched to eval mode for the duration of the call and
        is put back into whatever mode (train or eval) it was in before, even
        if an exception is raised. Gradients are not tracked.
        """
        was_training = self.training
        # Evaluate on the device the model actually lives on instead of
        # depending on a notebook-level global. A model without parameters
        # leaves the batches where they are.
        first_param = next(self.parameters(), None)
        device = first_param.device if first_param is not None else None

        correct = 0
        total = 0
        self.eval()
        try:
            with torch.no_grad():
                for features, labels in loader:
                    if device is not None:
                        features = features.to(device)
                        labels = labels.to(device)
                    predicted = self(features).argmax(dim=1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
        finally:
            # Restore the caller's mode rather than forcing train mode.
            self.train(was_training)
        return correct / total if total else 0.0

    def forward(self, X):
        """Run ``X`` through the layer stack and return its output."""
        # Call the module (not .forward) so that hooks registered on the
        # Sequential container are executed.
        return self.classifier(X)




In [7]:
# model0: four padded convolutions with one max-pool in between, followed by
# a three-layer fully connected head that emits 11 scores per sample.
model0 = Model(
    nn.Conv2d(in_channels=21, out_channels=64, kernel_size=4, padding=4),
    nn.ReLU(),
    nn.Conv2d(in_channels=64, out_channels=16, kernel_size=2, padding=4),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=4, stride=1),
    nn.Conv2d(in_channels=16, out_channels=64, kernel_size=2, padding=4),
    nn.ReLU(),
    nn.Conv2d(in_channels=64, out_channels=1, kernel_size=4, padding=4),
    nn.ReLU(),
    nn.Flatten(),
    # 841 = 1 * 29 * 29, the flattened size shown in the printed summary.
    nn.Linear(in_features=841, out_features=256),
    nn.ReLU(),
    nn.Linear(in_features=256, out_features=64),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=11),
)

architecture_summary = summary(model0, DATA_SHAPE)
print(architecture_summary)

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    [64, 11]                  --
├─Sequential: 1-1                        --                        --
│    └─Conv2d: 2-1                       [64, 64, 13, 13]          21,568
│    └─ReLU: 2-2                         [64, 64, 13, 13]          --
│    └─Conv2d: 2-3                       [64, 16, 20, 20]          4,112
│    └─ReLU: 2-4                         [64, 16, 20, 20]          --
│    └─MaxPool2d: 2-5                    [64, 16, 17, 17]          --
│    └─Conv2d: 2-6                       [64, 64, 24, 24]          4,160
│    └─ReLU: 2-7                         [64, 64, 24, 24]          --
│    └─Conv2d: 2-8                       [64, 1, 29, 29]           1,025
│    └─ReLU: 2-9                         [64, 1, 29, 29]           --
│    └─Flatten: 2-10                     [64, 841]                 --
│    └─Linear: 2-11                      [64, 256]                 215,5

In [8]:
# model1: two padded convolutions, then dropout on the flattened features and
# a small two-layer head that emits 11 scores per sample.
model1 = Model(
    nn.Conv2d(in_channels=21, out_channels=300, kernel_size=5, padding=5),
    nn.ReLU(),
    nn.Conv2d(in_channels=300, out_channels=30, kernel_size=2, padding=2),
    nn.ReLU(),
    nn.Flatten(),
    nn.Dropout(p=0.5),
    # 8670 = 30 * 17 * 17, the flattened size shown in the printed summary.
    nn.Linear(in_features=8670, out_features=20),
    nn.ReLU(),
    nn.Linear(in_features=20, out_features=11),
)

architecture_summary = summary(model1, DATA_SHAPE)
print(architecture_summary)

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    [64, 11]                  --
├─Sequential: 1-1                        --                        --
│    └─Conv2d: 2-1                       [64, 300, 14, 14]         157,800
│    └─ReLU: 2-2                         [64, 300, 14, 14]         --
│    └─Conv2d: 2-3                       [64, 30, 17, 17]          36,030
│    └─ReLU: 2-4                         [64, 30, 17, 17]          --
│    └─Flatten: 2-5                      [64, 8670]                --
│    └─Dropout: 2-6                      [64, 8670]                --
│    └─Linear: 2-7                       [64, 20]                  173,420
│    └─ReLU: 2-8                         [64, 20]                  --
│    └─Linear: 2-9                       [64, 11]                  231
Total params: 367,481
Trainable params: 367,481
Non-trainable params: 0
Total mult-adds (Units.GIGABYTES): 2.66
Input size (MB): 0.34
Forwar

In [10]:
# model2: same convolutional stack and head as model1, but the flattened
# features go through BatchNorm1d instead of Dropout.
model2 = Model(
    nn.Conv2d(in_channels=21, out_channels=300, kernel_size=5, padding=5),
    nn.ReLU(),
    nn.Conv2d(in_channels=300, out_channels=30, kernel_size=2, padding=2),
    nn.ReLU(),
    nn.Flatten(),
    # 8670 = 30 * 17 * 17, the flattened size shown in the printed summary.
    nn.BatchNorm1d(num_features=8670),
    nn.Linear(in_features=8670, out_features=20),
    nn.ReLU(),
    nn.Linear(in_features=20, out_features=11),
)

architecture_summary = summary(model2, DATA_SHAPE)
print(architecture_summary)

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    [64, 11]                  --
├─Sequential: 1-1                        --                        --
│    └─Conv2d: 2-1                       [64, 300, 14, 14]         157,800
│    └─ReLU: 2-2                         [64, 300, 14, 14]         --
│    └─Conv2d: 2-3                       [64, 30, 17, 17]          36,030
│    └─ReLU: 2-4                         [64, 30, 17, 17]          --
│    └─Flatten: 2-5                      [64, 8670]                --
│    └─BatchNorm1d: 2-6                  [64, 8670]                17,340
│    └─Linear: 2-7                       [64, 20]                  173,420
│    └─ReLU: 2-8                         [64, 20]                  --
│    └─Linear: 2-9                       [64, 11]                  231
Total params: 384,821
Trainable params: 384,821
Non-trainable params: 0
Total mult-adds (Units.GIGABYTES): 2.66
Input size (MB): 0.34
Fo

In [11]:
# Candidate architectures; the sweep's `model` parameter is an index into this list.
MODELS_LIST = [model0, model1, model2]

In [12]:
def accuracy(out, truth):
    """Return a boolean tensor marking which samples were predicted correctly.

    The predicted class of each row of ``out`` is the index of its largest
    value along dimension 1; the result is the element-wise comparison of
    those indices with ``truth``. Note that this is a per-sample mask, not a
    scalar accuracy value.
    """
    predicted = out.argmax(dim=1)
    return predicted == truth

In [13]:
def train(model, criterion, optimizer, epoch):
    """Train ``model`` on ``loader_train`` and log per-epoch metrics to wandb.

    Args:
        model: A ``Model`` instance. It is moved to ``DEVICE`` and its
            Conv2d/Linear weights are re-initialised with ``init_xavier``
            before the first epoch.
        criterion: Loss module called as ``criterion(out, truth)``.
        optimizer: Optimizer already constructed over ``model``'s parameters;
            this function never creates one.
        epoch: Number of passes over ``loader_train``.

    Reads the notebook-level ``loader_train``, ``loader_valid`` and ``DEVICE``.
    After every epoch it prints and logs the mean batch loss, the training
    accuracy and the validation accuracy.
    """
    model = model.to(DEVICE)
    model.init_xavier()
    criterion = criterion.to(DEVICE)
    batches_amount = len(loader_train)

    for i in range(epoch):
        # Set train mode explicitly instead of relying on calc_accuracy()
        # having left the model in train mode.
        model.train()
        time_started = time.perf_counter()
        loss_sum = 0.0
        correct_sum = 0
        samples_seen = 0
        for batch, truth in loader_train:
            batch = batch.to(DEVICE)
            truth = truth.to(DEVICE)

            optimizer.zero_grad()
            out = model(batch)
            loss = criterion(out, truth)
            loss.backward()
            optimizer.step()

            loss_sum += loss.item()
            # Count correct samples and real batch sizes: dividing by
            # BATCH_SIZE under-reports accuracy when the last batch is smaller.
            correct_sum += accuracy(out, truth).sum().item()
            samples_seen += truth.size(0)

        # Elapsed epoch time in whole milliseconds (validation not included).
        passed_time = math.ceil((time.perf_counter() - time_started) * 1000)
        loss_average = loss_sum / max(batches_amount, 1)
        accuracy_average = correct_sum / max(samples_seen, 1)
        validation_accuracy = model.calc_accuracy(loader_valid)

        print(f"Epoch [{i+1}/{epoch}], train_loss: {loss_average}, train_accuracy: {accuracy_average}, validation_accuracy: {validation_accuracy}, time: {passed_time/1000}s")
        wandb.log({
            "epoch": i + 1,
            "train_loss": loss_average,
            "train_accuracy": accuracy_average,
            "validation_accuracy": validation_accuracy,
        })

In [14]:
### SWEEP CONFIG FOR TESTS
# Test configuration: every hyperparameter has a single candidate value
# (20 epochs, learning rate 0.001, Adam, cross-entropy, model index 2).
# The commented-out lists are wider alternatives for the same parameter.
# The sweep maximises the `validation_accuracy` metric logged during training.
sweep_config = {
    'method': 'bayes',  # random, grid or bayes
    'name': 'sweep-chess-tactics',
    'metric': {
        'goal': 'maximize', 
        'name': 'validation_accuracy'
    },
    'parameters': {
        'epochs': {
            # 'values': [5, 10, 30]
            'values': [20]
        },
        'learning_rate': {
            # 'values': [0.003, 0.001, 0.0003]
            'values': [0.001]
        },
        'optimizer': {
            # 'values': ['sgd', 'adam']
            'values': ['adam']
        },
        'criterion': {
            'values': ['CrossEntropy']
        },
        'model': {
            # 'values': list(range(len(MODELS_LIST)))
            'values': [2]
        }
     },

}

In [ ]:
### MAIN SWEEP CONFIG
# Full sweep: 15 or 30 epochs crossed with every model index in MODELS_LIST;
# learning rate, optimizer and criterion are fixed to a single value each.
# NOTE(review): this reassigns `sweep_config`, so it replaces the test
# configuration above whenever both cells are run.
sweep_config = {
    'method': 'bayes',  # random, grid or bayes
    'name': 'sweep-chess-tactics',
    'metric': {
        'goal': 'maximize', 
        'name': 'validation_accuracy'
    },
    'parameters': {
        'epochs': {
            # 'values': [5, 10, 30]
            'values': [15, 30]
        },
        'learning_rate': {
            # 'values': [0.003, 0.001, 0.0003]
            'values': [0.0003]
        },
        'optimizer': {
            # 'values': ['sgd', 'adam']
            'values': ['adam']
        },
        'criterion': {
            'values': ['CrossEntropy']
        },
        'model': {
            'values': list(range(len(MODELS_LIST)))
        }
     },

}

In [17]:
# NOTE(review): when the notebook is run top to bottom, the following cell
# overwrites this id with a newly created sweep.
sweep_id = 'cua8zqmh'  # to continue training on the previous sweep

In [15]:
# Create a new sweep from sweep_config in the "chess-tactics-swp" project and keep its id.
sweep_id = wandb.sweep(sweep_config, project="chess-tactics-swp")

Create sweep with ID: svu6njs3
Sweep URL: https://wandb.ai/matik001/chess-tactics-swp/sweeps/svu6njs3


In [None]:


def create_optimizer(model, optimizer):
    """Map an optimizer name from the sweep config to its torch optimizer class.

    Args:
        model: Unused; kept so existing call sites keep working.
        optimizer: ``"sgd"`` or ``"adam"``. A callable (e.g. an optimizer
            class) is returned unchanged.

    Returns:
        The optimizer class, not an instance; the caller supplies the
        parameters and learning rate.

    Raises:
        ValueError: If ``optimizer`` is neither a known name nor a callable.
            Previously the unknown value was returned as-is and only failed
            later with an unrelated "not callable" error.
    """
    if optimizer == "sgd":
        return torch.optim.SGD
    if optimizer == "adam":
        return torch.optim.Adam
    if callable(optimizer):
        return optimizer
    raise ValueError(f"Unknown optimizer: {optimizer!r} (expected 'sgd' or 'adam')")

def create_criterion(criterion = "CrossEntropy"):
    """Build the loss module named by the sweep config.

    Args:
        criterion: Name of the loss; only ``"CrossEntropy"`` is supported.

    Returns:
        A new ``nn.CrossEntropyLoss`` instance.

    Raises:
        ValueError: If the name is not recognised. Previously ``None`` was
            returned silently and the failure surfaced later in training.
    """
    if criterion == "CrossEntropy":
        return nn.CrossEntropyLoss()
    raise ValueError(f"Unknown criterion: {criterion!r} (expected 'CrossEntropy')")

def main():
    """Run one sweep trial: build, train and save the model chosen by wandb.

    Reads ``model``, ``optimizer``, ``criterion``, ``learning_rate`` and
    ``epochs`` from the run's wandb config, trains the selected architecture
    with ``train`` and saves the whole trained model object to a file whose
    name encodes the hyperparameters.
    """
    import copy  # local import: only this function needs it

    with wandb.init(config=None):
        config = wandb.config
        # Train a fresh copy so state does not leak between sweep runs:
        # init_xavier() only resets Conv2d/Linear weights, so biases and
        # BatchNorm parameters/statistics of a shared model object would
        # otherwise carry over from the previous run.
        model = copy.deepcopy(MODELS_LIST[int(config['model'])])
        opt_fn = create_optimizer(model, config['optimizer'])
        crt_fn = create_criterion(config['criterion'])
        optimizer = opt_fn(model.classifier.parameters(), lr=config['learning_rate'])
        train(model, crt_fn, optimizer, config['epochs'])
        model_path = f"model{config['model']}_epochs{config['epochs']}_learning_rate{config['learning_rate']}_optimizer{config['optimizer']}.save"
        torch.save(model, model_path)


# Start a sweep agent for sweep_id; it calls main() once per run it starts.
wandb.agent(sweep_id, main)

[34m[1mwandb[0m: Agent Starting Run: 2x9gwiny with config:
[34m[1mwandb[0m: 	criterion: CrossEntropy
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	model: 1
[34m[1mwandb[0m: 	optimizer: adam


Epoch [1/20], train_loss: 0.8388898842407311, train_accuracy: 0.7245648303105784, validation_accuracy: 0.7793125, time: 210.501s
Epoch [2/20], train_loss: 0.5938463079202794, train_accuracy: 0.803744308009652, validation_accuracy: 0.80903125, time: 136.948s
Epoch [3/20], train_loss: 0.5264002105018606, train_accuracy: 0.8262228049350043, validation_accuracy: 0.823875, time: 124.339s
Epoch [4/20], train_loss: 0.48411126335052107, train_accuracy: 0.839635469370281, validation_accuracy: 0.83034375, time: 120.059s
Epoch [5/20], train_loss: 0.4558224256197618, train_accuracy: 0.8487463026387484, validation_accuracy: 0.8345, time: 116.512s
Epoch [6/20], train_loss: 0.43437240735094235, train_accuracy: 0.8563015684595625, validation_accuracy: 0.83821875, time: 118.42s
Epoch [7/20], train_loss: 0.4176567796082165, train_accuracy: 0.8613440881139566, validation_accuracy: 0.84078125, time: 117.238s
Epoch [8/20], train_loss: 0.40268090303870985, train_accuracy: 0.8661093056744765, validation_accu

VBox(children=(Label(value='0.001 MB of 0.020 MB uploaded\r'), FloatProgress(value=0.060844529750479846, max=1…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▄▅▆▆▇▇▇▇▇▇▇████████
train_loss,█▅▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
validation_accuracy,▁▄▅▆▆▆▇▇▇▇▇██▇█▇████

0,1
epoch,20.0
train_accuracy,0.89096
train_loss,0.32626
validation_accuracy,0.85497


[34m[1mwandb[0m: Agent Starting Run: ty52ldte with config:
[34m[1mwandb[0m: 	criterion: CrossEntropy
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	model: 1
[34m[1mwandb[0m: 	optimizer: adam


Epoch [1/20], train_loss: 1.109234223484, train_accuracy: 0.633360414882852, validation_accuracy: 0.6819375, time: 125.635s
Epoch [2/20], train_loss: 0.90218142644667, train_accuracy: 0.7025095839495602, validation_accuracy: 0.712625, time: 113.505s
Epoch [3/20], train_loss: 0.8445460051704105, train_accuracy: 0.7213940997898342, validation_accuracy: 0.7243125, time: 118.644s
Epoch [4/20], train_loss: 0.8124914910924698, train_accuracy: 0.7315898166887211, validation_accuracy: 0.73096875, time: 114.506s
Epoch [5/20], train_loss: 0.7918330988615248, train_accuracy: 0.7385248015100802, validation_accuracy: 0.73834375, time: 111.367s
Epoch [6/20], train_loss: 0.7765340550804896, train_accuracy: 0.7430127169767261, validation_accuracy: 0.7383125, time: 116.775s
Epoch [7/20], train_loss: 0.765621404010071, train_accuracy: 0.746492371759944, validation_accuracy: 0.74590625, time: 112.668s
Epoch [8/20], train_loss: 0.7567636793718696, train_accuracy: 0.7488032225422278, validation_accuracy: 0

In [ ]:
# Mark the current wandb run as finished.
wandb.finish()

# Model tests

In [16]:
# NOTE(review): this unpickles a whole Model object, so the Model class must
# already be defined and the file must come from a trusted source.
# map_location lets a checkpoint that was saved on a GPU be loaded on a
# machine where DEVICE is the CPU.
model = torch.load('model1_epochs20_learning_rate0.001_optimizeradam.save', map_location=DEVICE)
model.to(DEVICE)

Model(
  (classifier): Sequential(
    (0): Conv2d(21, 300, kernel_size=(5, 5), stride=(1, 1), padding=(5, 5))
    (1): ReLU()
    (2): Conv2d(300, 30, kernel_size=(2, 2), stride=(1, 1), padding=(2, 2))
    (3): ReLU()
    (4): Flatten(start_dim=1, end_dim=-1)
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=8670, out_features=20, bias=True)
    (7): ReLU()
    (8): Linear(in_features=20, out_features=11, bias=True)
  )
)

In [None]:
# Accuracy of the loaded model on the training loader.
model.calc_accuracy(loader_train)

In [17]:
# Accuracy of the loaded model on the test loader.
model.calc_accuracy(loader_test)

0.87346875

In [18]:
# Accuracy of the loaded model on the validation loader.
model.calc_accuracy(loader_valid)

0.87275