In [1]:
import wandb
import pathlib
import urllib
import zstandard
import chess
import torch
import numpy as np
from torch import nn
import math
import time

from torch.nn import init
from torch.utils.data import DataLoader
from torchinfo import summary

In [2]:
# Mini-batch size used by every DataLoader and by the torchinfo input shape.
BATCH_SIZE = 64
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"DEVICE: {DEVICE}")

DEVICE: cuda


In [3]:
# Authenticate with Weights & Biases before any sweep/run calls are made.
wandb.login()

wandb: Currently logged in as: spajro (spajro-chess). Use `wandb login --relogin` to force relogin


True

In [4]:
# Load the pre-built dataset splits from disk.
# NOTE(review): torch.load unpickles arbitrary Python objects -- only load
# files that come from a trusted source.
dataset_train = torch.load('dataset_train.save')
dataset_test = torch.load('dataset_test.save')
dataset_valid = torch.load('dataset_valid.save')

# Only the training split is shuffled. Accuracy does not depend on sample
# order, so the evaluation loaders iterate deterministically.
loader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
loader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)
loader_valid = DataLoader(dataset_valid, batch_size=BATCH_SIZE, shuffle=False)

print("Batches train", len(loader_train))
print("Batches valid", len(loader_valid))
print("Batches test", len(loader_test))

Batches train 12847
Batches valid 500
Batches test 500


In [5]:
# Shape of one input batch: batch dimension followed by the shape of a single
# feature tensor (first element of the first training sample).
DATA_SHAPE = (BATCH_SIZE, *dataset_train[0][0].shape)

In [6]:
class Model(nn.Module):
    """Thin ``nn.Module`` wrapper around an ``nn.Sequential`` classifier.

    All constructor arguments are forwarded unchanged to ``nn.Sequential``;
    the resulting layer stack is stored as ``self.classifier``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        self.classifier = nn.Sequential(*args, **kwargs)

    def init_xavier(self):
        """Re-initialise the weights of every Conv2d/Linear layer (Xavier normal).

        Only ``weight`` tensors are touched; biases and other layer types keep
        their current values.
        """
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                init.xavier_normal_(m.weight)

    def calc_accuracy(self, loader):
        """Return the fraction of samples in ``loader`` classified correctly.

        Args:
            loader: iterable yielding ``(features, labels)`` batches, where
                ``labels`` holds the class index of each sample.

        Returns:
            ``correct / total`` as a float, or ``0.0`` when ``loader`` yields
            no samples.

        The model is evaluated in eval mode with gradients disabled, and the
        training/eval mode it had on entry is restored afterwards (also when
        an exception is raised). Batches are moved to the device of the
        model's own parameters, so the call works wherever the model lives.
        """
        first_param = next(self.parameters(), None)
        # A parameter-free model has no device of its own; leave batches as-is.
        device = first_param.device if first_param is not None else None
        was_training = self.training

        correct = 0
        total = 0
        self.eval()
        try:
            with torch.no_grad():
                for features, labels in loader:
                    if device is not None:
                        features = features.to(device)
                        labels = labels.to(device)
                    predicted = self(features).argmax(dim=1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
        finally:
            # Restore the caller's mode instead of unconditionally enabling train mode.
            self.train(was_training)
        return correct / total if total else 0.0

    def forward(self, X):
        """Run ``X`` through the classifier stack and return its output."""
        # Call the module (not .forward) so hooks registered on the Sequential fire.
        return self.classifier(X)




In [7]:
def create_model0():
    """Build architecture 0: four conv layers (one max-pool) and a 3-layer MLP head.

    The network takes 21 input channels and produces 11 outputs per sample.
    The first Linear layer expects 841 flattened features, i.e. the single
    29x29 map that the last convolution produces for the input shape used in
    this notebook (see the torchinfo summary printed below the cell).
    """
    conv_stack = [
        nn.Conv2d(21, 64, kernel_size=4, padding=4),
        nn.ReLU(),
        nn.Conv2d(64, 16, kernel_size=2, padding=4),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=4, stride=1),
        nn.Conv2d(16, 64, kernel_size=2, padding=4),
        nn.ReLU(),
        nn.Conv2d(64, 1, kernel_size=4, padding=4),
        nn.ReLU(),
    ]
    dense_head = [
        nn.Flatten(),
        nn.Linear(841, 256),
        nn.ReLU(),
        nn.Linear(256, 64),
        nn.ReLU(),
        nn.Linear(64, 11),
    ]
    return Model(*conv_stack, *dense_head)

# Instantiate architecture 0 and print a torchinfo layer-by-layer summary
# for an input of shape DATA_SHAPE.
model0 = create_model0()
architecture_summary = summary(model0, DATA_SHAPE)
print(architecture_summary)

  return F.conv2d(input, weight, bias, self.stride,


Layer (type:depth-idx)                   Output Shape              Param #
Model                                    [64, 11]                  --
├─Sequential: 1-1                        --                        --
│    └─Conv2d: 2-1                       [64, 64, 13, 13]          21,568
│    └─ReLU: 2-2                         [64, 64, 13, 13]          --
│    └─Conv2d: 2-3                       [64, 16, 20, 20]          4,112
│    └─ReLU: 2-4                         [64, 16, 20, 20]          --
│    └─MaxPool2d: 2-5                    [64, 16, 17, 17]          --
│    └─Conv2d: 2-6                       [64, 64, 24, 24]          4,160
│    └─ReLU: 2-7                         [64, 64, 24, 24]          --
│    └─Conv2d: 2-8                       [64, 1, 29, 29]           1,025
│    └─ReLU: 2-9                         [64, 1, 29, 29]           --
│    └─Flatten: 2-10                     [64, 841]                 --
│    └─Linear: 2-11                      [64, 256]                 215,5

In [8]:
def create_model1():
    """Build architecture 1: two wide conv layers, dropout, and a small MLP head.

    The network takes 21 input channels and produces 11 outputs per sample.
    The first Linear layer expects 8670 flattened features (30 channels of
    17x17 for the input shape used in this notebook, per the torchinfo
    summary printed below the cell).
    """
    layers = []
    layers.append(nn.Conv2d(21, 300, kernel_size=5, padding=5))
    layers.append(nn.ReLU())
    layers.append(nn.Conv2d(300, 30, kernel_size=2, padding=2))
    layers.append(nn.ReLU())
    layers.append(nn.Flatten())
    layers.append(nn.Dropout(0.5))
    layers.append(nn.Linear(8670, 20))
    layers.append(nn.ReLU())
    layers.append(nn.Linear(20, 11))
    return Model(*layers)
# Instantiate architecture 1 and print a torchinfo layer-by-layer summary
# for an input of shape DATA_SHAPE.
model1 = create_model1()
architecture_summary = summary(model1, DATA_SHAPE)
print(architecture_summary)

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    [64, 11]                  --
├─Sequential: 1-1                        --                        --
│    └─Conv2d: 2-1                       [64, 300, 14, 14]         157,800
│    └─ReLU: 2-2                         [64, 300, 14, 14]         --
│    └─Conv2d: 2-3                       [64, 30, 17, 17]          36,030
│    └─ReLU: 2-4                         [64, 30, 17, 17]          --
│    └─Flatten: 2-5                      [64, 8670]                --
│    └─Dropout: 2-6                      [64, 8670]                --
│    └─Linear: 2-7                       [64, 20]                  173,420
│    └─ReLU: 2-8                         [64, 20]                  --
│    └─Linear: 2-9                       [64, 11]                  231
Total params: 367,481
Trainable params: 367,481
Non-trainable params: 0
Total mult-adds (Units.GIGABYTES): 2.66
Input size (MB): 0.34
Forwar

In [9]:
def create_model2():
    """Build architecture 2: three conv layers with batch norm, dropout, MLP head.

    The network takes 21 input channels and produces 11 outputs per sample.
    Batch normalisation is applied after the ReLU of the first two conv
    layers. The first Linear layer expects 12000 flattened features (30
    channels of 20x20 for the input shape used in this notebook, per the
    torchinfo summary printed below the cell).
    """
    conv_stack = [
        nn.Conv2d(21, 300, kernel_size=5, padding=5),
        nn.ReLU(),
        nn.BatchNorm2d(300),
        nn.Conv2d(300, 30, kernel_size=2, padding=2),
        nn.ReLU(),
        nn.BatchNorm2d(30),
        nn.Conv2d(30, 30, kernel_size=2, padding=2),
        nn.ReLU(),
    ]
    dense_head = [
        nn.Flatten(),
        nn.Dropout(0.5),
        nn.Linear(12000, 30),
        nn.ReLU(),
        nn.Linear(30, 11),
    ]
    return Model(*conv_stack, *dense_head)
# Instantiate architecture 2 and print a torchinfo layer-by-layer summary
# for an input of shape DATA_SHAPE.
model2 = create_model2()
architecture_summary = summary(model2, DATA_SHAPE)
print(architecture_summary)

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    [64, 11]                  --
├─Sequential: 1-1                        --                        --
│    └─Conv2d: 2-1                       [64, 300, 14, 14]         157,800
│    └─ReLU: 2-2                         [64, 300, 14, 14]         --
│    └─BatchNorm2d: 2-3                  [64, 300, 14, 14]         600
│    └─Conv2d: 2-4                       [64, 30, 17, 17]          36,030
│    └─ReLU: 2-5                         [64, 30, 17, 17]          --
│    └─BatchNorm2d: 2-6                  [64, 30, 17, 17]          60
│    └─Conv2d: 2-7                       [64, 30, 20, 20]          3,630
│    └─ReLU: 2-8                         [64, 30, 20, 20]          --
│    └─Flatten: 2-9                      [64, 12000]               --
│    └─Dropout: 2-10                     [64, 12000]               --
│    └─Linear: 2-11                      [64, 30]                  360,0

In [10]:
# Registry of model factories; a model is selected by its index in this list.
CREATE_MODELS_LIST = [create_model0, create_model1, create_model2]


def create_model(model_number: int):
    """Return a fresh instance of the architecture registered at ``model_number``."""
    factory = CREATE_MODELS_LIST[model_number]
    return factory()

In [11]:
def accuracy(out, truth):
    """Return a boolean tensor telling, per row, whether the prediction is correct.

    The predicted class of each row of ``out`` is the index of its largest
    value along dimension 1; it is compared element-wise with ``truth``.
    Callers sum the result to obtain the number of correct predictions.
    """
    predicted = out.argmax(dim=1)
    return predicted == truth
def train(model, criterion, optimizer, epoch):
    """Train ``model`` from scratch on ``loader_train`` and log metrics to wandb.

    Args:
        model: a ``Model`` instance; it is moved to ``DEVICE`` and its
            Conv2d/Linear weights are re-initialised with ``init_xavier``
            before training, so any previously learned weights are discarded.
        criterion: loss module called as ``criterion(out, truth)``.
        optimizer: optimizer already constructed over the model's parameters.
        epoch: number of epochs to run.

    After every epoch the average batch loss, the training accuracy and the
    accuracy on ``loader_valid`` are printed and sent to ``wandb.log``.
    Relies on the module-level ``loader_train``, ``loader_valid`` and
    ``DEVICE``.
    """
    model = model.to(DEVICE)
    model.init_xavier()
    criterion = criterion.to(DEVICE)
    batches_amount = len(loader_train)

    for i in range(epoch):
        time_started = time.perf_counter()
        loss_sum = 0.0
        correct_count = 0
        sample_count = 0
        for batch, truth in loader_train:
            batch = batch.to(DEVICE)
            truth = truth.to(DEVICE)

            optimizer.zero_grad()
            # Call the module (not .forward) so registered hooks are honoured.
            out = model(batch)
            loss = criterion(out, truth)
            loss.backward()
            optimizer.step()

            loss_sum += loss.item()
            # Count samples explicitly: the last batch can be smaller than
            # BATCH_SIZE, so dividing by a constant would under-report accuracy.
            correct_count += accuracy(out, truth).sum().item()
            sample_count += truth.size(0)

        # Elapsed time in whole milliseconds (rounded up), reported in seconds.
        passed_time = math.ceil((time.perf_counter() - time_started) * 1000)
        # max(..., 1) keeps an empty training loader from raising ZeroDivisionError.
        loss_average = loss_sum / max(batches_amount, 1)
        accuracy_average = correct_count / max(sample_count, 1)
        validation_accuracy = model.calc_accuracy(loader_valid)

        print(f"Epoch [{i+1}/{epoch}], train_loss: {loss_average}, train_accuracy: {accuracy_average}, validation_accuracy: {validation_accuracy}, time: {passed_time/1000}s")
        wandb.log({
            "epoch": i + 1,
            "train_loss": loss_average,
            "train_accuracy": accuracy_average,
            "validation_accuracy": validation_accuracy,
        })

In [12]:
### SWEEP CONFIG FOR TESTS
# Single-point sweep used for quick checks: every parameter has exactly one
# candidate value. Note that the "MAIN SWEEP CONFIG" cell assigns the same
# name, so whichever of the two cells ran last wins.
sweep_config = dict(
    method='bayes',  # random, grid or bayes
    name='sweep-chess-tactics',
    metric=dict(goal='maximize', name='validation_accuracy'),
    parameters=dict(
        epochs=dict(values=[20]),            # wider choice: [5, 10, 30]
        learning_rate=dict(values=[0.001]),  # wider choice: [0.003, 0.001, 0.0003]
        optimizer=dict(values=['adam']),     # wider choice: ['sgd', 'adam']
        criterion=dict(values=['CrossEntropy']),
        model=dict(values=[2]),              # wider choice: every index of CREATE_MODELS_LIST
    ),
)

In [13]:
### MAIN SWEEP CONFIG
# Full sweep: three learning rates crossed with every registered model
# architecture; the remaining parameters are fixed to a single value.
# This assignment replaces the test configuration defined in the previous cell.
sweep_config = dict(
    method='bayes',  # random, grid or bayes
    name='sweep-chess-tactics',
    metric=dict(goal='maximize', name='validation_accuracy'),
    parameters=dict(
        epochs=dict(values=[20]),  # wider choice: [5, 10, 30]
        learning_rate=dict(values=[0.0003, 0.001, 0.005]),  # previously: [0.003, 0.001, 0.0003]
        optimizer=dict(values=['adam']),
        criterion=dict(values=['CrossEntropy']),
        model=dict(values=list(range(len(CREATE_MODELS_LIST)))),
    ),
)

In [14]:
# Optional: reuse the ID of an existing sweep instead of creating a new one.
# The following cell reassigns sweep_id, so skip that cell when resuming.
sweep_id = 'cua8zqmh'  # to continue training on the previous sweep

In [15]:
# Register a new sweep built from sweep_config in the "chess-tactics-swp"
# project and keep the returned ID for the agent started below.
sweep_id = wandb.sweep(sweep_config, project="chess-tactics-swp")

Create sweep with ID: pu99pny3
Sweep URL: https://wandb.ai/spajro-chess/chess-tactics-swp/sweeps/pu99pny3


In [16]:


def create_optimizer(model, optimizer):
    """Resolve an optimizer name to the corresponding ``torch.optim`` class.

    Args:
        model: unused; kept so existing callers keep working.
        optimizer: ``"sgd"`` or ``"adam"`` (case-insensitive). Any non-string
            value (for example an optimizer class) is returned unchanged.

    Returns:
        The optimizer class (not an instance); the caller constructs it with
        the parameters and learning rate.

    Raises:
        ValueError: if ``optimizer`` is a string that names no known optimizer.
            Previously the string itself was returned and only failed later
            with an unrelated "'str' object is not callable" error.
    """
    if not isinstance(optimizer, str):
        return optimizer

    optimizers_by_name = {
        "sgd": torch.optim.SGD,
        "adam": torch.optim.Adam,
    }
    try:
        return optimizers_by_name[optimizer.lower()]
    except KeyError:
        raise ValueError(
            f"Unknown optimizer {optimizer!r}; expected one of {sorted(optimizers_by_name)}"
        ) from None

def create_criterion(criterion = "CrossEntropy"):
    """Build the loss module named by ``criterion``.

    Args:
        criterion: name of the loss; only ``"CrossEntropy"`` is supported.

    Returns:
        A new ``nn.CrossEntropyLoss`` instance.

    Raises:
        ValueError: for any other name. Previously the function fell through
            and returned ``None``, which only failed later inside training.
    """
    if criterion == "CrossEntropy":
        return nn.CrossEntropyLoss()
    raise ValueError(f"Unknown criterion {criterion!r}; expected 'CrossEntropy'")

def main():
    """Entry point for one sweep run: build, train and save a model.

    Reads the hyper-parameters from ``wandb.config``, names the run after
    them, trains the selected architecture with ``train`` and finally pickles
    the whole model object to ``"<run name>.save"`` in the working directory.
    """
    with wandb.init(config=None) as run:
        cfg = wandb.config
        run.name = (
            f"model{cfg['model']}_epochs{cfg['epochs']}"
            f"_learning_rate{cfg['learning_rate']}_optimizer{cfg['optimizer']}"
        )

        model = create_model(int(cfg['model']))
        optimizer_cls = create_optimizer(model, cfg.optimizer)
        loss_fn = create_criterion(cfg.criterion)
        optimizer = optimizer_cls(model.classifier.parameters(), lr=cfg.learning_rate)
        train(model, loss_fn, optimizer, cfg.epochs)

        # The complete module is pickled (not just a state_dict); the test
        # cells later reload it with torch.load.
        torch.save(model, f"{run.name}.save")


# Start a sweep agent for sweep_id, passing `main` as the function it runs.
wandb.agent(sweep_id, main)

wandb: Agent Starting Run: d84bdh97 with config:
wandb: 	criterion: CrossEntropy
wandb: 	epochs: 20
wandb: 	learning_rate: 0.01
wandb: 	model: 1
wandb: 	optimizer: adam


Epoch [1/20], train_loss: 1.3820604978901612, train_accuracy: 0.5332421966217794, validation_accuracy: 0.62040625, time: 22.26s
Epoch [2/20], train_loss: 1.1571842547825935, train_accuracy: 0.6216845372460497, validation_accuracy: 0.6695625, time: 22.149s
Epoch [3/20], train_loss: 1.098286242594355, train_accuracy: 0.645558058301549, validation_accuracy: 0.67528125, time: 23.045s
Epoch [4/20], train_loss: 1.0675043681488712, train_accuracy: 0.6588040982330505, validation_accuracy: 0.677625, time: 24.164s
Epoch [5/20], train_loss: 1.0493856784020954, train_accuracy: 0.6656271892270569, validation_accuracy: 0.67659375, time: 24.337s
Epoch [6/20], train_loss: 1.0434747907465232, train_accuracy: 0.6738343582159259, validation_accuracy: 0.69303125, time: 22.817s
Epoch [7/20], train_loss: 1.0416917369515457, train_accuracy: 0.6830193819568771, validation_accuracy: 0.7051875, time: 24.535s
Epoch [8/20], train_loss: 1.0357500949406648, train_accuracy: 0.6875498657274072, validation_accuracy: 0

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▅▆▆▆▇▇▇▇█████████▇█
train_loss,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▂▂▂
validation_accuracy,▁▄▅▅▅▆▇▇█▄▇▇█▇▆▇▄▆▆▇

0,1
epoch,20.0
train_accuracy,0.6969
train_loss,1.04787
validation_accuracy,0.7195


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: 2evd7mn1 with config:
wandb: 	criterion: CrossEntropy
wandb: 	epochs: 20
wandb: 	learning_rate: 0.001
wandb: 	model: 1
wandb: 	optimizer: adam


Epoch [1/20], train_loss: 0.8503898715923257, train_accuracy: 0.7189482466723749, validation_accuracy: 0.795625, time: 25.017s
Epoch [2/20], train_loss: 0.6144350974136145, train_accuracy: 0.7976777165875302, validation_accuracy: 0.8233125, time: 24.415s
Epoch [3/20], train_loss: 0.5455990709863748, train_accuracy: 0.820027292363976, validation_accuracy: 0.829125, time: 24.159s
Epoch [4/20], train_loss: 0.5019936093032149, train_accuracy: 0.8339982096987624, validation_accuracy: 0.84259375, time: 24.102s
Epoch [5/20], train_loss: 0.47160440028140616, train_accuracy: 0.8437110804078773, validation_accuracy: 0.845625, time: 24.383s
Epoch [6/20], train_loss: 0.4494340208042194, train_accuracy: 0.8510024227446097, validation_accuracy: 0.85484375, time: 23.808s
Epoch [7/20], train_loss: 0.43129222617892987, train_accuracy: 0.8564049486261384, validation_accuracy: 0.8571875, time: 22.76s
Epoch [8/20], train_loss: 0.41672464137625914, train_accuracy: 0.8615021989569549, validation_accuracy: 0

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▄▅▆▆▇▇▇▇▇▇▇████████
train_loss,█▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁
validation_accuracy,▁▄▄▆▆▇▇▇▇▇▇▇██▇▇▇███

0,1
epoch,20.0
train_accuracy,0.88658
train_loss,0.33944
validation_accuracy,0.86794


wandb: Ctrl + C detected. Stopping sweep.


In [17]:
# Close any wandb run that is still active (e.g. after interrupting the sweep).
wandb.finish()

# Testy modelu

In [18]:
# Reload a model pickled by a sweep run (the file name follows the run-name
# pattern built in main()). map_location lets a checkpoint saved on a GPU be
# opened on a CPU-only machine as well.
# NOTE(review): torch.load unpickles arbitrary Python objects -- only load
# files that come from a trusted source.
model = torch.load('model1_epochs20_learning_rate0.001_optimizeradam.save', map_location=DEVICE)
model.to(DEVICE)

Model(
  (classifier): Sequential(
    (0): Conv2d(21, 300, kernel_size=(5, 5), stride=(1, 1), padding=(5, 5))
    (1): ReLU()
    (2): Conv2d(300, 30, kernel_size=(2, 2), stride=(1, 1), padding=(2, 2))
    (3): ReLU()
    (4): Flatten(start_dim=1, end_dim=-1)
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=8670, out_features=20, bias=True)
    (7): ReLU()
    (8): Linear(in_features=20, out_features=11, bias=True)
  )
)

In [19]:
# Accuracy of the reloaded model on the training split.
model.calc_accuracy(loader_train)

0.9204691137583836

In [20]:
# Accuracy of the reloaded model on the test split.
model.calc_accuracy(loader_test)

0.87028125

In [21]:
# Accuracy of the reloaded model on the validation split.
model.calc_accuracy(loader_valid)

0.8679375