# LOADING DATASET

In [1]:
!pip install zstandard
!pip install chess

Collecting zstandard
  Downloading zstandard-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.4/5.4 MB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: zstandard
Successfully installed zstandard-0.22.0
Collecting chess
  Downloading chess-1.10.0-py3-none-any.whl (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: chess
Successfully installed chess-1.10.0


In [2]:
import pathlib
import urllib
import zstandard
import chess
import torch
import numpy as np
from torch import nn
import math
import time

In [41]:
use_wandb = False

In [42]:
if use_wandb:
    !pip install wandb
    import wandb
    wandb.login()

Collecting wandb
  Downloading wandb-0.17.1-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
Collecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting gitpython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-2.5.1-py2.py3-none-any.whl (289 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m289.6/289.6 kB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [5]:
def __download(url: str, name: str) -> str:
    path, _ = urllib.request.urlretrieve(url, name)
    return path


def __unpack(path: str, name: str):
    input_file = pathlib.Path(path)
    with open(input_file, 'rb') as compressed:
        decomp = zstandard.ZstdDecompressor()
        output_path = name
        with open(output_path, 'wb') as destination:
            decomp.copy_stream(compressed, destination)
            destination.close()
        compressed.close()


def __remove(path: str):
    pathlib.Path.unlink(pathlib.Path(path))

In [6]:
path = __download("https://database.lichess.org/lichess_db_puzzle.csv.zst", "lichess_db_puzzle.csv.zst")

In [7]:
__unpack(path, "lichess_db_puzzle.csv")

In [8]:
__remove("lichess_db_puzzle.csv.zst")

In [9]:
class Puzzle:
    def __init__(self, row: str):
        fields = row.split(',')
        self.fen = fields[1]
        self.moves = fields[2].split(" ")
        self.tags = fields[7].split(" ")

    def __str__(self):
        return "{fen: " + self.fen + " ,tags: [" + ", ".join(self.tags) + "],moves: [" + ",".join(self.moves) + "]}"

In [10]:
def load(k: int) -> [Puzzle]:
    f = open("lichess_db_puzzle.csv")
    f.readline()
    result = []
    for i in range(k):
        result.append(Puzzle(f.readline()))
    f.close()
    return result

In [11]:
load(10)[0].__str__()

'{fen: r6k/pp2r2p/4Rp1Q/3p4/8/1N1P2R1/PqP2bPP/7K b - - 0 24 ,tags: [crushing, hangingPiece, long, middlegame],moves: [f2g3,e6e7,b2b1,b3c1,b1c1,h6c1]}'

# FILTER DATASET

In [12]:
expected_tags = {
    'attraction',
    'discoveredAttack',
    'doubleCheck',
    'fork',
    'pin',
    'sacrifice',
    'skewer',
    'xRayAttack',
    'zugzwang',
    'deflection',
    'clearance'
}

In [13]:
expected_tags_list = list(expected_tags)

In [14]:
def filter_data(data: [Puzzle]) -> [Puzzle]:
    return list(filter(lambda p: len(set(p.tags) & expected_tags) == 1, data))

In [15]:
len(filter_data(load(100)))

37

# CONVERSION TO TENSOR

In [16]:
def bitboard_to_tensor(bitboard: int) -> torch.Tensor:
    li = [1 if digit == '1' else 0 for digit in bin(bitboard)[2:]]
    li = [0 for _ in range(64 - len(li))] + li
    return torch.tensor(li).reshape((8, 8))

In [17]:
def fen_to_tensors_list(fen: str) -> [torch.Tensor]:
    board = chess.Board(fen)
    return [
        bitboard_to_tensor(board.occupied_co[chess.WHITE]),
        bitboard_to_tensor(board.occupied_co[chess.BLACK]),
        bitboard_to_tensor(board.pawns),
        bitboard_to_tensor(board.kings),
        bitboard_to_tensor(board.queens),
        bitboard_to_tensor(board.knights),
        bitboard_to_tensor(board.bishops),
        bitboard_to_tensor(board.rooks)
    ]

In [18]:
fen_to_tensors_list(load(1)[0].fen)

[tensor([[0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [1, 0, 0, 1, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 1, 0, 0, 1, 0, 1, 0],
         [1, 1, 0, 0, 0, 1, 0, 1],
         [1, 0, 0, 0, 0, 0, 0, 0]]),
 tensor([[1, 0, 0, 0, 0, 0, 0, 1],
         [1, 0, 0, 1, 0, 0, 1, 1],
         [0, 0, 1, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 1, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 1, 0, 0, 0, 1, 0],
         [0, 0, 0, 0, 0, 0, 0, 0]]),
 tensor([[0, 0, 0, 0, 0, 0, 0, 0],
         [1, 0, 0, 0, 0, 0, 1, 1],
         [0, 0, 1, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 1, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 1, 0, 0, 0],
         [1, 1, 0, 0, 0, 1, 0, 1],
         [0, 0, 0, 0, 0, 0, 0, 0]]),
 tensor([[1, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0

In [19]:
def move_to_tensor(move: str) -> torch.Tensor:
    x1 = 7 - ord(move[0]) + ord('a')
    y1 = 8 - int(move[1])
    x2 = 7 - ord(move[2]) + ord('a')
    y2 = 8 - int(move[3])
    tensor = torch.zeros(8, 8)
    tensor[y1][x1] = 1
    tensor[y2][x2] = 1
    return tensor

In [20]:
print(move_to_tensor('e2e4'))

tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.]])


In [21]:
def puzzle_to_tensor(puzzle: Puzzle) -> torch.Tensor:
    fen_tensors = fen_to_tensors_list(puzzle.fen)
    move_tensors = [move_to_tensor(puzzle.moves[0]), move_to_tensor(puzzle.moves[1])]  # FIRST TWO MOVES
    return torch.stack(fen_tensors + move_tensors)

In [22]:
puzzle_to_tensor(load(1)[0])

tensor([[[0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [1., 0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 1., 0., 0., 1., 0., 1., 0.],
         [1., 1., 0., 0., 0., 1., 0., 1.],
         [1., 0., 0., 0., 0., 0., 0., 0.]],

        [[1., 0., 0., 0., 0., 0., 0., 1.],
         [1., 0., 0., 1., 0., 0., 1., 1.],
         [0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0., 0., 1., 1.],
         [0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0.],
         [1., 1., 0., 0., 0., 1., 0., 1.],
       

# CONVERT AND BATCH DATASET

In [23]:
def puzzle_to_truth(puzzle: Puzzle) -> torch.Tensor:
    tensor = torch.zeros(len(expected_tags_list))
    [tag] = set(puzzle.tags) & expected_tags
    index = expected_tags_list.index(tag)
    tensor[index] = 1
    return torch.zeros(1) + index

In [24]:
puzzle_to_truth(filter_data(load(100))[0])

tensor([0.])

In [25]:
BATCH_SIZE = 64

In [26]:
def convert_dataset(puzzles: [Puzzle]) -> list[tuple[torch.Tensor, torch.Tensor]]:
    return [(puzzle_to_tensor(puzzle), puzzle_to_truth(puzzle)) for puzzle in puzzles]

In [27]:
def dataset_to_batches(dataset: list[tuple[torch.Tensor, torch.Tensor]]) -> list[tuple[torch.Tensor, torch.Tensor]]:
    batches = []
    index = 0
    while index + BATCH_SIZE <= len(dataset):
        batch = []
        truth = []
        max_index = index + BATCH_SIZE
        while index < max_index:
            batch.append(dataset[index][0])
            truth.append(dataset[index][1])
            index += 1
        batches.append((torch.stack(batch).cuda(), torch.tensor(truth).cuda().type(torch.long)))

    return batches

In [28]:
batched_dataset=dataset_to_batches(convert_dataset(filter_data(load(10000))))
print(len(batched_dataset))
print(batched_dataset[0][0].shape,batched_dataset[0][1].shape)

47
torch.Size([64, 10, 8, 8]) torch.Size([64])


# TRAIN

In [29]:
def accuracy(out,truth):
    return torch.argmax(out,dim=1) == truth

In [30]:
class Model(nn.Module):
    def __init__(self, *args, **kwargs):
        super(Model, self).__init__()
        self.classifier = nn.Sequential(*args, **kwargs)

    def forward(self, X):
        return self.classifier.forward(X)


model = Model(nn.Conv2d(10, 8 * 8, kernel_size=4, padding=4),
              nn.ReLU(),
              nn.Conv2d(8 * 8, 4 * 4, kernel_size=2, padding=4),
              nn.ReLU(),
              nn.MaxPool2d(kernel_size=4, stride=1),
              nn.Conv2d(4*4, 8 * 8, kernel_size=2, padding=4),
              nn.ReLU(),
              nn.Conv2d(8 * 8, 1, kernel_size=4, padding=4),
              nn.ReLU(),
              nn.Flatten(),
              nn.Linear(841, 256),
              nn.ReLU(),
              nn.Linear(256, 64),
              nn.ReLU(),
              nn.Linear(64, 11),
              nn.LogSoftmax(),
              )
criterion = (
    nn.NLLLoss()
)


In [31]:
size_to_load=3000000
test_batches_count=500

In [32]:
all_batches=dataset_to_batches(convert_dataset(filter_data(load(size_to_load))))
train_batches=all_batches[test_batches_count:]
test_batches=all_batches[:test_batches_count]
print(len(all_batches),len(train_batches),len(test_batches))

14131 13631 500


In [49]:
def train(model, criterion, optimizer, epoch):
    model.cuda()
    criterion.cuda()
    batches = train_batches
    size=len(batches)
    print("Dataset size:", len(batches))
    for i in range(epoch):
        time_started = time.time() * 1000
        loss_sum=0.0
        accuracy_sum=0.0
        for batch, truth in batches:
            optimizer.zero_grad()
            out = model.forward(batch)
            loss = criterion(out, truth)
            loss.backward()
            optimizer.step()
            accuracy_value = accuracy(out,truth).sum()/BATCH_SIZE

            loss_sum+=loss.item()
            accuracy_sum+=accuracy_value.item()

        passed_time = math.ceil(time.time() * 1000 - time_started)
        loss_average = loss_sum/size
        accuracy_average = accuracy_sum/size
        print(f"Epoch [{i+1}/{epoch}], train_loss: {loss_average}, train_accuracy: {accuracy_average}, time: {passed_time/1000}s")
        if use_wandb:
            wandb.log({"epoch": i + 1, "train_loss": loss_average, "train_accuracy" : accuracy_average})

In [34]:
train(model,
      criterion,
      torch.optim.SGD(model.classifier.parameters(), lr=0.01),
      1)

Dataset size: 13631


  return F.conv2d(input, weight, bias, self.stride,
  return self._call_impl(*args, **kwargs)
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch [1/1], train_loss: 1.7287859148573748, train_accuracy: 0.4058327525493361, time: 62.301s


In [35]:
torch.save(model,'model.pt')

In [36]:
def test(model, criterion):
    model.cuda()
    criterion.cuda()
    batches = test_batches
    print("Dataset size:", len(batches))
    batch_index = 0

    total_loss = 0
    total_accuracy = 0
    for i in range(len(batches)):

        batch = batches[i][0]
        truth = batches[i][1]

        if batch_index == len(batches):
            batch_index = 0

        out = model.forward(batch)
        loss = criterion(out, truth)
        print(f"Batch [{i+1}/{len(batches)}], test_loss: {loss.item()}, test_accuracy: {(accuracy(out,truth).sum()/BATCH_SIZE).item()}")

        total_loss += loss.item()
        total_accuracy+=(accuracy(out,truth).sum()/BATCH_SIZE).item()

    return (total_loss / len(batches)),total_accuracy / len(batches)

In [37]:
test(model, criterion)

Dataset size: 500
Batch [1/500], test_loss: 1.4913530349731445, test_accuracy: 0.46875
Batch [2/500], test_loss: 1.5333746671676636, test_accuracy: 0.4375
Batch [3/500], test_loss: 1.5793960094451904, test_accuracy: 0.46875
Batch [4/500], test_loss: 1.4574456214904785, test_accuracy: 0.515625
Batch [5/500], test_loss: 1.5792930126190186, test_accuracy: 0.4375
Batch [6/500], test_loss: 1.6646337509155273, test_accuracy: 0.46875
Batch [7/500], test_loss: 1.4459553956985474, test_accuracy: 0.421875
Batch [8/500], test_loss: 1.4522058963775635, test_accuracy: 0.515625
Batch [9/500], test_loss: 1.64620041847229, test_accuracy: 0.4375
Batch [10/500], test_loss: 1.4456701278686523, test_accuracy: 0.5625
Batch [11/500], test_loss: 1.8471453189849854, test_accuracy: 0.40625
Batch [12/500], test_loss: 1.373580813407898, test_accuracy: 0.46875
Batch [13/500], test_loss: 1.812636375427246, test_accuracy: 0.359375
Batch [14/500], test_loss: 1.3551278114318848, test_accuracy: 0.46875
Batch [15/500],

(1.5775572588443756, 0.43834375)

In [38]:
def eye_test(model,puzzle):
    tensor=puzzle_to_tensor(puzzle).cuda()
    out=model.forward(tensor)
    return sorted(zip(expected_tags_list,out.squeeze().tolist()),key=lambda x:-x[1]),puzzle.tags

eye_test(model,filter_data(load(100))[0])

([('fork', -1.4304933547973633),
  ('deflection', -1.6195344924926758),
  ('skewer', -1.8068654537200928),
  ('pin', -2.0664966106414795),
  ('discoveredAttack', -2.3324050903320312),
  ('sacrifice', -2.383424758911133),
  ('attraction', -3.242687940597534),
  ('clearance', -3.7647244930267334),
  ('zugzwang', -4.711508750915527),
  ('xRayAttack', -4.775970458984375),
  ('doubleCheck', -5.785516738891602)],
 ['crushing', 'endgame', 'exposedKing', 'long', 'skewer'])

In [None]:
sweep_config = {
    'method': 'random',  # random, grid or bayes
    'name': 'sweep-chess-tactics',
    'metric': {'goal': 'minimize', 'name': 'train_loss'},
    'parameters':
    {
        # 'batch_size': {'values': [64, 128, 256]},
        'epochs': {'values': [5, 10, 15]},
        # 'learning_rate': {'values': [0.1, 0.01, 0.001]},
        'learning_rate': {'values': [0.001]},
        # 'learning_rate': {'max': 0.1, 'min': 0.0001},
        'optimizer': {
            'values': ['sgd', 'adam']
        },
        'criterion': {
            'values': ['NLLLoss']
        },
     },

}

def create_optimizer(model, optimizer):
    if optimizer == "sgd":
        optimizer = torch.optim.SGD
    elif optimizer == "adam":
        optimizer = torch.optim.Adam
    return optimizer

def create_criterion(criterion = "NLLLoss"):
    if criterion == "NLLLoss":
        return nn.NLLLoss()

def main(config=None):
    with wandb.init(config=config):
        opt_fn = create_optimizer(model, wandb.config.optimizer)
        crt_fn = create_criterion(wandb.config.criterion)
        train(model, crt_fn, opt_fn(model.classifier.parameters(), lr=wandb.config.learning_rate), wandb.config.epochs)
        # train(model, criterion, opt_fn(model.classifier.parameters(), lr=wandb.config.learning_rate), wandb.config.epochs)


if use_wandb:
    sweep_id = wandb.sweep(sweep_config, project="chess-tactics-swp")
    wandb.agent(sweep_id, main, count=2)

Create sweep with ID: u8y52lv7
Sweep URL: https://wandb.ai/wsniady-org/chess-tactics-swp/sweeps/u8y52lv7


[34m[1mwandb[0m: Agent Starting Run: q5lhy4q3 with config:
[34m[1mwandb[0m: 	criterion: NLLLoss
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: sgd


Dataset size: 13631
Epoch [1/10], train_loss: 1.187486183922321, train_accuracy: 0.5998081120240628, time: 59.478s
Epoch [2/10], train_loss: 1.1814785171618563, train_accuracy: 0.6022267716968674, time: 60.54s
Epoch [3/10], train_loss: 1.1771843292315947, train_accuracy: 0.6039278574572665, time: 59.747s


In [40]:
if use_wandb:
    wandb.finish()