# LOADING DATASET

In [66]:
!pip install zstandard
!pip install chess



In [33]:
import pathlib
import urllib
import zstandard
import chess
import torch
import numpy as np
from torch import nn
import math
import time

In [2]:
def __download(url: str, name: str) -> str:
    path, _ = urllib.request.urlretrieve(url, name)
    return path


def __unpack(path: str, name: str):
    input_file = pathlib.Path(path)
    with open(input_file, 'rb') as compressed:
        decomp = zstandard.ZstdDecompressor()
        output_path = name
        with open(output_path, 'wb') as destination:
            decomp.copy_stream(compressed, destination)
            destination.close()
        compressed.close()


def __remove(path: str):
    pathlib.Path.unlink(pathlib.Path(path))

In [3]:
path = __download("https://database.lichess.org/lichess_db_puzzle.csv.zst", "lichess_db_puzzle.csv.zst")

In [4]:
__unpack(path, "lichess_db_puzzle.csv")

In [5]:
__remove("lichess_db_puzzle.csv.zst")

In [6]:
class Puzzle:
    def __init__(self, row: str):
        fields = row.split(',')
        self.fen = fields[1]
        self.moves = fields[2].split(" ")
        self.tags = fields[7].split(" ")

    def __str__(self):
        return "{fen: " + self.fen + " ,tags: [" + ", ".join(self.tags) + "],moves: [" + ",".join(self.moves) + "]}"

In [7]:
def load(k: int) -> [Puzzle]:
    f = open("lichess_db_puzzle.csv")
    f.readline()
    result = []
    for i in range(k):
        result.append(Puzzle(f.readline()))
    f.close()
    return result

In [8]:
load(10)[0].__str__()

'{fen: r6k/pp2r2p/4Rp1Q/3p4/8/1N1P2R1/PqP2bPP/7K b - - 0 24 ,tags: [crushing, hangingPiece, long, middlegame],moves: [f2g3,e6e7,b2b1,b3c1,b1c1,h6c1]}'

# FILTER DATASET

In [9]:
expected_tags = {
    'attraction',
    'discoveredAttack',
    'doubleCheck',
    'fork',
    'pin',
    'sacrifice',
    'skewer',
    'xRayAttack',
    'zugzwang',
    'deflection',
    'clearance'
}

In [10]:
expected_tags_list = list(expected_tags)

In [11]:
def filter_data(data: [Puzzle]) -> [Puzzle]:
    return list(filter(lambda p: len(set(p.tags) & expected_tags) == 1, data))

In [12]:
len(filter_data(load(100)))

37

# CONVERSION TO TENSOR

In [13]:
def bitboard_to_tensor(bitboard: int) -> torch.Tensor:
    li = [1 if digit == '1' else 0 for digit in bin(bitboard)[2:]]
    li = [0 for _ in range(64 - len(li))] + li
    return torch.tensor(li).reshape((8, 8))

In [14]:
def fen_to_tensors_list(fen: str) -> [torch.Tensor]:
    board = chess.Board(fen)
    return [
        bitboard_to_tensor(board.occupied_co[chess.WHITE]),
        bitboard_to_tensor(board.occupied_co[chess.BLACK]),
        bitboard_to_tensor(board.pawns),
        bitboard_to_tensor(board.kings),
        bitboard_to_tensor(board.queens),
        bitboard_to_tensor(board.knights),
        bitboard_to_tensor(board.bishops),
        bitboard_to_tensor(board.rooks)
    ]

In [15]:
fen_to_tensors_list(load(1)[0].fen)

[tensor([[0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [1, 0, 0, 1, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 1, 0, 0, 1, 0, 1, 0],
         [1, 1, 0, 0, 0, 1, 0, 1],
         [1, 0, 0, 0, 0, 0, 0, 0]]),
 tensor([[1, 0, 0, 0, 0, 0, 0, 1],
         [1, 0, 0, 1, 0, 0, 1, 1],
         [0, 0, 1, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 1, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 1, 0, 0, 0, 1, 0],
         [0, 0, 0, 0, 0, 0, 0, 0]]),
 tensor([[0, 0, 0, 0, 0, 0, 0, 0],
         [1, 0, 0, 0, 0, 0, 1, 1],
         [0, 0, 1, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 1, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 1, 0, 0, 0],
         [1, 1, 0, 0, 0, 1, 0, 1],
         [0, 0, 0, 0, 0, 0, 0, 0]]),
 tensor([[1, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0

In [16]:
def move_to_tensor(move: str) -> torch.Tensor:
    x1 = 7 - ord(move[0]) + ord('a')
    y1 = 8 - int(move[1])
    x2 = 7 - ord(move[2]) + ord('a')
    y2 = 8 - int(move[3])
    tensor = torch.zeros(8, 8)
    tensor[y1][x1] = 1
    tensor[y2][x2] = 1
    return tensor

In [18]:
print(move_to_tensor('e2e4'))

tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.]])


In [19]:
def puzzle_to_tensor(puzzle: Puzzle) -> torch.Tensor:
    fen_tensors = fen_to_tensors_list(puzzle.fen)
    move_tensors = [move_to_tensor(puzzle.moves[0]), move_to_tensor(puzzle.moves[1])]  # FIRST TWO MOVES
    return torch.stack(fen_tensors + move_tensors)

In [20]:
puzzle_to_tensor(load(1)[0])

tensor([[[0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [1., 0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 1., 0., 0., 1., 0., 1., 0.],
         [1., 1., 0., 0., 0., 1., 0., 1.],
         [1., 0., 0., 0., 0., 0., 0., 0.]],

        [[1., 0., 0., 0., 0., 0., 0., 1.],
         [1., 0., 0., 1., 0., 0., 1., 1.],
         [0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0., 0., 1., 1.],
         [0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0.],
         [1., 1., 0., 0., 0., 1., 0., 1.],
       

# CONVERT AND BATCH DATASET

In [21]:
def puzzle_to_truth(puzzle: Puzzle) -> torch.Tensor:
    tensor = torch.zeros(len(expected_tags_list))
    [tag] = set(puzzle.tags) & expected_tags
    index = expected_tags_list.index(tag)
    tensor[index] = 1
    return torch.zeros(1) + index

In [22]:
puzzle_to_truth(filter_data(load(100))[0])

tensor([9.])

In [23]:
BATCH_SIZE = 64

In [24]:
def convert_dataset(puzzles: [Puzzle]) -> list[tuple[torch.Tensor, torch.Tensor]]:
    return [(puzzle_to_tensor(puzzle), puzzle_to_truth(puzzle)) for puzzle in puzzles]

In [25]:
def dataset_to_batches(dataset: list[tuple[torch.Tensor, torch.Tensor]]) -> list[tuple[torch.Tensor, torch.Tensor]]:
    batches = []
    index = 0
    while index + BATCH_SIZE <= len(dataset):
        batch = []
        truth = []
        max_index = index + BATCH_SIZE
        while index < max_index:
            batch.append(dataset[index][0])
            truth.append(dataset[index][1])
            index += 1
        batches.append((torch.stack(batch).cuda(), torch.tensor(truth).cuda().type(torch.long)))

    return batches

In [26]:
batched_dataset=dataset_to_batches(convert_dataset(filter_data(load(10000))))
print(len(batched_dataset))
print(batched_dataset[0][0].shape,batched_dataset[0][1].shape)

47
torch.Size([64, 10, 8, 8]) torch.Size([64])


# TRAIN

In [27]:
def accuracy(out,truth):
    return torch.argmax(out,dim=1) == truth

In [68]:
class Model(nn.Module):
    def __init__(self, *args, **kwargs):
        super(Model, self).__init__()
        self.classifier = nn.Sequential(*args, **kwargs)

    def forward(self, X):
        return self.classifier.forward(X)


model = Model(nn.Conv2d(10, 8 * 8, kernel_size=4, padding=4),
              nn.ReLU(),
              nn.Conv2d(8 * 8, 4 * 4, kernel_size=2, padding=4),
              nn.ReLU(),
              nn.MaxPool2d(kernel_size=4, stride=1),
              nn.Conv2d(4*4, 8 * 8, kernel_size=2, padding=4),
              nn.ReLU(),
              nn.Conv2d(8 * 8, 1, kernel_size=4, padding=4),
              nn.ReLU(),
              nn.Flatten(),
              nn.Linear(841, 256),
              nn.ReLU(),
              nn.Linear(256, 64),
              nn.ReLU(),
              nn.Linear(64, 11),
              nn.LogSoftmax(),
              )
criterion = (
    nn.NLLLoss()
)


In [57]:
size_to_load=3000000
test_batches_count=500

In [48]:
all_batches=dataset_to_batches(convert_dataset(filter_data(load(size_to_load))))
train_batches=all_batches[test_batches_count:]
test_batches=all_batches[:test_batches_count]
print(len(all_batches),len(train_batches),len(test_batches))

14131 13631 500


In [49]:
def train(model, criterion, optimizer, epoch):
    model.cuda()
    criterion.cuda()
    batches = train_batches
    size=len(batches)
    print("Dataset size:", len(batches))
    for i in range(epoch):
        time_started = time.time() * 1000
        loss_sum=0.0
        accuracy_sum=0.0
        for batch, truth in batches:
            optimizer.zero_grad()
            out = model.forward(batch)
            loss = criterion(out, truth)
            loss.backward()
            optimizer.step()
            accuracy_value = accuracy(out,truth).sum()/BATCH_SIZE
            
            loss_sum+=loss.item()
            accuracy_sum+=accuracy_value.item()
            
        passed_time = math.ceil(time.time() * 1000 - time_started)
        print(f"Epoch [{i+1}/{epoch}], loss: {loss_sum/size}, accuracy: {accuracy_sum/size}, time: {passed_time/1000}s")

In [69]:
train(model,
      criterion,
      torch.optim.SGD(model.classifier.parameters(), lr=0.01),
      50)

Dataset size: 13631
Epoch [1/50], loss: 1.78140037952345, accuracy: 0.3936053389333138, time: 20.328s
Epoch [2/50], loss: 1.5396891129304602, accuracy: 0.4606171135646688, time: 19.267s
Epoch [3/50], loss: 1.4288309997380957, accuracy: 0.5023865637150613, time: 19.271s
Epoch [4/50], loss: 1.3520844229362077, accuracy: 0.5342097058176216, time: 20.084s
Epoch [5/50], loss: 1.2969078237588352, accuracy: 0.5577658462328516, time: 22.822s
Epoch [6/50], loss: 1.2572920057737034, accuracy: 0.5740098397036167, time: 23.771s
Epoch [7/50], loss: 1.228088633337756, accuracy: 0.5854669503337979, time: 19.807s
Epoch [8/50], loss: 1.2046782599927288, accuracy: 0.5945019624385591, time: 21.229s
Epoch [9/50], loss: 1.185227063941235, accuracy: 0.6021350689604578, time: 20.709s
Epoch [10/50], loss: 1.1684491891703894, accuracy: 0.6083995121414423, time: 20.289s
Epoch [11/50], loss: 1.153696843554746, accuracy: 0.6132723662974103, time: 21.455s
Epoch [12/50], loss: 1.140628134969937, accuracy: 0.6181635

In [None]:
torch.save(model,'model.pt')

In [70]:
def test(model, criterion):
    model.cuda()
    criterion.cuda()
    batches = test_batches
    print("Dataset size:", len(batches))
    batch_index = 0

    total_loss = 0
    total_accuracy = 0
    for i in range(len(batches)):

        batch = batches[i][0]
        truth = batches[i][1]

        if batch_index == len(batches):
            batch_index = 0

        out = model.forward(batch)
        loss = criterion(out, truth)
        print(f"Batch [{i+1}/{len(batches)}], loss: {loss.item()}, accuracy: {(accuracy(out,truth).sum()/BATCH_SIZE).item()}")

        total_loss += loss.item()
        total_accuracy+=(accuracy(out,truth).sum()/BATCH_SIZE).item()

    return (total_loss / len(batches)),total_accuracy / len(batches)

In [72]:
test(model, criterion)

Dataset size: 500
Batch [1/500], loss: 0.9553394317626953, accuracy: 0.671875
Batch [2/500], loss: 1.1467819213867188, accuracy: 0.625
Batch [3/500], loss: 0.8754767775535583, accuracy: 0.703125
Batch [4/500], loss: 0.9554334282875061, accuracy: 0.671875
Batch [5/500], loss: 0.8680264949798584, accuracy: 0.6875
Batch [6/500], loss: 1.1337652206420898, accuracy: 0.609375
Batch [7/500], loss: 0.984540581703186, accuracy: 0.703125
Batch [8/500], loss: 0.9950527548789978, accuracy: 0.671875
Batch [9/500], loss: 0.9830299019813538, accuracy: 0.6875
Batch [10/500], loss: 0.9327758550643921, accuracy: 0.703125
Batch [11/500], loss: 1.5757874250411987, accuracy: 0.46875
Batch [12/500], loss: 0.8815907835960388, accuracy: 0.75
Batch [13/500], loss: 1.37360417842865, accuracy: 0.6875
Batch [14/500], loss: 1.05478835105896, accuracy: 0.6875
Batch [15/500], loss: 1.3739686012268066, accuracy: 0.53125
Batch [16/500], loss: 1.023643970489502, accuracy: 0.640625
Batch [17/500], loss: 1.00412952899932

(1.0648356592655182, 0.65103125)

In [73]:
def eye_test(model,puzzle):
    tensor=puzzle_to_tensor(puzzle).cuda()
    out=model.forward(tensor)
    return sorted(zip(expected_tags_list,out.squeeze().tolist()),key=lambda x:-x[1]),puzzle.tags

eye_test(model,filter_data(load(100))[0])

([('deflection', -1.235765814781189),
  ('discoveredAttack', -1.252184271812439),
  ('pin', -2.113356590270996),
  ('attraction', -2.3022332191467285),
  ('fork', -2.7462387084960938),
  ('sacrifice', -2.806795358657837),
  ('clearance', -3.139376163482666),
  ('skewer', -3.9855213165283203),
  ('xRayAttack', -4.787434101104736),
  ('doubleCheck', -4.956280708312988),
  ('zugzwang', -7.119828701019287)],
 ['crushing', 'endgame', 'exposedKing', 'long', 'skewer'])