In [1]:
from raw_data_gather import gather
!pip install zstandard
!pip install stockfish
!pip install chess




[notice] A new release of pip is available: 23.2.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 23.2.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 23.2.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import concurrent
import csv
import os
from concurrent.futures import ThreadPoolExecutor

import chess
from stockfish import Stockfish

Download data

In [3]:
# Fetch the puzzle database; "lichess_db_puzzle.csv" is the file that load() opens later.
# NOTE(review): gather comes from raw_data_gather, which is not visible here —
# presumably it downloads the .zst archive from the given base URL and
# decompresses it to the CSV name; confirm against that module.
gather("https://database.lichess.org","lichess_db_puzzle.csv.zst","lichess_db_puzzle.csv")

Load puzzles

In [5]:
class Puzzle:
    """A single puzzle parsed from one comma-separated row.

    Attributes:
        fen: column 1 of the row, kept as-is.
        moves: column 2 of the row, split on single spaces.
        tags: column 7 of the row, split on single spaces.
    """

    def __init__(self, row: str):
        """Split ``row`` on commas and keep the columns used downstream.

        Raises:
            IndexError: if the row has fewer than eight comma-separated columns.
        """
        columns = row.split(',')
        self.fen = columns[1]
        self.moves = columns[2].split(" ")
        self.tags = columns[7].split(" ")

    def __str__(self):
        """Return a compact one-line description of the puzzle."""
        pieces = [
            "{fen: ", self.fen,
            " ,tags: [", ", ".join(self.tags),
            "],moves: [", ",".join(self.moves),
            "]}",
        ]
        return "".join(pieces)

In [6]:
def load(k: int, path: str = "lichess_db_puzzle.csv") -> [Puzzle]:
    """Read up to ``k`` puzzles from the puzzle CSV.

    The first line of the file is treated as a header and skipped.

    Args:
        k: maximum number of puzzles to read.
        path: CSV file to read; defaults to the file name used by the
            download step.

    Returns:
        A list of at most ``k`` ``Puzzle`` objects, in file order. Fewer are
        returned when the file contains fewer data rows.
    """
    puzzles = []
    # The context manager closes the file even if parsing a row raises.
    with open(path, encoding="utf-8") as f:
        next(f, None)  # skip the header line (no error on an empty file)
        for line in f:
            if len(puzzles) >= k:
                break
            # Skip blank lines instead of failing inside Puzzle on a missing column.
            if line.strip():
                puzzles.append(Puzzle(line))
    return puzzles

Evaluate positions

In [7]:
def generate_positions_for_puzzle(puzzle: Puzzle) -> [(str, [str])]:
    """List every intermediate position of a puzzle.

    Each position is a ``(fen, moves_prefix)`` pair: the puzzle's starting FEN
    together with the first ``n`` moves, for ``n`` from 0 up to and including
    the total number of moves.
    """
    positions = []
    for played in range(len(puzzle.moves) + 1):
        positions.append((puzzle.fen, puzzle.moves[:played]))
    return positions

In [8]:
def generate_fen_for_position(position: (str, [str])) -> str:
    """Replay a position's moves and return the resulting FEN.

    Args:
        position: ``(start_fen, uci_moves)`` pair.

    Returns:
        The FEN of the board after all moves are pushed, or the sentinel
        string ``'FINISHED'`` when the game is over on that board.
    """
    board = chess.Board(position[0])
    uci_moves = position[1]
    for uci in uci_moves:
        board.push_uci(uci)
    # Finished games are marked with a sentinel so callers can filter them out.
    return 'FINISHED' if board.is_game_over() else board.fen()

In [9]:
def puzzles_to_fens(puzzles: [Puzzle]) -> [str]:
    """Flatten puzzles into the FEN strings of all their intermediate positions.

    The output keeps puzzle order and, within a puzzle, position order. Entries
    may be the ``'FINISHED'`` sentinel produced by generate_fen_for_position.
    """
    fens = []
    for puzzle in puzzles:
        for position in generate_positions_for_puzzle(puzzle):
            fens.append(generate_fen_for_position(position))
    return fens

In [10]:
def filter_finished_fens(fens: [str]) -> [str]:
    """Return ``fens`` without the ``'FINISHED'`` sentinel entries, order preserved."""
    kept = []
    for fen in fens:
        if fen != 'FINISHED':
            kept.append(fen)
    return kept

In [11]:
def evaluate_fen(fen: str, stockfish_path: str) -> dict:
    """Evaluate a single position with Stockfish.

    Args:
        fen: position to evaluate, in FEN notation.
        stockfish_path: path to the Stockfish executable.

    Returns:
        The engine evaluation as returned by ``Stockfish.get_evaluation()``;
        the rest of this notebook reads its ``"type"`` and ``"value"`` keys.
    """
    # NOTE: a new Stockfish wrapper is constructed for every call.
    stockfish = Stockfish(stockfish_path)
    stockfish.set_fen_position(fen)
    return stockfish.get_evaluation()

In [12]:
def evaluate_fens(fens: [str], stockfish_path: str, max_workers: int = 10) -> [(str, dict)]:
    """Evaluate many positions concurrently.

    Args:
        fens: positions to evaluate, in FEN notation.
        stockfish_path: path to the Stockfish executable.
        max_workers: size of the thread pool (defaults to 10, the value that
            was previously hard-coded).

    Returns:
        A list of ``(fen, evaluation)`` pairs in the same order as ``fens``.

    Raises:
        Whatever ``evaluate_fen`` raised for the first failing position, once
        all submitted work has finished.
    """
    # Use the directly imported ThreadPoolExecutor rather than reaching the
    # submodule through the bare `concurrent` package.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [(fen, executor.submit(evaluate_fen, fen, stockfish_path)) for fen in fens]
    # Leaving the `with` block waits for every task, so result() does not block here.
    return [(fen, future.result()) for fen, future in futures]

Generate dataset with evaluated fens

In [13]:
MATE_VALUE = 10 ** 7


def map_evaluation_to_value(evaluation) -> float | str:
    match evaluation["type"]:
        case "cp":
            return evaluation["value"]
        case "mate":
            return "MATE"

In [14]:
def generate_dataset(size, stockfish_path) -> [(str, float)]:
    """Build a list of ``(fen, value)`` rows from the first ``size`` puzzles.

    Pipeline: load puzzles, expand them into position FENs, drop finished
    positions, evaluate the rest with Stockfish, and map each evaluation to a
    value. Positions whose evaluation maps to ``"MATE"`` are excluded.

    Args:
        size: number of puzzles to load.
        stockfish_path: path to the Stockfish executable.

    Returns:
        List of ``(fen, value)`` pairs for the positions that were kept.
    """
    fens = filter_finished_fens(puzzles_to_fens(load(size)))
    dataset = []
    for fen, evaluation in evaluate_fens(fens, stockfish_path):
        value = map_evaluation_to_value(evaluation)
        # Filter on the mapped value: the raw evaluation is a dict and never
        # equals the string "MATE", so comparing it would keep every row.
        if value != "MATE":
            dataset.append((fen, value))
    return dataset

In [15]:
def save_dataset_to_csv(dataset, filename):
    """Write every row of ``dataset`` to ``filename`` as CSV, replacing the file.

    Args:
        dataset: iterable of rows, each row an iterable of cell values.
        filename: destination path.
    """
    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='') as sink:
        emit_row = csv.writer(sink).writerow
        for record in dataset:
            emit_row(record)

In [16]:
# Number of puzzles to load from the CSV.
SIZE = 10 ** 5
# Location of the Stockfish executable. Set the STOCKFISH_PATH environment
# variable to override the machine-specific default below.
STOCKFISH_PATH = os.environ.get(
    "STOCKFISH_PATH",
    "D:\\Programy\\stockfish\\stockfish-windows-x86-64-avx2.exe",
)
dataset = generate_dataset(SIZE, STOCKFISH_PATH)
print("Dataset size: ", len(dataset))

KeyboardInterrupt: 

In [None]:
# Persist the generated rows; requires `dataset` from the generation cell above.
save_dataset_to_csv(dataset, "puzzle_dataset.csv")