In [3]:
import pandas as pd
import numpy as np
import chess

# Plane index (0-5) assigned to each piece type. board2tensor_1d adds 6 to this
# index for black pieces, giving 12 planes in total.
piece_map = {
    piece_type: plane
    for plane, piece_type in enumerate(
        (
            chess.PAWN,
            chess.KNIGHT,
            chess.BISHOP,
            chess.ROOK,
            chess.QUEEN,
            chess.KING,
        )
    )
}

def board2tensor_1d(board):
    """
    Encode a position as a flat one-hot vector of 12 * 64 = 768 float32 values.

    The vector is the row-major flattening of a (12, 64) array: row =
    ``piece_map[piece_type]`` (plus 6 when the piece is black), column = the
    square index from ``chess.SQUARES``. Occupied (plane, square) cells are
    1.0, everything else is 0.0.

    Args:
        board: object exposing ``piece_at(square)``; the returned piece must
            provide ``piece_type`` and ``color``.

    Returns:
        A new 1-D ``np.float32`` array of length 768.
    """
    planes = np.zeros((12, 64), dtype=np.float32)

    for square in chess.SQUARES:
        occupant = board.piece_at(square)
        if not occupant:
            # Empty square: leave the whole column at zero.
            continue

        # White pieces use planes 0-5, black pieces use planes 6-11.
        colour_offset = 6 if occupant.color == chess.BLACK else 0
        plane = piece_map[occupant.piece_type] + colour_offset
        planes[plane, square] = 1.0

    return planes.flatten()

def load_and_prepare_gen(path, n=None):
    """
    Lazily yield training samples parsed from a text file of scored moves.

    Every non-empty line is expected to look like
    ``<FEN>|<uci>:<score>;<uci>:<score>;...``. For each move that is kept, the
    move is pushed onto a copy of the line's position and the resulting
    position is encoded with ``board2tensor_1d``.

    Filtering applied to the moves of one line, in order:
      * pairs without a ``:`` separator or with a non-numeric score are skipped;
      * scores above 4900 are treated as mate scores and skipped;
      * a move is emitted only if its score is strictly greater than the
        highest non-mate score seen earlier on the same line. The first
        non-mate score merely seeds that running maximum.
      * moves whose UCI text cannot be parsed are skipped (their score still
        counts towards the running maximum).

    Lines that do not contain the ``|`` separator are skipped.

    Args:
        path: path to the UTF-8 text file to read.
        n: optional maximum number of samples to yield. ``None`` means no
            limit; ``0`` or a negative value yields nothing (the file is not
            even opened).

    Yields:
        dict with keys ``"fen_after"`` (FEN of the position after the move),
        ``"X"`` (output of ``board2tensor_1d``) and ``"y"`` (the move's score
        as a float).

    Errors raised by ``open`` and by ``chess.Board(fen)`` are not caught and
    propagate to the caller.
    """
    # Honour the limit up front: previously the limit was only checked after a
    # yield, so n=0 still produced one sample.
    if n is not None and n <= 0:
        return

    count = 0

    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            # partition() never raises, unlike unpacking split("|"), so one
            # malformed line no longer aborts the whole stream.
            fen, line_sep, moves_str = line.partition("|")
            if not line_sep:
                continue

            board = chess.Board(fen)
            p_score = None  # highest non-mate score seen so far on this line

            for mp in moves_str.split(";"):
                uci, pair_sep, score_text = mp.partition(":")
                if not pair_sep:
                    continue

                try:
                    score = float(score_text)
                except ValueError:
                    # Non-numeric score (or extra ':' in the pair): skip it.
                    continue

                # Drop mate scores.
                # NOTE(review): only large positive scores are filtered; large
                # negative (being-mated) scores pass through - confirm intent.
                if score > 4900:
                    continue

                # Drop moves that did not improve on the best score so far
                # (the original comment calls this eliminating beta pruning).
                if p_score is None:
                    # NOTE(review): the first non-mate move only seeds the
                    # running maximum and is never emitted itself. This keeps
                    # the original control flow - confirm it is intended.
                    p_score = score
                    continue
                if not p_score < score:
                    continue
                p_score = score

                # Parse the move; an unparseable UCI string is skipped.
                try:
                    move = chess.Move.from_uci(uci)
                except ValueError:
                    continue

                board_copy = board.copy()
                board_copy.push(move)

                # Encode the position reached after the move.
                x = board2tensor_1d(board_copy)

                yield {
                    "fen_after": board_copy.fen(),
                    "X": x,
                    "y": score
                }

                count += 1
                if n is not None and count >= n:
                    return

In [4]:
import random
import os

def save_batch(prefix, batch_id, X_batch, y_batch):
    """
    Write one batch to a compressed ``.npz`` archive.

    The archive is named ``<prefix>_<batch_id>.npz`` and stores ``X_batch``
    under the key ``"X"`` and ``y_batch`` under the key ``"y"``.
    """
    target = f"{prefix}_{batch_id}.npz"
    arrays = {"X": X_batch, "y": y_batch}
    np.savez_compressed(target, **arrays)


def create_shuffled_split_files(path_input, batch_size=10000, test_size=0.2, seed=None):
    """
    Stream samples from ``path_input`` into train/test ``.npz`` batch files.

    Each sample produced by ``load_and_prepare_gen(path_input)`` is routed
    independently: with probability ``test_size`` it goes to the test split,
    otherwise to the train split. Whenever a split has buffered
    ``batch_size`` samples it is written with ``save_batch`` as
    ``test_<id>.npz`` / ``train_<id>.npz`` (ids start at 0 and increase per
    split). Leftover samples are written at the end, train first.

    Note that, despite the name, samples are not shuffled: within each split
    they keep the order in which the generator produced them.

    Args:
        path_input: path passed through to ``load_and_prepare_gen``.
        batch_size: number of samples per written file.
        test_size: probability of assigning a sample to the test split.
        seed: optional seed for a private random generator, making the split
            reproducible. With the default ``None`` the module-level
            ``random`` generator is used, so a prior ``random.seed(...)`` call
            still controls the split exactly as before.

    Returns:
        None. The only effect is the files written by ``save_batch``.
    """
    rng = random.Random(seed) if seed is not None else random

    # Per-split sample buffers and the id of the next file to write.
    buffers = {"train": ([], []), "test": ([], [])}
    next_id = {"train": 0, "test": 0}

    def _flush(split):
        """Write the buffered samples of ``split`` to disk and reset its buffer."""
        X_buf, y_buf = buffers[split]
        # np.array copies the data, so clearing the lists afterwards is safe.
        save_batch(split, next_id[split], np.array(X_buf), np.array(y_buf))
        X_buf.clear()
        y_buf.clear()
        next_id[split] += 1

    for sample in load_and_prepare_gen(path_input):
        # Random split: one draw per sample.
        split = "test" if rng.random() < test_size else "train"
        X_buf, y_buf = buffers[split]
        X_buf.append(sample["X"])
        y_buf.append(sample["y"])

        if len(X_buf) >= batch_size:
            _flush(split)

    # Leftovers that did not fill a complete batch.
    for split in ("train", "test"):
        if buffers[split][0]:
            _flush(split)


In [5]:
import glob
import tensorflow as tf

def npz_batch_generator(pattern, batch_size=512, drop_remainder=False):
    """
    Stream ``(X, y)`` mini-batches from every ``.npz`` file matching ``pattern``.

    Matching files are visited in sorted path order (plain string sort, so
    ``x_10.npz`` comes before ``x_2.npz``) and samples keep their stored
    order. Batches may span file boundaries. Both arrays are yielded as
    ``np.float32``.

    Args:
        pattern: glob pattern selecting archives that store arrays under the
            keys ``"X"`` and ``"y"``.
        batch_size: number of samples per batch; must be at least 1.
        drop_remainder: when True, a final batch shorter than ``batch_size``
            is discarded. With the default False the trailing samples are
            yielded as one smaller batch instead of being silently lost.

    Yields:
        Tuples ``(X_batch, y_batch)`` whose first dimension is ``batch_size``
        (or fewer for the final batch).

    Raises:
        ValueError: on first iteration, if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # Samples left over from previous files that did not fill a whole batch.
    carry_X = carry_y = None

    for fname in sorted(glob.glob(pattern)):
        # Close the archive as soon as both arrays are in memory.
        with np.load(fname) as data:
            X = np.asarray(data["X"], dtype=np.float32)
            y = np.asarray(data["y"], dtype=np.float32)

        if len(X) == 0:
            continue

        if carry_X is not None and len(carry_X):
            X = np.concatenate([carry_X, X])
            y = np.concatenate([carry_y, y])

        # Emit every complete batch by slicing instead of appending row by row.
        full = (len(X) // batch_size) * batch_size
        for start in range(0, full, batch_size):
            stop = start + batch_size
            yield X[start:stop], y[start:stop]

        carry_X, carry_y = X[full:], y[full:]

    if carry_X is not None and len(carry_X) and not drop_remainder:
        yield carry_X, carry_y

def make_tf_dataset(pattern, batch_size=512):
    """
    Wrap ``npz_batch_generator`` in a prefetching ``tf.data.Dataset``.

    Each dataset element is an already-batched pair: a float32 tensor declared
    with shape ``(None, 768)`` and a float32 tensor declared with shape
    ``(None,)``.

    Args:
        pattern: glob pattern forwarded to ``npz_batch_generator``.
        batch_size: batch size forwarded to ``npz_batch_generator``.

    Returns:
        The dataset with ``prefetch(tf.data.AUTOTUNE)`` applied.
    """
    def _batches():
        # A fresh generator is created every time the dataset is iterated.
        return npz_batch_generator(pattern, batch_size)

    features_spec = tf.TensorSpec(shape=(None, 768), dtype=tf.float32)
    targets_spec = tf.TensorSpec(shape=(None,), dtype=tf.float32)

    dataset = tf.data.Dataset.from_generator(
        _batches,
        output_signature=(features_spec, targets_spec),
    )
    return dataset.prefetch(tf.data.AUTOTUNE)


2025-12-02 16:52:24.685010: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1764690744.695578   56516 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1764690744.698952   56516 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1764690744.707081   56516 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1764690744.707096   56516 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1764690744.707097   56516 computation_placer.cc:177] computation placer alr

In [6]:
# Split the positional training file into train_<id>.npz / test_<id>.npz files
# of up to 48000 samples each; every sample has a 0.2 chance of going to test.
create_shuffled_split_files(
    "./trainingPositional.txt",
    batch_size=48000,
    test_size=0.2,
)