# In [None]:
import os
import gzip
import yaml
import multiprocessing


def position_to_coord(position, board_size=19):
    """Split a flat board index into an ``(x, y)`` pair.

    Args:
        position: Flat index of a point on the board.
        board_size: Number of points per line used for the split
            (defaults to 19).

    Returns:
        A tuple ``(x, y)`` where ``x`` is ``position % board_size`` and
        ``y`` is ``position // board_size``.
    """
    # divmod yields (quotient, remainder); callers expect remainder first.
    y, x = divmod(position, board_size)
    return x, y


def position_to_move_string(positions, board_size=19):
    """Render a sequence of flat board positions as a ';'-separated move string.

    Moves are tagged alternately, starting with ``B`` for index 0 and ``W``
    for index 1. Each move is formatted as ``<tag>[<letter><number>]`` where
    the letter is ``'a'`` offset by the x coordinate and the number is the
    y coordinate plus one, e.g. ``B[a1];W[b1]``.

    Args:
        positions: Iterable of flat board indices, in play order.
        board_size: Board size forwarded to ``position_to_coord``. Defaults
            to 19, the value that was previously implied, so existing
            callers are unaffected.

    Returns:
        The moves joined with ``';'``; an empty string for empty input.
    """
    moves = []
    for index, pos in enumerate(positions):
        x, y = position_to_coord(pos, board_size)
        tag = "B" if index % 2 == 0 else "W"
        column = chr(ord("a") + x)
        moves.append(f"{tag}[{column}{y + 1}]")
    return ";".join(moves)


def process_gz_file(gz_path, txt_output_folder):
    """Convert one gzip-compressed YAML file into a text file of move strings.

    The archive is parsed with ``yaml.safe_load`` and its ``BatchOfPositions``
    entry is read. Every item of that entry is rendered with
    ``position_to_move_string`` and written as one line of the output file.
    No file is written when the entry is missing or empty.

    The output file is ``txt_output_folder/<name>.txt`` where ``<name>`` is
    the base name of ``gz_path`` with its trailing ``.gz`` removed (a name
    without that suffix simply gets ``.txt`` appended). Only the base name is
    used, so two inputs sharing a base name map to the same output path and
    the later one overwrites the earlier.

    Args:
        gz_path: Path of the ``.gz`` file to read.
        txt_output_folder: Existing directory that receives the ``.txt`` file.

    Returns:
        None. Any exception raised while reading, parsing or writing is
        caught and reported on stdout instead of being propagated.
    """
    gz_name = os.path.basename(gz_path)
    # Swap only the trailing extension; str.replace(".gz", ".txt") would also
    # rewrite a ".gz" occurring earlier in the name (e.g. "a.gz.bak.gz").
    suffix = ".gz"
    stem = gz_name[:-len(suffix)] if gz_name.endswith(suffix) else gz_name
    txt_path = os.path.join(txt_output_folder, stem + ".txt")

    try:
        with gzip.open(gz_path, 'rt', encoding='utf-8') as f_in:
            data = yaml.safe_load(f_in)
            # A document that is not a mapping (e.g. an empty file, which
            # loads as None) raises AttributeError here and is reported below.
            batch_positions = data.get("BatchOfPositions", [])

        if batch_positions:
            moves_list = [position_to_move_string(positions) for positions in batch_positions]
            with open(txt_path, "w", encoding='utf-8') as txt_file:
                txt_file.write("\n".join(moves_list) + "\n")

    except Exception as e:
        # Best-effort: report the failing file and let the caller continue.
        print(f"Lỗi khi xử lý {gz_path}: {e}")


def extract_and_convert_all_gz(input_folder, txt_output_folder, batch_size=512, num_workers=None):
    """Convert every ``.gz`` file under ``input_folder`` using a process pool.

    ``input_folder`` is walked recursively; each file whose name ends with
    ``.gz`` is handed to ``process_gz_file`` together with
    ``txt_output_folder``. Files are dispatched to the pool in consecutive
    batches of ``batch_size``; each batch completes before the next is
    submitted.

    Args:
        input_folder: Root directory searched recursively for ``.gz`` files.
        txt_output_folder: Directory for the generated files; created if it
            does not exist.
        batch_size: Number of files submitted to the pool per ``starmap``
            call. Must be at least 1.
        num_workers: Number of worker processes. When ``None``, it is
            ``max(7, cpu_count() - 1)``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A zero step makes range() fail and a negative one silently yields
        # no batches at all, so reject both up front.
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    os.makedirs(txt_output_folder, exist_ok=True)

    if num_workers is None:
        # NOTE(review): max() makes 7 the *minimum* worker count, even on
        # machines with fewer cores - confirm this floor is intended.
        num_workers = max(7, multiprocessing.cpu_count() - 1)

    gz_files = [
        os.path.join(root, name)
        for root, _, names in os.walk(input_folder)
        for name in names
        if name.endswith(".gz")
    ]
    if not gz_files:
        # Nothing to convert: avoid starting worker processes for no work.
        return

    # Create the pool once and reuse it for all batches; starting and tearing
    # down a fresh set of worker processes per batch is pure overhead.
    with multiprocessing.Pool(processes=num_workers) as pool:
        for start in range(0, len(gz_files), batch_size):
            batch = gz_files[start:start + batch_size]
            pool.starmap(process_gz_file, [(path, txt_output_folder) for path in batch])


if __name__ == "__main__":
    # Script entry point: convert the archives under the source folder and
    # write the resulting text files to the destination folder.
    extract_and_convert_all_gz(
        input_folder=r"D:\FPT BT\KIFUS",
        txt_output_folder=r"D:\FPT BT\KIFUS_convert",
        batch_size=512,
    )