### GNN embedding

In [None]:
# One-time environment setup; left commented out (presumably so re-running the
# notebook does not reinstall). If re-enabled, `%pip install ...` is preferable
# to `!pip`, because `%pip` targets the environment of the running kernel.
#!pip install python-chess


Collecting python-chess
  Downloading python_chess-1.999-py3-none-any.whl (1.4 kB)
Collecting chess<2,>=1
  Downloading chess-1.11.2.tar.gz (6.1 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.1/6.1 MB[0m [31m39.1 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m0:01[0m:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: chess
  Building wheel for chess (setup.py) ... [?25ldone
[?25h  Created wheel for chess: filename=chess-1.11.2-py3-none-any.whl size=147780 sha256=72c3cd4c7dfb6457f076021e77d5a3c34d804cf5258324ae2fba1ae45e427176
  Stored in directory: /Users/franciscomeza/Library/Caches/pip/wheels/6c/68/b2/d8242fcd2fd057e87e1985fdd6e71c04b540fc5fc93ad87545
Successfully built chess
Installing collected packages: chess, python-chess
Successfully installed chess-1.11.2 python-chess-1.999

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[

In [2]:
# One-time install of the PyTorch / PyTorch Geometric stack; left commented out.
# If re-enabled, prefer `%pip` (installs into the running kernel's environment)
# and consider pinning versions so the notebook stays reproducible.
#!pip install torch-geometric torch torchvision torch-scatter torch-sparse

Collecting torch-geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m21.8 MB/s[0m eta [36m0:00:00[0m MB/s[0m eta [36m0:00:01[0m
Collecting torch-scatter
  Downloading torch_scatter-2.1.2.tar.gz (108 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.0/108.0 kB[0m [31m25.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting torch-sparse
  Downloading torch_sparse-0.6.18.tar.gz (209 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m210.0/210.0 kB[0m [31m21.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: torch-scatter, torch-sparse
  Building wheel for torch-scatter (setup.py) ... [?25ldone
[?25h  Created wheel for torch-scatter: filename=torch_scatter-2.1.2-cp38-cp38-macosx_13_0

In [3]:
import os

In [5]:
# Move the working directory up one level so that the relative paths used in
# later cells ("pgns", "temp_pgns", "chess_graphs.pt") resolve against it.
# NOTE(review): this cell is not idempotent - every re-run climbs one more
# directory level, so run it exactly once per kernel session.
os.chdir('..')

In [7]:
import os
import zipfile
import torch
import chess.pgn
import numpy as np
from tqdm import tqdm
from torch_geometric.data import Data

# =====================
# CONFIGURACIÓN
# =====================
ZIP_FOLDER = "pgns"              # folder containing the .zip archives
EXTRACT_FOLDER = "temp_pgns"     # temporary folder for extracted files
os.makedirs(EXTRACT_FOLDER, exist_ok=True)  # create the scratch folder up front; no error if it already exists

# =====================
# FUNCIÓN DE FEATURES
# =====================

def board_to_feature(board):
    """Encode a board state as a flat float32 vector of 773 features.

    Layout of the returned vector:
      * indices 0..767 - twelve one-hot planes of 64 squares each, in the
        order P, N, B, R, Q, K, p, n, b, r, q, k (keyed by ``piece.symbol()``);
        the entry for a piece sits at ``plane * 64 + square``.
      * indices 768..772 - ``board.turn``, then kingside/queenside castling
        rights for ``chess.WHITE``, then the same pair for ``chess.BLACK``,
        each stored as 0.0 or 1.0.

    Args:
        board: object exposing ``piece_map()``, ``turn`` and the
            ``has_*_castling_rights(color)`` methods (a ``chess.Board``).

    Returns:
        ``numpy.ndarray`` of shape ``(773,)`` and dtype ``float32``.
    """
    n_squares = 64
    piece_to_index = {
        'P': 0, 'N': 1, 'B': 2, 'R': 3, 'Q': 4, 'K': 5,
        'p': 6, 'n': 7, 'b': 8, 'r': 9, 'q': 10, 'k': 11
    }
    n_piece_features = len(piece_to_index) * n_squares  # 768

    extras = [
        float(board.turn),
        float(board.has_kingside_castling_rights(chess.WHITE)),
        float(board.has_queenside_castling_rights(chess.WHITE)),
        float(board.has_kingside_castling_rights(chess.BLACK)),
        float(board.has_queenside_castling_rights(chess.BLACK)),
    ]

    # Write everything into one preallocated float32 vector. Concatenating a
    # float32 array with a plain Python list would upcast the result to
    # float64 and double the memory used per position.
    features = np.zeros(n_piece_features + len(extras), dtype=np.float32)  # 773
    for square, piece in board.piece_map().items():
        features[piece_to_index[piece.symbol()] * n_squares + square] = 1.0
    features[n_piece_features:] = extras
    return features

# =====================
# FUNCIÓN PARA GRAFO DE PARTIDA
# =====================

def pgn_to_graph(game):
    """Turn one parsed game into a path graph of board positions.

    Node ``0`` is the position returned by ``game.board()``; node ``i`` is the
    position after the ``i``-th mainline move. Each node carries the vector
    produced by ``board_to_feature``. Edges are directed ``i -> i + 1`` only
    (no reverse edges are added).

    Args:
        game: object exposing ``board()`` and ``mainline_moves()``
            (a ``chess.pgn.Game``).

    Returns:
        ``Data`` with ``x`` of shape ``(num_moves + 1, num_features)`` (float)
        and ``edge_index`` of shape ``(2, num_moves)`` (long). A game without
        moves yields a single node and an empty ``(2, 0)`` edge index.
    """
    board = game.board()

    # board_to_feature only reads the board, so the live board is passed
    # directly. Copying it on every ply (python-chess's Board.copy() also
    # duplicates the move stack by default) made the work per game grow
    # quadratically with the number of moves.
    features = [board_to_feature(board)]
    for move in game.mainline_moves():
        board.push(move)
        features.append(board_to_feature(board))

    x = torch.tensor(np.stack(features), dtype=torch.float)

    # Consecutive positions are linked: sources 0..n-2, targets 1..n-1.
    sources = torch.arange(len(features) - 1, dtype=torch.long)
    edge_index = torch.stack([sources, sources + 1])
    return Data(x=x, edge_index=edge_index)

# =====================
# PIPELINE PRINCIPAL
# =====================

def _iter_pgn_games(pgn_file):
    """Yield games from an open PGN text file until read_game returns None."""
    while True:
        game = chess.pgn.read_game(pgn_file)
        if game is None:
            return
        yield game


graphs = []

# Keep only the archives and sort them: os.listdir() returns entries in
# arbitrary order, which would make the order of `graphs` differ between runs.
zip_names = sorted(name for name in os.listdir(ZIP_FOLDER) if name.endswith(".zip"))

for zipname in tqdm(zip_names):
    # Player label = archive name without its extension. splitext strips only
    # the trailing ".zip", unlike str.replace which removes every occurrence.
    player_name = os.path.splitext(zipname)[0]

    with zipfile.ZipFile(os.path.join(ZIP_FOLDER, zipname), 'r') as zip_ref:
        # Take the PGN members from THIS archive's own listing. Scanning the
        # shared temp folder for "the first .pgn" could pick up a file left
        # behind by a previous archive and attribute it to the wrong player.
        pgn_members = [member for member in zip_ref.namelist() if member.endswith(".pgn")]

        for member in pgn_members:
            pgn_path = zip_ref.extract(member, EXTRACT_FOLDER)
            try:
                with open(pgn_path, encoding="utf-8", errors="ignore") as f:
                    # NOTE(review): parse problems such as an illegal SAN seem
                    # to be logged by read_game rather than raised, so such a
                    # game still reaches pgn_to_graph - consider inspecting
                    # `game.errors` if those games should be skipped.
                    for game in _iter_pgn_games(f):
                        try:
                            graph = pgn_to_graph(game)
                            graph.player = player_name  # attach the player's name
                            graphs.append(graph)
                        except Exception as e:
                            # Best effort: report the failing game and go on.
                            print(f"❌ Error procesando partida de {player_name}: {e}")
                            continue
            finally:
                # Always delete the temporary PGN, even if reading it failed.
                os.remove(pgn_path)

# =====================
# SAVE RESULTS
# =====================

torch.save(graphs, "chess_graphs.pt")
print(f"✅ {len(graphs)} grafos guardados en 'chess_graphs.pt'")

  4%|▍         | 2/50 [01:07<24:23, 30.48s/it]illegal san: 'Qxe1' in r2k3r/2pPp3/p4n2/3b2B1/1p5P/2qP4/3RQ1P1/4K2R w - - 2 31 while parsing <Game at 0x16c40fdc0 ('Gelfand,B' vs. 'Gareev,T', '2019.12.29' at 'Moscow RUS')>
100%|██████████| 50/50 [18:35<00:00, 22.32s/it]


✅ 103553 grafos guardados en 'chess_graphs.pt'


In [8]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool
from torch_geometric.loader import DataLoader
import pandas as pd
from tqdm import tqdm

# ======================
# Hiperparámetros
# ======================
INPUT_DIM = 773          # feature dimension per node
HIDDEN_DIM = 256         # width of the intermediate layer
EMBED_DIM = 128          # final embedding dimension
BATCH_SIZE = 32          # number of graphs per DataLoader batch

# ======================
# Modelo GCN Encoder
# ======================
class GNNEncoder(torch.nn.Module):
    """Two-layer GCN that maps each graph in a batch to one embedding vector.

    Args:
        in_channels: size of the input node features.
        hidden_channels: output size of the first convolution.
        out_channels: output size of the second convolution, i.e. the size of
            the pooled graph embedding.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index, batch):
        """Return the mean-pooled node embeddings, one row per graph.

        Args:
            x: node feature matrix.
            edge_index: graph connectivity passed to both convolutions.
            batch: per-node graph assignment used by ``global_mean_pool``.
        """
        # First convolution followed by a ReLU non-linearity.
        hidden = torch.relu(self.conv1(x, edge_index))
        # Second convolution produces the per-node embeddings (no activation).
        node_embeddings = self.conv2(hidden, edge_index)
        # Average the node embeddings of each graph.
        graph_embeddings = global_mean_pool(node_embeddings, batch)
        return graph_embeddings

# ======================
# Cargar grafos
# ======================
# Reload the graphs written by the extraction cell.
# NOTE(security): this file is a pickle of Python objects, so loading it can
# execute arbitrary code - only load files produced by this notebook.
# weights_only=False is spelled out because the implicit default has changed
# between PyTorch releases, and the restricted mode may reject pickled graph
# objects like the ones stored here.
graphs = torch.load("chess_graphs.pt", weights_only=False)
loader = DataLoader(graphs, batch_size=BATCH_SIZE, shuffle=False)

# NOTE(review): the encoder is used with its initial random weights - no
# training step is visible in this notebook. Seeding before construction at
# least makes the exported embeddings identical from one run to the next.
SEED = 42
torch.manual_seed(SEED)

model = GNNEncoder(INPUT_DIM, HIDDEN_DIM, EMBED_DIM)
model.eval()

# ======================
# Embedding loop
# ======================
embedding_chunks = []   # one tensor per batch
players = []            # player label per graph, in loader order

with torch.no_grad():
    for batch in tqdm(loader, desc="Generando embeddings"):
        embedding_chunks.append(model(batch.x, batch.edge_index, batch.batch))
        players.extend(batch.player)

# shuffle=False above keeps the rows aligned with `players`.
embeddings = torch.cat(embedding_chunks).cpu().numpy()
assert len(players) == embeddings.shape[0], "one player label is expected per embedding row"

# ======================
# Save results
# ======================
df = pd.DataFrame(embeddings, columns=[f"dim_{i}" for i in range(EMBED_DIM)])
df["player"] = players
df.to_csv("gnn_chess_embeddings.csv", index=False)
df.to_pickle("gnn_chess_embeddings.pkl")

print("✅ Embeddings GNN guardados como CSV y PKL.")

Generando embeddings: 100%|██████████| 3237/3237 [00:32<00:00, 98.28it/s] 


✅ Embeddings GNN guardados como CSV y PKL.
