# Chess Outcome — Model Training

**Purpose:** Train deep-learning models to predict the game result from early-game move sequences (first *N* full moves, as configured).

**Inputs:** Clean dataset from Notebook 2 (sequence column + target).

**Process:** Load & tokenise → stratified train/val/test split → build vocab from train only → pad/truncate to the configured length → train model(s) with early stopping and LR scheduling → handle class imbalance (balanced batches or class weights) → evaluate and save artefacts.

**Metrics:** Accuracy, Macro-F1, and confusion matrix.

**Outputs:** Best model (`results/*.keras`) and run report (`results/seq_report.json`). Seed fixed; GPU used if available (mixed precision optional).

In [50]:
# ==============================================
# 1. Imports & Paths
# ==============================================
from pathlib import Path
import json
import os
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
pd.set_option("display.max_columns", 200)

import tensorflow as tf

# ----------------------------------------------
# 1.1 Config
# ----------------------------------------------
SEED = 42
CUTOFF_FULL_MOVES = 30
# One full move = two plies (white + black).
CUTOFF_PLIES = CUTOFF_FULL_MOVES * 2
# Name of the CSV column holding the space-separated SAN move prefix.
MOVE_COL = f"moves_first{CUTOFF_FULL_MOVES}_san"

# ----------------------------------------------
# 1.2 Reproducibility
# ----------------------------------------------
np.random.seed(SEED)
tf.random.set_seed(SEED)
gpus = tf.config.list_physical_devices("GPU")
for g in gpus:
    # Memory growth is best-effort: keep going if it cannot be enabled, but
    # say so instead of hiding the failure. print() is used because warnings
    # are globally silenced in this notebook.
    try:
        tf.config.experimental.set_memory_growth(g, True)
    except (RuntimeError, ValueError) as err:
        print(f"Could not enable memory growth on {g.name}: {err}")

# ----------------------------------------------
# 1.3 Paths
# ----------------------------------------------
NB_DIR = Path.cwd()
# Data and results directories sit one level above the working directory.
DATA_DIR, RESULTS_DIR = (
    (NB_DIR / relative).resolve() for relative in ("../data", "../results")
)
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
ELO_FEATS = ["elo_diff", "elo_avg"]

CLEAN_CSV = DATA_DIR / "chess_games_clean.csv"
META_JSON = DATA_DIR / "chess_games_clean_meta.json"

# Keep BATCH / EPOCHS if the session already defines them; otherwise use defaults.
if "BATCH" not in globals():
    BATCH = 512
if "EPOCHS" not in globals():
    EPOCHS = 40


print("Data:", CLEAN_CSV)
print("Meta:", META_JSON)
print(f"Cutoff: {CUTOFF_FULL_MOVES} moves ({CUTOFF_PLIES} plies)")
print("Results dir:", RESULTS_DIR)

Data: E:\Github Projects\chess-outcome-prediction\data\chess_games_clean.csv
Meta: E:\Github Projects\chess-outcome-prediction\data\chess_games_clean_meta.json
Cutoff: 30 moves (60 plies)
Results dir: E:\Github Projects\chess-outcome-prediction\results


In [51]:
# ==============================================
# 2. Load, Tokenise, Split
# ==============================================
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from collections import Counter
import json

# ----------------------------------------------
# 2.1 Load
# ----------------------------------------------
# Read the cleaned games table and fail fast if a required column is absent.
df = pd.read_csv(CLEAN_CSV)
expected = {MOVE_COL, "plies_processed", "cutoff_reached", "target"}
missing = sorted(expected - set(df.columns))
assert not missing, f"Missing columns: {missing}"

# Drop rows without a move string or a target.
df = df.dropna(subset=[MOVE_COL, "target"])
# Strip before measuring length so whitespace-only move strings are removed
# too; they would otherwise tokenise to an empty (all-padding) sequence.
has_moves = df[MOVE_COL].str.strip().str.len() > 0
df = df[has_moves].reset_index(drop=True)

# ----------------------------------------------
# 2.2 Labels (binary: white vs black only)
# ----------------------------------------------
# Keep only non-draw games (should already be true after your 02 notebook change,
# but this guards against stale CSVs)
# Fixed class order; the position in this list is the integer id
# (0 = black, 1 = white). Keep it stable across runs.
CLASS_ORDER = ["black", "white"]

# Guard against stale CSVs that still contain other outcomes (e.g. draws).
df = df.loc[df["target"].isin(CLASS_ORDER)].reset_index(drop=True)

label_to_id = dict(zip(CLASS_ORDER, range(len(CLASS_ORDER))))
id_to_label = dict(enumerate(CLASS_ORDER))

# Integer-encode the target; both classes must be present.
y = df["target"].map(label_to_id).astype("int32")
assert set(np.unique(y)) == {0,1}, f"Unexpected labels in y: {set(np.unique(y))}"

# ----------------------------------------------
# 2.3 Tokenise
# ----------------------------------------------
def to_tokens(s):
    """Return the whitespace-separated tokens of ``str(s)`` as a list."""
    text = str(s)
    return text.split()

# One list of move tokens per game, aligned with df's index.
tokens = df[MOVE_COL].map(to_tokens)

# ----------------------------------------------
# 2.4 Split
# ----------------------------------------------
# Stage 1: hold out 15% of all games as the test set (stratified on the label).
X_tmp, X_test_tok, y_tmp, y_test = train_test_split(
    tokens,
    y,
    stratify=y,
    test_size=0.15,
    random_state=SEED,
)
# Stage 2: carve validation out of the remaining 85%.
# 0.85 * 0.1765 ≈ 0.15, giving roughly 70/15/15 overall.
X_train_tok, X_val_tok, y_train, y_val = train_test_split(
    X_tmp,
    y_tmp,
    stratify=y_tmp,
    test_size=0.1765,
    random_state=SEED,
)

# ----------------------------------------------
# 2.5 Vocab (train only)
# ----------------------------------------------
# Reserved ids: index 0 pads short sequences, index 1 stands in for unseen tokens.
PAD_ID, UNK_ID = 0, 1

# Count tokens over the training games only, restricted to the plies that are kept.
counter = Counter(
    move
    for game in X_train_tok
    for move in game[:CUTOFF_PLIES]
)

# Special tokens first, then observed tokens from most to least frequent.
vocab = ["<PAD>", "<UNK>"]
vocab.extend(move for move, _count in counter.most_common())
stoi = dict(zip(vocab, range(len(vocab))))

# ----------------------------------------------
# 2.6 Numericalise
# ----------------------------------------------
def to_ids(seq, max_len=CUTOFF_PLIES):
    """Map a token sequence to an int32 id array of length ``max_len``.

    Tokens beyond ``max_len`` are discarded, tokens missing from ``stoi``
    become ``UNK_ID``, and short sequences are right-padded with ``PAD_ID``.
    """
    clipped = seq[:max_len]
    ids = [stoi.get(tok, UNK_ID) for tok in clipped]
    pad_count = max_len - len(ids)
    if pad_count > 0:
        ids.extend([PAD_ID] * pad_count)
    return np.asarray(ids, dtype=np.int32)

# Encode every split with the same train-derived vocabulary; one row per game.
X_train_seq, X_val_seq, X_test_seq = (
    np.stack([to_ids(game) for game in split_tokens])
    for split_tokens in (X_train_tok, X_val_tok, X_test_tok)
)

# ----------------------------------------------
# 2.7 Class Weights
# ----------------------------------------------
# "balanced" weights computed from the training labels only.
classes = np.array(sorted(label_to_id.values()))
cw = compute_class_weight(y=y_train, classes=classes, class_weight="balanced")
# Plain Python int -> float mapping so it can be serialised and passed to Keras.
class_weight = dict(zip(classes.tolist(), cw.tolist()))

# ----------------------------------------------
# 2.8 Persist Artifacts
# ----------------------------------------------
# Write every artefact as UTF-8 explicitly: the vocab is dumped with
# ensure_ascii=False, so relying on the platform default encoding could fail
# or produce a file that does not read back as UTF-8 on another machine.
(RESULTS_DIR / "vocab.json").write_text(
    json.dumps({"vocab": vocab}, ensure_ascii=False),
    encoding="utf-8",
)
# Note: JSON object keys are strings, so id_to_label's integer keys are
# stored as "0" / "1".
(RESULTS_DIR / "label_mapping.json").write_text(
    json.dumps({"label_to_id": label_to_id, "id_to_label": id_to_label}, indent=2),
    encoding="utf-8",
)
(RESULTS_DIR / "class_weights.json").write_text(
    json.dumps(class_weight, indent=2),
    encoding="utf-8",
)
(RESULTS_DIR / "dl_config.json").write_text(
    json.dumps(
        {
            "cutoff_full_moves": CUTOFF_FULL_MOVES,
            "cutoff_plies": CUTOFF_PLIES,
            "pad_id": PAD_ID,
            "unk_id": UNK_ID,
        },
        indent=2,
    ),
    encoding="utf-8",
)

# ----------------------------------------------
# 2.9 Summary
# ----------------------------------------------
# Quick overview of what the preprocessing produced.
print("Shapes:", *(arr.shape for arr in (X_train_seq, X_val_seq, X_test_seq)))
print("Vocab size:", len(vocab))
print("Label map:", label_to_id)
print("Class weights:", class_weight)
print("Samples (train/val/test):", *map(len, (X_train_seq, X_val_seq, X_test_seq)))

Shapes: (58758, 60) (12594, 60) (12592, 60)
Vocab size: 6156
Label map: {'black': 0, 'white': 1}
Class weights: {0: 1.0008857697679965, 1: 0.9991157966332257}
Samples (train/val/test): 58758 12594 12592


In [52]:
# ==============================================
# 2.10 Numeric features (Elo) — standardise on train only
# ==============================================

from sklearn.preprocessing import StandardScaler

# Reuse ELO_FEATS if an earlier cell defined it; otherwise fall back to the default pair.
ELO_FEATS = globals().get("ELO_FEATS", ["elo_diff", "elo_avg"])
assert all(f in df.columns for f in ELO_FEATS), f"Missing columns: {set(ELO_FEATS)-set(df.columns)}"

# Select the numeric rows through each split's index labels so they stay
# aligned with the token sequences of the same games.
X_train_num = df.loc[X_train_tok.index, ELO_FEATS].astype("float32").values
X_val_num   = df.loc[X_val_tok.index,   ELO_FEATS].astype("float32").values
X_test_num  = df.loc[X_test_tok.index,  ELO_FEATS].astype("float32").values

# Missing or infinite Elo values would pass through the scaler and turn the
# training loss into NaN; stop here with a clear message instead.
assert all(np.isfinite(a).all() for a in (X_train_num, X_val_num, X_test_num)), \
    f"Non-finite values (NaN/inf) found in {ELO_FEATS}; clean or impute them before scaling"

# Fit the scaler on the training split only, then apply it to every split.
num_scaler = StandardScaler().fit(X_train_num)
X_train_num = num_scaler.transform(X_train_num)
X_val_num   = num_scaler.transform(X_val_num)
X_test_num  = num_scaler.transform(X_test_num)

print("Seq shapes:", X_train_seq.shape, X_val_seq.shape, X_test_seq.shape)
print("Num shapes:", X_train_num.shape, X_val_num.shape, X_test_num.shape)

Seq shapes: (58758, 60) (12594, 60) (12592, 60)
Num shapes: (58758, 2) (12594, 2) (12592, 2)


In [53]:
# ----------------------------------------------
# 2.11 Label sanity & class weights (binary)
# ----------------------------------------------
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

NUM_CLASSES = 2

# From here on the labels are plain int32 NumPy arrays.
y_train, y_val, y_test = (
    np.asarray(labels, dtype=np.int32) for labels in (y_train, y_val, y_test)
)

# Every split may only contain the two known class ids.
valid_classes = np.array([0,1], dtype=np.int32)
for split_name, yy in (("train", y_train), ("val", y_val), ("test", y_test)):
    extra = np.unique(yy[~np.isin(yy, valid_classes)])
    assert extra.size == 0, f"{split_name} has unexpected labels: {extra}"

# Recompute the balanced weights from the training labels.
cw = compute_class_weight(y=y_train, classes=valid_classes, class_weight="balanced")
class_weight = dict(zip(valid_classes.tolist(), cw.tolist()))

print(
    "Label uniques:",
    {
        "train": sorted(np.unique(y_train).tolist()),
        "val": sorted(np.unique(y_val).tolist()),
        "test": sorted(np.unique(y_test).tolist()),
    },
)
print("class_weight:", class_weight)


Label uniques: {'train': [0, 1], 'val': [0, 1], 'test': [0, 1]}
class_weight: {0: 1.0008857697679965, 1: 0.9991157966332257}


In [54]:
# ----------------------------------------------
# 2.12 Build tf.data datasets (binary, no oversampling)
# ----------------------------------------------
import tensorflow as tf

# Keep a BATCH defined earlier in the session; default to 512 otherwise.
BATCH = globals().get("BATCH", 512)

# Each element is ((token ids, numeric features), label).
# Only the training pipeline is shuffled; validation and test keep their order.
ds_train = tf.data.Dataset.from_tensor_slices(((X_train_seq, X_train_num), y_train))
ds_train = ds_train.shuffle(200_000, seed=SEED).batch(BATCH).prefetch(2)

ds_val = tf.data.Dataset.from_tensor_slices(((X_val_seq, X_val_num), y_val))
ds_val = ds_val.batch(BATCH).prefetch(2)

ds_test = tf.data.Dataset.from_tensor_slices(((X_test_seq, X_test_num), y_test))
ds_test = ds_test.batch(BATCH)

print("Binary two-input training dataset ready.")

Binary two-input training dataset ready.


In [55]:
# ============================================================
# 3 · Model Training (Binary: white vs black, draws removed)
# ============================================================
import os, json, numpy as np, tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix

# -------------------------
# Reproducibility & paths
# -------------------------
# Fall back to defaults when this cell runs without the earlier setup cells.
if "SEED" not in globals():
    SEED = 42
if "RESULTS_DIR" not in globals():
    RESULTS_DIR = "results"

# Seed everything again right before the model is built.
tf.keras.utils.set_random_seed(SEED)
np.random.seed(SEED)

os.makedirs(RESULTS_DIR, exist_ok=True)
BEST_MODEL_PATH = os.path.join(RESULTS_DIR, "best_seq_model.keras")

# ----------------------------------------------------------------
# Expect the following to already exist from earlier notebook cells
#   - X_train_seq, X_val_seq, X_test_seq  (int token ids, padded)
#   - X_train_num, X_val_num, X_test_num  (float numeric features)
#   - y_train, y_val, y_test              (0=black, 1=white)
#   - ds_train, ds_val, ds_test           (tf.data datasets)  [optional]
#   - CLASS_ORDER = ["black","white"]
#   - class_weight                        (dict)              [optional]
# If a tf.data pipeline is not present, we'll build one quickly.
# ----------------------------------------------------------------
# All nine arrays must have been created by the preprocessing cells.
for name in (
    "X_train_seq", "X_val_seq", "X_test_seq",
    "X_train_num", "X_val_num", "X_test_num",
    "y_train", "y_val", "y_test",
):
    assert name in globals(), f"Missing expected variable: {name}"

# Input dimensions are read from the training arrays.
SEQ_LEN = int(X_train_seq.shape[1])
if X_train_num.ndim == 2:
    NUM_NUMERIC = int(X_train_num.shape[1])
else:
    NUM_NUMERIC = 0

# Largest id seen in training + 1, so vocab.json does not need to be loaded here.
VOCAB_SIZE = int(X_train_seq.max()) + 1
assert VOCAB_SIZE > 1, "VOCAB_SIZE must be > 1"

# -------------------------
# tf.data (if not provided)
# -------------------------
if "ds_train" not in globals():
    # No pipeline from an earlier cell: build all three with a fixed batch size.
    BATCH = 512

    # Training data is shuffled; validation and test keep their order.
    ds_train = tf.data.Dataset.from_tensor_slices(((X_train_seq, X_train_num), y_train))
    ds_train = ds_train.shuffle(200_000, seed=SEED).batch(BATCH).prefetch(2)

    ds_val = tf.data.Dataset.from_tensor_slices(((X_val_seq, X_val_num), y_val))
    ds_val = ds_val.batch(BATCH).prefetch(2)

    ds_test = tf.data.Dataset.from_tensor_slices(((X_test_seq, X_test_num), y_test))
    ds_test = ds_test.batch(BATCH)

# -------------------------
# Model backbone
# -------------------------
# Two-branch binary classifier: a token-sequence branch (embedding ->
# bidirectional GRU -> max over time) optionally fused with a small dense
# branch over the numeric features, followed by a dense head with a single
# sigmoid output.
EMB = 128        # embedding output dimension
DROPOUT = 0.2    # rate shared by every Dropout layer below

# Sequence branch
seq_in = layers.Input(shape=(SEQ_LEN,), dtype="int32", name="seq_in")
# mask_zero=True marks id 0 as padding for downstream layers.
x = layers.Embedding(input_dim=VOCAB_SIZE, output_dim=EMB, mask_zero=True, name="tok_emb")(seq_in)
x = layers.Bidirectional(layers.GRU(128, return_sequences=True), name="bi_gru")(x)
# NOTE(review): confirm how GlobalMaxPool1D treats the padding mask coming
# from the embedding; padded timesteps may still take part in the max.
x = layers.GlobalMaxPool1D(name="gmp")(x)
x = layers.Dropout(DROPOUT)(x)

# Numeric branch (optional; if you genuinely have 0 numeric features, we skip it)
# NOTE(review): the tf.data pipelines built in this notebook always yield a
# (seq, num) input pair, so the single-input `else` path would presumably
# need datasets without the numeric element — confirm before relying on it.
if NUM_NUMERIC > 0:
    num_in = layers.Input(shape=(NUM_NUMERIC,), dtype="float32", name="num_in")
    n = layers.LayerNormalization()(num_in)
    n = layers.Dense(64, activation="relu")(n)
    n = layers.Dropout(DROPOUT)(n)
    # Join the pooled sequence features with the numeric features.
    fused = layers.Concatenate()([x, n])
    inputs = [seq_in, num_in]
else:
    fused = x
    inputs = [seq_in]

# Head
fused = layers.Dense(128, activation="relu")(fused)
fused = layers.Dropout(DROPOUT)(fused)
# Single sigmoid unit: with labels 0 = black / 1 = white this is the
# predicted probability of class 1.
out = layers.Dense(1, activation="sigmoid", name="out")(fused)

model = models.Model(inputs=inputs, outputs=out)
model.summary()

# -------------------------
# Compile
# -------------------------
# Adam with gradient-norm clipping; binary cross-entropy matches the sigmoid output.
opt = tf.keras.optimizers.Adam(clipnorm=1.0, learning_rate=1e-3)
model.compile(
    loss="binary_crossentropy",
    optimizer=opt,
    metrics=["accuracy", tf.keras.metrics.AUC(name="auc")],
)

# -------------------------
# Callbacks
# -------------------------
# All three callbacks watch the validation loss.
callbacks = [
    # Stop after 7 epochs without improvement and restore the best weights.
    tf.keras.callbacks.EarlyStopping(
        restore_best_weights=True,
        patience=7,
        monitor="val_loss",
    ),
    # Halve the learning rate after 3 stagnant epochs, down to 1e-5 at most.
    tf.keras.callbacks.ReduceLROnPlateau(
        min_lr=1e-5,
        patience=3,
        factor=0.5,
        monitor="val_loss",
    ),
    # Write the model to disk whenever the validation loss improves.
    tf.keras.callbacks.ModelCheckpoint(
        filepath=BEST_MODEL_PATH,
        save_best_only=True,
        monitor="val_loss",
    ),
]

# -------------------------
# Train
# -------------------------
# Epoch budget: reuse a session-level EPOCHS if there is one.
if "EPOCHS" not in globals():
    EPOCHS = 40

# Class weights from an earlier cell, or None when they were never computed.
cw = globals().get("class_weight")

history = model.fit(
    ds_train,
    epochs=EPOCHS,
    validation_data=ds_val,
    class_weight=cw,
    callbacks=callbacks,
    verbose=1,
)

# -------------------------
# Evaluation helpers
# -------------------------
def eval_ds(m, ds, y_true, name, thr=0.5):
    """Score a binary classifier on one split and print a short report.

    Args:
        m: Model whose ``predict(ds, verbose=0)`` output is flattened to one
            probability per sample.
        ds: Dataset handed to ``m.predict``; its samples must be in the same
            order as ``y_true``.
        y_true: Ground-truth class ids (0/1) for the split.
        name: Tag used in the printed summary line.
        thr: Probabilities ``>= thr`` are predicted as class 1.

    Returns:
        Dict with ``acc``, ``f1_macro`` and ``cm`` (confusion matrix as nested lists).
    """
    # Pin the label set so the report and the confusion matrix always cover
    # every class in CLASS_ORDER. Without it, a split containing a single
    # class makes classification_report raise (target_names length mismatch)
    # and shrinks the confusion matrix to 1x1.
    label_ids = list(range(len(CLASS_ORDER)))

    y_prob = m.predict(ds, verbose=0).ravel()
    y_pred = (y_prob >= thr).astype("int32")

    # Cast to built-in floats so the stats serialise cleanly to JSON.
    acc = float(accuracy_score(y_true, y_pred))
    f1m = float(f1_score(y_true, y_pred, average="macro"))
    rep = classification_report(
        y_true, y_pred, labels=label_ids, target_names=CLASS_ORDER, zero_division=0
    )
    cm = confusion_matrix(y_true, y_pred, labels=label_ids).tolist()

    print(f"[{name}] acc={acc:.4f}  f1_macro={f1m:.4f}")
    print(rep)
    return {"acc": acc, "f1_macro": f1m, "cm": cm}

# Score the trained model on the held-out splits at the default 0.5 threshold.
print("\n--- Validation ---")
val_stats  = eval_ds(model, ds_val,  y_val,  "val",  thr=0.5)

print("\n--- Test ---")
test_stats = eval_ds(model, ds_test, y_test, "test", thr=0.5)

# -------------------------
# Save lightweight artefacts
# -------------------------
# Label mapping (2-class), derived from CLASS_ORDER so the saved mapping can
# never drift from the one used for training and for the printed reports.
# NOTE(review): section 2.8 writes the same file with a nested
# {"label_to_id", "id_to_label"} layout; this flat layout is what ends up on
# disk after a full run — confirm which one downstream readers expect.
label_mapping = {label: idx for idx, label in enumerate(CLASS_ORDER)}
with open(os.path.join(RESULTS_DIR, "label_mapping.json"), "w") as f:
    json.dump(label_mapping, f, indent=2)

# Class weights (if any)
if cw is not None:
    with open(os.path.join(RESULTS_DIR, "class_weights.json"), "w") as f:
        json.dump({int(k): float(v) for k, v in cw.items()}, f, indent=2)

# Simple metrics record
with open(os.path.join(RESULTS_DIR, "seq_report.json"), "w") as f:
    json.dump({"val": val_stats, "test": test_stats}, f, indent=2)

print("\nSaved best model to:", BEST_MODEL_PATH)
print("Saved label_mapping.json, class_weights.json (if used), and seq_report.json to:", RESULTS_DIR)

Epoch 1/40
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 228ms/step - accuracy: 0.5212 - auc: 0.5356 - loss: 0.6899 - val_accuracy: 0.5968 - val_auc: 0.6428 - val_loss: 0.6602 - learning_rate: 0.0010
Epoch 2/40
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 367ms/step - accuracy: 0.6131 - auc: 0.6585 - loss: 0.6529 - val_accuracy: 0.6023 - val_auc: 0.6517 - val_loss: 0.6565 - learning_rate: 0.0010
Epoch 3/40
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 277ms/step - accuracy: 0.6326 - auc: 0.6905 - loss: 0.6344 - val_accuracy: 0.6044 - val_auc: 0.6501 - val_loss: 0.6568 - learning_rate: 0.0010
Epoch 4/40
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 360ms/step - accuracy: 0.6516 - auc: 0.7144 - loss: 0.6186 - val_accuracy: 0.6000 - val_auc: 0.6450 - val_loss: 0.6670 - learning_rate: 0.0010
Epoch 5/40
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 187ms/step - accuracy: 0.6750 - auc: 0