# CNN vs Professor's MCTS7500 Dataset

Analyze the professor's `mcts7500_pool.pickle` and evaluate our trained CNN against it.

## 1. Load & Analyze Professor's Dataset

In [1]:
import os
import pickle
import numpy as np

# Paths: Colab (Drive) or local — Drive mount is optional
BASE = os.getcwd()
try:
    from google.colab import drive
    drive.mount('/content/drive')
    BASE = '/content/drive/MyDrive'
    print('Using Google Drive')
except Exception as e:
    print('Drive mount skipped:', type(e).__name__)
    print('Using local/uploaded files. Upload mcts7500_pool.pickle to Colab if needed.')

candidates = [os.path.join(BASE, 'mcts7500_pool.pickle'), 'mcts7500_pool.pickle',
             os.path.join(os.getcwd(), 'mcts7500_pool.pickle'), '/content/mcts7500_pool.pickle']
PICKLE_PATH = next((p for p in candidates if os.path.exists(p)), candidates[0])
if not os.path.exists(PICKLE_PATH):
    raise FileNotFoundError('mcts7500_pool.pickle not found. Upload it to the project folder.')
print('Loading professor dataset from:', PICKLE_PATH)
with open(PICKLE_PATH, 'rb') as f:
    data = pickle.load(f)

print('Keys:', list(data.keys()))
print('README:', data['README'])
print()

board_x = np.array(data['board_x'], dtype=np.float32)
play_y = np.array(data['play_y'], dtype=np.int64)

print('Dataset structure:')
print(f'  board_x: {board_x.shape} (6x7, option-a encoding: +1/-1/0)')
print(f'  play_y:  {play_y.shape} (column 0-6)')
print(f'  Unique play values: {np.unique(play_y)}')
print(f'  Move distribution: {np.bincount(play_y, minlength=7)}')

Mounted at /content/drive
Using Google Drive
Loading professor dataset from: /content/drive/MyDrive/mcts7500_pool.pickle


  data = pickle.load(f)


Keys: ['board_x', 'play_y', 'README']
README: this is with MCTS7500 - but it looks for wins and blocks before doing mcts..all boards are saved assuming that the next move is plus - this works by saving -1*board when it is minus turn...

Dataset structure:
  board_x: (265620, 6, 7) (6x7, option-a encoding: +1/-1/0)
  play_y:  (265620,) (column 0-6)
  Unique play values: [0 1 2 3 4 5 6]
  Move distribution: [35500 36530 39033 44842 38365 36213 35137]


## 2. Convert to 6x7x2 (Option B) for Our CNN

Professor's format: 6x7 with +1 (plus), -1 (minus), 0 (empty). All boards from plus's perspective.

Our CNN expects 6x7x2: ch0 = current player (plus), ch1 = opponent (minus).

In [2]:
def option_a_to_option_b(board):
    """Convert (6,7) +1/-1/0 to (6,7,2) for CNN. Current player = plus."""
    out = np.zeros((6, 7, 2), dtype=np.float32)
    out[:, :, 0] = (board == 1).astype(np.float32)   # plus/current
    out[:, :, 1] = (board == -1).astype(np.float32)  # minus/opponent
    return out

X_prof = np.array([option_a_to_option_b(b) for b in board_x], dtype=np.float32)
y_prof = play_y

print('Converted X_prof:', X_prof.shape)
print('Sample board (option b, ch0=plus ch1=minus):')
print(X_prof[0][:, :, 0], '\nch1:\n', X_prof[0][:, :, 1])

Converted X_prof: (265620, 6, 7, 2)
Sample board (option b, ch0=plus ch1=minus):
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 1.]
 [0. 1. 0. 0. 1. 0. 0.]] 
ch1:
 [[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 1. 1.]]


## 3. Load Our CNN

In [3]:
try:
    import tensorflow as tf
    from tensorflow import keras
except ImportError:
    import sys, subprocess
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', 'tensorflow'])
    import tensorflow as tf
    from tensorflow import keras

import os

# Model path: Drive or local/uploaded
drive_models = '/content/drive/MyDrive/Connect4_Combined/models'
local_models = os.path.join(os.getcwd(), 'Connect4_Combined', 'models')
MODEL_DIR = drive_models if os.path.exists(drive_models) else local_models

CNN_PATH = os.path.join(MODEL_DIR, 'connect4_cnn_best.keras')
if not os.path.exists(CNN_PATH):
    CNN_PATH = os.path.join(MODEL_DIR, 'connect4_cnn_final.keras')

print('Loading CNN from:', CNN_PATH)
cnn = keras.models.load_model(CNN_PATH)
cnn.summary()

Loading CNN from: /content/drive/MyDrive/Connect4_Combined/models/connect4_cnn_best.keras


  saveable.load_own_variables(weights_store.get(inner_path))


## 4. Evaluate CNN on Professor's Dataset

In [4]:
BATCH = 2048
preds = cnn.predict(X_prof, batch_size=BATCH, verbose=1)
cnn_moves = np.argmax(preds, axis=1)

acc = np.mean(cnn_moves == y_prof)
top2_correct = sum(1 for i in range(len(y_prof)) 
                   if y_prof[i] in np.argsort(preds[i])[-2:])
top2_acc = top2_correct / len(y_prof)

print('='*50)
print('CNN vs Professor MCTS7500')
print('='*50)
print(f'Top-1 accuracy: {acc:.2%}')
print(f'Top-2 accuracy: {top2_acc:.2%}')
print(f'Samples: {len(y_prof):,}')

[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 25ms/step
CNN vs Professor MCTS7500
Top-1 accuracy: 72.68%
Top-2 accuracy: 88.73%
Samples: 265,620


## 5. Per-Column Breakdown

In [5]:
print('Per-column: count | CNN correct | accuracy')
for col in range(7):
    mask = y_prof == col
    n = mask.sum()
    if n == 0:
        continue
    correct = (cnn_moves[mask] == y_prof[mask]).sum()
    print(f'  Col {col}: {n:>6,} | {correct:>6,} | {correct/n:.2%}')

Per-column: count | CNN correct | accuracy
  Col 0: 35,500 | 24,668 | 69.49%
  Col 1: 36,530 | 25,812 | 70.66%
  Col 2: 39,033 | 28,085 | 71.95%
  Col 3: 44,842 | 36,432 | 81.25%
  Col 4: 38,365 | 27,608 | 71.96%
  Col 5: 36,213 | 25,309 | 69.89%
  Col 6: 35,137 | 25,149 | 71.57%


## 6. Play CNN vs MCTS (Simulated Games)

Use professor's positions: at each position, CNN chooses a move. Compare to MCTS recommended move. Track "agreement rate" as we step through game-like sequences.

Simpler: sample N random positions and report head-to-head — when CNN and MCTS agree, count as CNN "winning" that decision. We already did that above (accuracy).

Alternative: run actual games — CNN plays, MCTS plays (we'd need MCTS code). Since we only have the dataset, we report accuracy on the full pool.

In [6]:
print('Summary: CNN matches professor MCTS on {:.1%} of positions.'.format(acc))
print('(Higher = CNN plays more like the professor\'s MCTS7500 bot.)')

Summary: CNN matches professor MCTS on 72.7% of positions.
(Higher = CNN plays more like the professor's MCTS7500 bot.)


## 7. Game-Play Evaluation: Policy vs Weak MCTS

Run actual games: Policy (CNN or Transformer with win/block + legal-move masking) vs Weak MCTS. Uses `policy_move_with_rules` so we never miss obvious wins or blocks.

In [7]:
# Connect4 game engine + Weak MCTS + policy_move_with_rules (win/block + legal masking)
import random

class Connect4Eval:
    def __init__(self):
        self.board = np.zeros((6, 7), dtype=np.int8)
        self.heights = np.zeros(7, dtype=np.int8)
        self.current_player = 1
        self.winner = None
        self.move_count = 0

    def copy(self):
        g = Connect4Eval()
        g.board = self.board.copy()
        g.heights = self.heights.copy()
        g.current_player = self.current_player
        g.winner = self.winner
        g.move_count = self.move_count
        return g

    def legal_moves(self):
        return [c for c in range(7) if self.heights[c] < 6]

    def make_move(self, col):
        if self.heights[col] >= 6:
            return False
        row = self.heights[col]
        self.board[row, col] = self.current_player
        self.heights[col] += 1
        self.move_count += 1
        if self._check_win(row, col):
            self.winner = self.current_player
        elif self.move_count >= 42:
            self.winner = 0
        self.current_player *= -1
        return True

    def _check_win(self, row, col):
        player = self.board[row, col]
        if row <= 2 and np.sum(self.board[row:row+4, col]) == 4 * player:
            return True
        for c in range(max(0, col-3), min(4, col+1)):
            if np.sum(self.board[row, c:c+4]) == 4 * player:
                return True
        for dr, dc in [(1, 1), (1, -1)]:
            count = 1
            for sign in [1, -1]:
                r, c = row + sign*dr, col + sign*dc
                while 0 <= r < 6 and 0 <= c < 7 and self.board[r, c] == player:
                    count += 1
                    r += sign*dr
                    c += sign*dc
            if count >= 4:
                return True
        return False

    def is_terminal(self):
        return self.winner is not None

    def encode(self, perspective=1):
        b = self.board if perspective == 1 else -self.board
        enc = np.zeros((6, 7, 2), dtype=np.float32)
        enc[:, :, 0] = (b == 1).astype(np.float32)
        enc[:, :, 1] = (b == -1).astype(np.float32)
        return enc

def find_winning_move(game, player):
    """Return winning column for player, or None."""
    for col in game.legal_moves():
        test = game.copy()
        old_p = test.current_player
        test.current_player = player
        test.make_move(col)
        if test.winner == player:
            return col
        test.current_player = old_p
    return None

def policy_move_with_rules(game, model, perspective=1):
    """Win check -> block check -> model with legal-move masking."""
    win_col = find_winning_move(game, game.current_player)
    if win_col is not None:
        return win_col
    block_col = find_winning_move(game, -game.current_player)
    if block_col is not None:
        return block_col
    x = game.encode(perspective=perspective)[None, ...]
    probs = model.predict(x, verbose=0)[0]
    legal = game.legal_moves()
    mask = np.full(7, -1e9, dtype=np.float32)
    for c in legal:
        mask[c] = 0.0
    scores = probs + mask
    return int(np.argmax(scores))

class WeakMCTS:
    def __init__(self, sims=100):
        self.sims = sims

    def get_move(self, game):
        win = find_winning_move(game, game.current_player)
        if win is not None:
            return win
        block = find_winning_move(game, -game.current_player)
        if block is not None:
            return block
        return self._mcts(game, self.sims)

    def _mcts(self, game, sims):
        root_player = game.current_player
        stats = {}
        for _ in range(sims):
            node = game.copy()
            path = []
            while not node.is_terminal():
                state = hash(node.board.tobytes())
                path.append(state)
                if state not in stats:
                    stats[state] = [0, 0.0]
                moves = node.legal_moves()
                if not moves:
                    break
                best_move = None
                best_ucb = -1e9
                parent_visits = stats[state][0]
                for col in moves:
                    test = node.copy()
                    test.make_move(col)
                    child = hash(test.board.tobytes())
                    if child not in stats:
                        stats[child] = [0, 0.0]
                    visits, value = stats[child]
                    if visits == 0:
                        ucb = 1e9
                    else:
                        exploit = value / visits
                        explore = 1.4 * np.sqrt(np.log(parent_visits + 1) / visits)
                        ucb = exploit + explore
                    if ucb > best_ucb:
                        best_ucb = ucb
                        best_move = col
                node.make_move(best_move)
                if stats[hash(node.board.tobytes())][0] == 0:
                    path.append(hash(node.board.tobytes()))
                    break
            depth = 0
            while not node.is_terminal() and depth < 12:
                moves = node.legal_moves()
                if not moves:
                    break
                node.make_move(random.choice(moves))
                depth += 1
            result = 1.0 if node.winner == root_player else (-1.0 if node.winner == -root_player else 0.0)
            for st in path:
                if st in stats:
                    stats[st][0] += 1
                    stats[st][1] += result
        best_move, best_val = None, -1e9
        for col in game.legal_moves():
            test = game.copy()
            test.make_move(col)
            st = hash(test.board.tobytes())
            if st in stats and stats[st][0] > 0:
                val = stats[st][1] / stats[st][0]
            else:
                val = -1e9
            if val > best_val:
                best_val, best_move = val, col
        return best_move if best_move is not None else random.choice(game.legal_moves())

weak_mcts = WeakMCTS(sims=120)
print('Connect4Eval, WeakMCTS, policy_move_with_rules ready.')

Connect4Eval, WeakMCTS, policy_move_with_rules ready.


In [9]:
# Run games: Policy (CNN/Transformer) vs Weak MCTS
EVAL_GAMES = 50

def run_games(model, name):
    wins = losses = ties = 0
    for _ in range(EVAL_GAMES):
        game = Connect4Eval()
        while not game.is_terminal():
            if game.current_player == 1:
                col = policy_move_with_rules(game, model, perspective=1)
            else:
                col = weak_mcts.get_move(game)
            game.make_move(col)
        if game.winner == 1:
            wins += 1
        elif game.winner == -1:
            losses += 1
        else:
            ties += 1
    return wins, losses, ties

# CNN vs Weak MCTS
w, l, t = run_games(cnn, 'CNN')
print('='*50)
print('CNN (policy_move_with_rules) vs Weak MCTS (sims=120)')
print('='*50)
print(f'W: {w} | L: {l} | T: {t} | Games: {EVAL_GAMES}')
print(f'Win rate: {w/EVAL_GAMES:.1%}')

# Transformer vs Weak MCTS (load if available)
trans_path = os.path.join(MODEL_DIR, 'connect4_transformer_best.keras')
if not os.path.exists(trans_path):
    trans_path = os.path.join(MODEL_DIR, 'connect4_transformer_final.keras')

if os.path.exists(trans_path):
    print()
    transformer = None
    try:
        transformer = keras.models.load_model(trans_path)
    except Exception as e:
        print('Default transformer load failed; retrying with custom_objects...')
        print('Reason:', type(e).__name__)

        class BoardPatchEmbedding(keras.layers.Layer):
            def __init__(self, embed_dim, **kwargs):
                super().__init__(**kwargs)
                self.embed_dim = embed_dim

            def build(self, input_shape):
                self.proj = keras.layers.Dense(self.embed_dim)
                super().build(input_shape)

            def call(self, x):
                import tensorflow as tf
                x = tf.reshape(x, [-1, 42, 2])
                return self.proj(x)

        class SinusoidalPositionalEmbedding(keras.layers.Layer):
            def __init__(self, seq_len, embed_dim, **kwargs):
                super().__init__(**kwargs)
                self.seq_len = seq_len
                self.embed_dim = embed_dim

            def build(self, input_shape):
                pe = np.zeros((1, self.seq_len, self.embed_dim), dtype=np.float32)
                for pos in range(self.seq_len):
                    for i in range(0, self.embed_dim, 2):
                        pe[0, pos, i] = np.sin(pos / 10000**(i / self.embed_dim))
                        if i + 1 < self.embed_dim:
                            pe[0, pos, i+1] = np.cos(pos / 10000**(i / self.embed_dim))
                self.pos_emb = self.add_weight(
                    shape=(1, self.seq_len, self.embed_dim),
                    initializer=keras.initializers.Constant(pe),
                    trainable=False,
                )
                super().build(input_shape)

            def call(self, x):
                return x + self.pos_emb

        custom_objects = {
            'BoardPatchEmbedding': BoardPatchEmbedding,
            'SinusoidalPositionalEmbedding': SinusoidalPositionalEmbedding,
        }

        try:
            transformer = keras.models.load_model(trans_path, custom_objects=custom_objects)
        except Exception as e2:
            print('Could not load Transformer model:', type(e2).__name__)
            print('Skipping Transformer vs MCTS in this run.')

    if transformer is not None:
        w2, l2, t2 = run_games(transformer, 'Transformer')
        print('='*50)
        print('Transformer (policy_move_with_rules) vs Weak MCTS (sims=120)')
        print('='*50)
        print(f'W: {w2} | L: {l2} | T: {t2} | Games: {EVAL_GAMES}')
        print(f'Win rate: {w2/EVAL_GAMES:.1%}')
else:
    print('\n(Transformer model not found; skipping Transformer vs MCTS)')

CNN (policy_move_with_rules) vs Weak MCTS (sims=120)
W: 42 | L: 3 | T: 5 | Games: 50
Win rate: 84.0%

Default transformer load failed; retrying with custom_objects...
Reason: TypeError


  saveable.load_own_variables(weights_store.get(inner_path))


Transformer (policy_move_with_rules) vs Weak MCTS (sims=120)
W: 38 | L: 7 | T: 5 | Games: 50
Win rate: 76.0%


## 8. Compare CNN v1 vs CNN_V2 vs Transformer

This adds a single evaluation pass across all available models:
- Top-1 / Top-2 match on professor MCTS labels
- Per-column accuracy
- Gameplay win/loss/tie vs Weak MCTS

If a model file is missing, it will be skipped automatically.

In [None]:
import os
import glob
import pandas as pd

print('Comparison cell version: 2')


def _candidate_paths(*names):
    roots = [
        MODEL_DIR,
        '/content/drive/MyDrive/Connect4_Combined/models',
        '/content/drive/MyDrive',
        '/content',
        os.path.join(os.getcwd(), 'Connect4_Combined', 'models'),
    ]
    out = []
    for root in roots:
        if not root:
            continue
        for n in names:
            out.append(os.path.join(root, n))
    return out


def discover_model_candidates():
    candidates = {
        'cnn_v1': _candidate_paths(
            'connect4_cnn_best.keras', 'connect4_cnn_final.keras',
            'connect4_cnn_best.h5', 'connect4_cnn_final.h5'
        ),
        'cnn_v2': _candidate_paths(
            'connect4_cnn_v2_best.keras', 'connect4_cnn_v2_final.keras',
            'connect4_cnn_v2_best.h5', 'connect4_cnn_v2_final.h5',
            'connect4_cnn_only_best.keras', 'connect4_cnn_only_final.keras',
            'connect4_cnn_only_best.h5', 'connect4_cnn_only_final.h5',
            # compatibility alias produced by CNN-only notebook
            'connect4_cnn_final.keras', 'connect4_cnn_final.h5'
        ),
        'transformer': _candidate_paths(
            'connect4_transformer_best.keras', 'connect4_transformer_final.keras',
            'connect4_transformer_best.h5', 'connect4_transformer_final.h5'
        ),
    }

    # Recursive fallback search in Drive/content for cnn_v2 if still missing.
    if not any(os.path.exists(p) for p in candidates['cnn_v2']):
        patterns = [
            '/content/drive/MyDrive/**/*cnn*v2*.keras',
            '/content/drive/MyDrive/**/*cnn*v2*.h5',
            '/content/**/*cnn*v2*.keras',
            '/content/**/*cnn*v2*.h5',
            '/content/drive/MyDrive/**/*cnn*only*.keras',
            '/content/drive/MyDrive/**/*cnn*only*.h5',
        ]
        for pat in patterns:
            candidates['cnn_v2'].extend(glob.glob(pat, recursive=True))

    # Deduplicate while preserving order
    for k, lst in candidates.items():
        seen, dedup = set(), []
        for p in lst:
            if p not in seen:
                seen.add(p)
                dedup.append(p)
        candidates[k] = dedup

    return candidates


MODEL_CANDIDATES = discover_model_candidates()


def resolve_model_path(model_name, paths):
    for p in paths:
        if os.path.exists(p):
            return p
    return None


def evaluate_label_accuracy(model, X, y, batch=2048):
    preds = model.predict(X, batch_size=batch, verbose=0)
    pred_moves = np.argmax(preds, axis=1)
    top1 = float(np.mean(pred_moves == y))
    top2 = float(np.mean([y[i] in np.argsort(preds[i])[-2:] for i in range(len(y))]))
    per_col = {}
    for col in range(7):
        mask = (y == col)
        n = int(mask.sum())
        if n == 0:
            per_col[col] = np.nan
        else:
            per_col[col] = float(np.mean(pred_moves[mask] == y[mask]))
    return top1, top2, per_col


def run_games_quick(model, games=20):
    wins = losses = ties = 0
    for _ in range(games):
        game = Connect4Eval()
        while not game.is_terminal():
            if game.current_player == 1:
                col = policy_move_with_rules(game, model, perspective=1)
            else:
                col = weak_mcts.get_move(game)
            game.make_move(col)
        if game.winner == 1:
            wins += 1
        elif game.winner == -1:
            losses += 1
        else:
            ties += 1
    return wins, losses, ties


def load_model_safe(model_name, model_path):
    if model_name != 'transformer':
        return keras.models.load_model(model_path)

    try:
        return keras.models.load_model(model_path)
    except Exception:
        class BoardPatchEmbedding(keras.layers.Layer):
            def __init__(self, embed_dim, **kwargs):
                super().__init__(**kwargs)
                self.embed_dim = embed_dim

            def build(self, input_shape):
                self.proj = keras.layers.Dense(self.embed_dim)
                super().build(input_shape)

            def call(self, x):
                import tensorflow as tf
                x = tf.reshape(x, [-1, 42, 2])
                return self.proj(x)

        class SinusoidalPositionalEmbedding(keras.layers.Layer):
            def __init__(self, seq_len, embed_dim, **kwargs):
                super().__init__(**kwargs)
                self.seq_len = seq_len
                self.embed_dim = embed_dim

            def build(self, input_shape):
                pe = np.zeros((1, self.seq_len, self.embed_dim), dtype=np.float32)
                for pos in range(self.seq_len):
                    for i in range(0, self.embed_dim, 2):
                        pe[0, pos, i] = np.sin(pos / 10000**(i / self.embed_dim))
                        if i + 1 < self.embed_dim:
                            pe[0, pos, i + 1] = np.cos(pos / 10000**(i / self.embed_dim))
                self.pos_emb = self.add_weight(
                    shape=(1, self.seq_len, self.embed_dim),
                    initializer=keras.initializers.Constant(pe),
                    trainable=False,
                )
                super().build(input_shape)

            def call(self, x):
                return x + self.pos_emb

        custom_objects = {
            'BoardPatchEmbedding': BoardPatchEmbedding,
            'SinusoidalPositionalEmbedding': SinusoidalPositionalEmbedding,
        }
        return keras.models.load_model(model_path, custom_objects=custom_objects)


required = ['Connect4Eval', 'weak_mcts', 'policy_move_with_rules']
missing = [name for name in required if name not in globals()]
if missing:
    raise RuntimeError(f"Missing gameplay components: {missing}. Run Section 7 cells before this comparison cell.")

print('MODEL_DIR currently set to:', MODEL_DIR)
print('cnn_v2 candidates checked:')
for p in MODEL_CANDIDATES['cnn_v2'][:15]:
    print('  -', p)
if len(MODEL_CANDIDATES['cnn_v2']) > 15:
    print(f"  ... and {len(MODEL_CANDIDATES['cnn_v2']) - 15} more")

rows = []
for model_name, candidates in MODEL_CANDIDATES.items():
    model_path = resolve_model_path(model_name, candidates)
    if model_path is None:
        print(f"[SKIP] {model_name}: no model file found in candidates")
        continue

    print(f"\nEvaluating {model_name} from: {model_path}")
    try:
        model = load_model_safe(model_name, model_path)
    except Exception as e:
        print(f"[SKIP] {model_name}: failed to load ({type(e).__name__})")
        continue

    top1, top2, per_col = evaluate_label_accuracy(model, X_prof, y_prof, batch=2048)
    w, l, t = run_games_quick(model, games=20)

    row = {
        'model': model_name,
        'path': model_path,
        'top1': top1,
        'top2': top2,
        'wins': w,
        'losses': l,
        'ties': t,
        'win_rate': w / 20.0,
    }
    for c in range(7):
        row[f'col_{c}'] = per_col[c]
    rows.append(row)

if not rows:
    raise RuntimeError('No models found to evaluate.')

results_df = pd.DataFrame(rows).sort_values(['top1', 'win_rate'], ascending=False).reset_index(drop=True)

print('\n' + '=' * 70)
print('MODEL COMPARISON (Professor labels + gameplay vs Weak MCTS)')
print('=' * 70)
display(results_df[['model', 'top1', 'top2', 'wins', 'losses', 'ties', 'win_rate', 'path']])

if 'cnn_v1' in set(results_df['model']) and 'cnn_v2' in set(results_df['model']):
    base = results_df[results_df['model'] == 'cnn_v1'].iloc[0]
    v2 = results_df[results_df['model'] == 'cnn_v2'].iloc[0]
    print('\nΔ(cnn_v2 - cnn_v1):')
    print(f"  Top-1:  {v2['top1'] - base['top1']:+.4f}")
    print(f"  Top-2:  {v2['top2'] - base['top2']:+.4f}")
    print(f"  WinRate:{v2['win_rate'] - base['win_rate']:+.4f}")

print('\nPer-column accuracy table:')
display(results_df[['model'] + [f'col_{i}' for i in range(7)]])


Evaluating cnn_v1 from: /content/drive/MyDrive/Connect4_Combined/models/connect4_cnn_best.keras
[SKIP] cnn_v2: no model file found in candidates

Evaluating transformer from: /content/drive/MyDrive/Connect4_Combined/models/connect4_transformer_best.keras


TypeError: <class 'keras.src.models.functional.Functional'> could not be deserialized properly. Please ensure that components that are Python object instances (layers, models, etc.) returned by `get_config()` are explicitly deserialized in the model's `from_config()` method.

config={'module': 'keras.src.models.functional', 'class_name': 'Functional', 'config': {}, 'registered_name': 'Functional', 'build_config': {'input_shape': None}, 'compile_config': {'optimizer': {'module': 'keras.optimizers', 'class_name': 'Adam', 'config': {'name': 'adam', 'learning_rate': 9.999999747378752e-06, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07, 'amsgrad': False}, 'registered_name': None}, 'loss': {'module': 'keras.losses', 'class_name': 'CategoricalCrossentropy', 'config': {'name': 'categorical_crossentropy', 'reduction': 'sum_over_batch_size', 'from_logits': False, 'label_smoothing': 0.05, 'axis': -1}, 'registered_name': None}, 'loss_weights': None, 'metrics': [{'module': 'keras.metrics', 'class_name': 'CategoricalAccuracy', 'config': {'name': 'accuracy', 'dtype': 'float32'}, 'registered_name': None}, {'module': 'keras.metrics', 'class_name': 'TopKCategoricalAccuracy', 'config': {'name': 'top2', 'dtype': 'float32', 'k': 2}, 'registered_name': None}], 'weighted_metrics': None, 'run_eagerly': False, 'steps_per_execution': 1, 'jit_compile': True}}.

Exception encountered: Could not locate class 'BoardPatchEmbedding'. Make sure custom classes are decorated with `@keras.saving.register_keras_serializable()`. Full object config: {'module': None, 'class_name': 'BoardPatchEmbedding', 'config': {'name': 'board_patch_embedding', 'embed_dim': 256, 'trainable': True, 'dtype': {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'mixed_float16'}, 'registered_name': None}}, 'registered_name': 'BoardPatchEmbedding', 'build_config': {'input_shape': [None, 6, 7, 2]}, 'name': 'board_patch_embedding', 'inbound_nodes': [{'args': [{'class_name': '__keras_tensor__', 'config': {'shape': [None, 6, 7, 2], 'dtype': 'float32', 'keras_history': ['input_layer_1', 0, 0]}}], 'kwargs': {}}]}