# Methods - Optimized Version

In [456]:
from GraphTsetlinMachine.tm import MultiClassGraphTsetlinMachine
from GraphTsetlinMachine.graphs import Graphs
import numpy as np
import subprocess
import time
import os
from tqdm import tqdm

# Configuration
# BOARD_DIM: side length of the (square) Hex board used throughout the notebook.
BOARD_DIM = 3
# The notebook's directory is derived from a path resolved against the current
# working directory, so the notebook must be run from the folder it lives in.
NOTEBOOK_DIR = os.path.dirname(os.path.abspath("Tsetlin.ipynb"))
# Folder in which `make` is run and where the `hex` executable is looked up.
HEX_DIR = os.path.join(NOTEBOOK_DIR, "TsetlinMachine/hex")

# Fail early when the C sources folder is missing.
if not os.path.exists(HEX_DIR):
    raise FileNotFoundError(f"ERROR: Cannot find hex.c at {HEX_DIR}")

print("Building hex using make...")

# Build the C game generator. A failed build is only reported, not raised:
# later cells check for the executable themselves.
try:
    result = subprocess.run(["make"], cwd=HEX_DIR, capture_output=True, text=True)

    print("=== Make Output ===", result.stdout, sep="\n")
    if result.stderr.strip():
        print("=== Make Errors ===", result.stderr, sep="\n")

    print(
        "\n❌ Build failed! See errors above."
        if result.returncode
        else "\n✓ Build successful!"
    )

except Exception as e:
    print("Exception when running make:", e)

def c_position_to_node_id(c_position, board_dim=BOARD_DIM):
    """Map a flat index on the padded board to a flat index on the playable board.

    The padded board is treated as a row-major ``(board_dim + 2)`` square, i.e.
    the playable cells surrounded by a one-cell border
    (presumably matching the layout used by the C engine -- confirm against hex.c).

    Args:
        c_position: Flat index into the padded board.
        board_dim: Side length of the playable board.

    Returns:
        The row-major node id in ``[0, board_dim * board_dim)``, or ``None``
        when ``c_position`` lies on the border or outside the padded board.
    """
    padded_dim = board_dim + 2
    row, col = divmod(c_position, padded_dim)

    # Validate row and column independently. Checking only the flattened id is
    # not enough: a border cell such as (row=2, col=0) would flatten to the
    # last cell of the previous playable row and be accepted as valid.
    if not (1 <= row <= board_dim and 1 <= col <= board_dim):
        return None

    return (row - 1) * board_dim + (col - 1)

def get_hex_edges(board_dim=BOARD_DIM):
    """List the directed neighbour pairs of a ``board_dim`` x ``board_dim`` Hex board.

    Cells are numbered row-major (``row * board_dim + col``). Each cell is
    paired with up to six neighbours; pairs whose neighbour would fall off the
    board are omitted. Both directions of every adjacency appear in the result.

    Returns:
        A list of ``(node_id, neighbor_id)`` tuples, ordered by source cell and
        then by the order of the neighbour offsets below.
    """
    # The six hexagonal neighbour directions as (row delta, column delta).
    directions = ((0, 1), (0, -1), (-1, 1), (1, -1), (-1, 0), (1, 0))
    axis = range(board_dim)

    return [
        (row * board_dim + col, (row + d_row) * board_dim + (col + d_col))
        for row in axis
        for col in axis
        for d_row, d_col in directions
        if 0 <= row + d_row < board_dim and 0 <= col + d_col < board_dim
    ]

def parse_game_output(output):
    """Parse the text protocol printed by the game generator into game records.

    Recognised lines (after stripping surrounding whitespace):
      - ``GAME_START``            opens a new game
      - ``MOVE <position> <player>``  appends a move to the open game
      - ``WINNER <player>``       records the winner of the open game
      - ``GAME_END``              closes the game

    Lines outside a game, and MOVE/WINNER lines with too few fields, are
    ignored. A game that ends without a recorded winner is discarded.

    Returns:
        A list of dicts ``{'moves': [(position, player), ...], 'winner': int}``.
    """
    games = []
    game = None  # the game currently being read, if any

    for raw_line in output.split('\n'):
        text = raw_line.strip()

        if text == "GAME_START":
            game = {'moves': [], 'winner': -1}
            continue

        if text == "GAME_END":
            # Keep only games for which a winner was actually reported.
            if game is not None and game['winner'] != -1:
                games.append(game)
            game = None
            continue

        if game is None:
            continue

        fields = text.split()
        if text.startswith("MOVE"):
            if len(fields) >= 3:
                game['moves'].append((int(fields[1]), int(fields[2])))
        elif text.startswith("WINNER"):
            if len(fields) >= 2:
                game['winner'] = int(fields[1])

    return games

def create_training_data_from_game(moves, winner, board_dim=BOARD_DIM):
    """Build the single training sample for one finished game.

    All moves are replayed onto an empty board; the resulting final position
    is paired with the game's winner.

    Args:
        moves: Iterable of ``(c_position, player)`` pairs, where ``c_position``
            is an index accepted by ``c_position_to_node_id``.
        winner: Winner of the game; stored as ``int(winner)``.
        board_dim: Side length of the playable board.

    Returns:
        A one-element list holding a dict with keys ``board_state``
        (``board_dim`` x ``board_dim``; 0=empty, 1=player 0, 2=player 1),
        ``node_features`` (one-hot rows: player 0 / player 1 / empty),
        ``edges``, ``position``, ``player`` and ``label``. The list wrapper
        is kept so callers can ``extend`` with the result.
    """
    cell_count = board_dim * board_dim
    final_board = np.zeros(cell_count, dtype=np.int32)

    # Replay the game; later moves overwrite earlier ones on the same cell.
    for c_position, player in moves:
        node_id = c_position_to_node_id(c_position, board_dim)
        if node_id is not None:
            final_board[node_id] = player + 1  # 1 = player 0, 2 = player 1
        else:
            print(f"Skipping invalid move: c_pos={c_position}")

    # One-hot encode each cell: column 0 = player 0 stone, column 1 = player 1
    # stone, column 2 = anything else (empty).
    one_hot_column = np.where(final_board == 1, 0,
                              np.where(final_board == 2, 1, 2))
    node_features = np.zeros((cell_count, 3), dtype=np.int32)
    node_features[np.arange(cell_count), one_hot_column] = 1

    return [{
        'board_state': final_board.reshape(board_dim, board_dim),
        'node_features': node_features,         # one-hot view of the final board
        'edges': get_hex_edges(board_dim),      # not consumed by prepare_gtm_data
        'position': -1,                         # placeholder, unused
        'player': -1,                           # placeholder, unused
        'label': int(winner),                   # 0 or 1
    }]


def prepare_training_data(games, board_dim=BOARD_DIM):
    """Convert parsed games into final-position samples and print a summary.

    Each game is passed to ``create_training_data_from_game``; the samples it
    returns are collected in order. Win counts, the label distribution and a
    short check of the first five samples are printed along the way.

    Args:
        games: List of dicts with ``'moves'`` and ``'winner'`` keys.
        board_dim: Side length of the playable board.

    Returns:
        The list of sample dicts (empty if no samples could be created).
    """
    print(f"Processing {len(games)} games into training samples...")

    # Per-game outcome counts
    wins = {side: sum(1 for g in games if g['winner'] == side) for side in (0, 1)}
    print("Game outcomes (per game):")
    print(f"  Player 0 wins: {wins[0]}")
    print(f"  Player 1 wins: {wins[1]}")

    all_samples = []
    for game in tqdm(games, desc="Processing games"):
        all_samples += create_training_data_from_game(game['moves'], game['winner'], board_dim)

    if not all_samples:
        print("ERROR: No training samples created! Check your logic.")
        return all_samples

    labels = [entry['label'] for entry in all_samples]
    total = len(labels)
    classes, class_counts = np.unique(labels, return_counts=True)
    print("\nLabel distribution (winner classes, per FINAL board):")
    for label, count in zip(classes, class_counts):
        print(f"  Winner {label}: {count} games ({count/total*100:.1f}%)")

    # Spot-check the first few final boards: stones vs. empty cells.
    print("\nSample final board state check (first 5 samples):")
    for i, sample in enumerate(all_samples[:5]):
        features = sample['node_features']
        pieces = np.sum(features[:, :2])
        empties = np.sum(features[:, 2])
        print(f"  Sample {i}: {pieces} stones, {empties} empty cells, label(winner)={sample['label']}")

    print(f"{'='*60}\n")

    return all_samples


def generate_game_data(num_games=1000, hex_dir=HEX_DIR):
    """Run the ``hex`` executable and parse the games it prints.

    Args:
        num_games: Number passed to the executable as its only argument.
        hex_dir: Directory containing the ``hex`` executable; also used as the
            working directory of the subprocess.

    Returns:
        The list produced by ``parse_game_output``. An empty list is returned
        (after printing a message) when the executable is missing, exits with
        a non-zero status, or anything raises while running/parsing -- this
        includes exceeding the 120-second timeout.
    """
    hex_executable = os.path.join(hex_dir, "hex")

    if not os.path.exists(hex_executable):
        print(f"Executable not found at {hex_executable}")
        return []

    print(f"Generating {num_games} games...")

    command = [hex_executable, str(num_games)]
    try:
        result = subprocess.run(
            command, cwd=hex_dir, capture_output=True, text=True, timeout=120
        )

        if result.returncode == 0:
            games = parse_game_output(result.stdout)
            print(f"Successfully parsed {len(games)} games from output")
            return games

        print("Error running hex executable:")
        print(result.stderr)
        return []

    except Exception as e:
        # Best effort: report the problem and let the caller handle "no games".
        print(f"Error running hex executable: {e}")
        return []


def prepare_gtm_data(training_samples, board_dim=BOARD_DIM,
                     hypervector_size=1024, hypervector_bits=2,
                     init_with=None):
    """Encode samples as single-node graphs for the Graph Tsetlin Machine.

    Every sample becomes one graph with exactly one node and no edges. Each
    occupied cell contributes one property to that node: ``P0_<i>_<j>`` for a
    player-0 stone and ``P1_<i>_<j>`` for a player-1 stone at row ``i``,
    column ``j``. Empty cells add nothing.

    Args:
        training_samples: List of sample dicts providing ``'label'`` and
            ``'node_features'`` (rows indexed by cell id; column 0 flags a
            player-0 stone, column 1 a player-1 stone).
        board_dim: Side length of the board.
        hypervector_size: Passed to ``Graphs`` when a new encoding is created.
        hypervector_bits: Passed to ``Graphs`` when a new encoding is created.
        init_with: Optional, previously built ``Graphs`` object (typically the
            training graphs). When given it is forwarded as
            ``Graphs(..., init_with=init_with)`` so that the new graphs reuse
            its symbols and encoding, and ``hypervector_size`` /
            ``hypervector_bits`` are not used. Graphs meant for evaluating a
            trained model should be built this way, following the
            GraphTsetlinMachine examples; otherwise each ``Graphs`` object
            gets its own independent encoding.

    Returns:
        ``(graphs, Y)`` -- the encoded ``Graphs`` object and an ``int32``
        array of labels in sample order.
    """
    from GraphTsetlinMachine.graphs import Graphs

    Y = np.array([s['label'] for s in training_samples], dtype=np.int32)
    num_graphs = len(training_samples)

    if init_with is not None:
        # Share the symbol table / encoding of an existing Graphs object.
        graphs = Graphs(num_graphs, init_with=init_with)
    else:
        # Symbols: one per (cell, player), in the order P0 then P1 for each cell.
        symbols = [
            f"P{player}_{i}_{j}"
            for i in range(board_dim)
            for j in range(board_dim)
            for player in (0, 1)
        ]
        graphs = Graphs(
            num_graphs,
            symbols=symbols,
            hypervector_size=hypervector_size,
            hypervector_bits=hypervector_bits
        )

    # Set 1 node per graph
    for graph_id in range(num_graphs):
        graphs.set_number_of_graph_nodes(graph_id, 1)

    graphs.prepare_node_configuration()

    # Add the single node (name 0) with 0 outgoing edges
    for graph_id in range(num_graphs):
        graphs.add_graph_node(graph_id, 0, 0)

    graphs.prepare_edge_configuration()

    # Add properties for occupied cells only
    for graph_id, sample in enumerate(training_samples):
        node_features = sample['node_features']
        for cell_id in range(board_dim * board_dim):
            i, j = divmod(cell_id, board_dim)

            if node_features[cell_id, 0] == 1:
                graphs.add_graph_node_property(graph_id, 0, f"P0_{i}_{j}")
            elif node_features[cell_id, 1] == 1:
                graphs.add_graph_node_property(graph_id, 0, f"P1_{i}_{j}")

    graphs.encode()
    return graphs, Y



def train_model(graphs, Y, epochs=100):
    """Fit a MultiClassGraphTsetlinMachine that predicts the winner (0 or 1).

    The machine is trained one epoch at a time on the full graph set; after
    every epoch, overall and per-class accuracy on that same set are printed.
    A class-balancing index is computed and reported, but it is not used to
    select the graphs passed to ``fit``.

    Args:
        graphs: Encoded ``Graphs`` object.
        Y: Label array aligned with ``graphs``.
        epochs: Number of single-epoch ``fit`` calls.

    Returns:
        ``(tm, predictions)`` -- the trained machine and its final predictions
        on ``graphs``.
    """
    # Hyperparameters
    n_clauses = 60
    threshold = 600
    specificity = 1.0
    depth = 1
    message_size = 256
    message_bits = 2

    print("Initializing Graph Tsetlin Machine...")
    print(f"  Clauses: {n_clauses}")
    print(f"  T: {threshold}")
    print(f"  s: {specificity}")
    print(f"  Depth: {depth}")
    print(f"  Message Size: {message_size}")

    tm = MultiClassGraphTsetlinMachine(
        number_of_clauses=n_clauses,
        T=threshold,
        s=specificity,
        number_of_state_bits=8,
        depth=depth,
        message_size=message_size,
        message_bits=message_bits,
        max_included_literals=4,
        grid=(16 * 13, 1, 1),
        block=(128, 1, 1)
    )

    # Class balancing bookkeeping: repeat the minority class a whole number of
    # times so it roughly matches the majority class.
    winner0_idx = np.nonzero(Y == 0)[0]
    winner1_idx = np.nonzero(Y == 1)[0]
    n_winner0, n_winner1 = len(winner0_idx), len(winner1_idx)

    print(f"\nClass distribution before balancing (winner classes):")
    print(f"  Winner 0: {n_winner0} states")
    print(f"  Winner 1: {n_winner1} states")

    if n_winner0 == 0 or n_winner1 == 0:
        balanced_indices = np.arange(len(Y))
        print("Warning: only one winner class present; no balancing applied.")
    else:
        if n_winner0 < n_winner1:
            winner0_idx = np.tile(winner0_idx, n_winner1 // n_winner0)
        else:
            winner1_idx = np.tile(winner1_idx, n_winner0 // n_winner1)

        balanced_indices = np.concatenate([winner0_idx, winner1_idx])
        np.random.shuffle(balanced_indices)
        print(f"After balancing: {len(balanced_indices)} total samples")

    print(f"\nStarting training for {epochs} epochs...")
    print("=" * 60)

    # The label masks do not change between epochs.
    is_winner0 = (Y == 0)
    is_winner1 = (Y == 1)

    training_began = time.time()

    for epoch_no in range(1, epochs + 1):
        epoch_began = time.time()

        # NOTE: fit() is given the full graph set; the balanced index above is
        # informational only. Training on a subset would require building a
        # Graphs object for that subset.
        tm.fit(graphs, Y, epochs=1, incremental=True)
        elapsed = time.time() - epoch_began

        predictions = tm.predict(graphs)
        accuracy = 100 * (predictions == Y).mean()

        if is_winner0.any():
            class_0_acc = 100 * (predictions[is_winner0] == 0).mean()
        else:
            class_0_acc = 0
        if is_winner1.any():
            class_1_acc = 100 * (predictions[is_winner1] == 1).mean()
        else:
            class_1_acc = 0

        print(f"Epoch {epoch_no}/{epochs} - Acc: {accuracy:.2f}% "
              f"(Winner 0 states: {class_0_acc:.1f}%, Winner 1 states: {class_1_acc:.1f}%) - {elapsed:.2f}s")

    total_time = time.time() - training_began
    print("\n" + "=" * 60)
    print(f"✓ Training completed in {total_time:.2f}s ({total_time/60:.2f} minutes)")

    print("\nFinal Evaluation...")
    predictions = tm.predict(graphs)

    accuracy = 100 * (predictions == Y).mean()
    print(f"\nOverall Accuracy: {accuracy:.2f}%")

    for class_id in (0, 1):
        mask = Y == class_id
        if not mask.any():
            continue
        hits = predictions[mask] == class_id
        print(f"Winner {class_id} states: {100 * hits.mean():.2f}% "
              f"({hits.sum()}/{mask.sum()})")

    return tm, predictions

import pickle
def save_model(tm, filepath="TsetlinMachine/hex_tm_model.pkl",
               board_dim=11, additional_info=None):
    """Save a trained model together with notebook metadata.

    The model's own ``save`` method writes the file and returns its state
    dict. ``board_dim`` and any ``additional_info`` entries are then added to
    that dict and the file is rewritten with the combined content.

    Args:
        tm: Object exposing ``save(fname=...)`` that returns a dict.
        filepath: Destination pickle file. Missing parent directories are
            created; a bare file name (no directory part) is written to the
            current working directory.
        board_dim: Board size stored under the ``'board_dim'`` key.
        additional_info: Optional dict merged into the saved state. Keys that
            collide with existing entries overwrite them.

    Returns:
        The state dict that was written to ``filepath``.
    """
    print(f"Saving model to {filepath}...")

    # os.makedirs("") raises, so only create the parent when there is one.
    parent_dir = os.path.dirname(filepath)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Use built-in save() method
    state_dict = tm.save(fname=filepath)

    # Add metadata
    state_dict['board_dim'] = board_dim
    if additional_info:
        state_dict.update(additional_info)

    # Re-save with metadata
    with open(filepath, 'wb') as f:
        pickle.dump(state_dict, f)

    print(f"✓ Model saved successfully to {filepath}")
    return state_dict

Building hex using make...
=== Make Output ===
make: 'hex' is up to date.


✓ Build successful!


## Generate Game Data

Run this cell to generate Hex games and create training samples.

In [457]:
# Generate games
NUM_GAMES = 1000  # Adjust as needed

print(f"Generating {NUM_GAMES} Hex games...")
games = generate_game_data(NUM_GAMES)

# generate_game_data returns an empty list on any failure; stop here if so.
if not games:
    raise Exception("No games generated! Check hex executable.")

print(f"\n✓ Successfully generated {len(games)} games")

# Process into training samples (FINAL state -> winner)
training_samples = prepare_training_data(games, BOARD_DIM)

# Diagnostic: stones vs. empty cells for the first ten final boards.
print("\n" + "=" * 60, "DIAGNOSTIC: Checking final board states:", "=" * 60, sep="\n")
for i, sample in enumerate(training_samples[:10]):
    non_zero = sample['node_features'][:, :2].sum()  # Count player pieces
    empty = sample['node_features'][:, 2].sum()      # Count empty cells
    print(f"Sample {i}: {non_zero} stones on board, {empty} empty cells, label(winner)={sample['label']}")

print("=" * 60 + "\n")
print(f"\n✓ Training data ready: {len(training_samples)} samples")


Generating 1000 Hex games...
Generating 1000 games...
Successfully parsed 1000 games from output

✓ Successfully generated 1000 games
Processing 1000 games into training samples...
Game outcomes (per game):
  Player 0 wins: 677
  Player 1 wins: 323


Processing games: 100%|██████████| 1000/1000 [00:00<00:00, 29194.01it/s]


Label distribution (winner classes, per FINAL board):
  Winner 0: 677 games (67.7%)
  Winner 1: 323 games (32.3%)

Sample final board state check (first 5 samples):
  Sample 0: 9 stones, 0 empty cells, label(winner)=0
  Sample 1: 7 stones, 2 empty cells, label(winner)=0
  Sample 2: 7 stones, 2 empty cells, label(winner)=0
  Sample 3: 7 stones, 2 empty cells, label(winner)=0
  Sample 4: 9 stones, 0 empty cells, label(winner)=0


DIAGNOSTIC: Checking final board states:
Sample 0: 9 stones on board, 0 empty cells, label(winner)=0
Sample 1: 7 stones on board, 2 empty cells, label(winner)=0
Sample 2: 7 stones on board, 2 empty cells, label(winner)=0
Sample 3: 7 stones on board, 2 empty cells, label(winner)=0
Sample 4: 9 stones on board, 0 empty cells, label(winner)=0
Sample 5: 7 stones on board, 2 empty cells, label(winner)=0
Sample 6: 9 stones on board, 0 empty cells, label(winner)=0
Sample 7: 6 stones on board, 3 empty cells, label(winner)=1
Sample 8: 6 stones on board, 3 empty cells, la




## Prepare Data for Graph Tsetlin Machine

Convert training samples into the GTM Graphs format.

In [458]:
# Encode the training samples as GTM graphs (one single-node graph per sample).
graphs, Y = prepare_gtm_data(
    training_samples,
    board_dim=BOARD_DIM,
    hypervector_size=1024,
    hypervector_bits=2,
)

print("\n" + "=" * 60, "DIAGNOSTIC: Checking graph properties", "=" * 60, sep="\n")
print(f"Max nodes: {graphs.max_number_of_graph_nodes}")
print(f"Number of edge types: {len(graphs.edge_type_id)}")

# Check first graph: column sums of the one-hot features give the number of
# player-0 stones, player-1 stones and empty cells.
print(f"\nFirst graph (should be Winner {Y[0]}):")
node_features = training_samples[0]['node_features']
player0_count, player1_count, empty_count = (
    np.sum(node_features[:, column]) for column in range(3)
)
print(f"  Player 0 stones: {player0_count}")
print(f"  Player 1 stones: {player1_count}")
print(f"  Empty cells: {empty_count}")
print("=" * 60 + "\n")


DIAGNOSTIC: Checking graph properties
Max nodes: 1
Number of edge types: 0

First graph (should be Winner 0):
  Player 0 stones: 5
  Player 1 stones: 4
  Empty cells: 0



## Train the Graph Tsetlin Machine

Train the model on the prepared graph data.

In [459]:
# Train for 30 epochs, then persist the model with the board size as metadata.
tm, predictions = train_model(graphs, Y, epochs=30)
# Last expression of the cell: the returned state dict is displayed as output.
save_model(tm, "TsetlinMachine/hex_tm_model.pkl", board_dim=BOARD_DIM)

Initializing Graph Tsetlin Machine...
  Clauses: 60
  T: 600
  s: 1.0
  Depth: 1
  Message Size: 256
Initialization of sparse structure.

Class distribution before balancing (winner classes):
  Winner 0: 677 states
  Winner 1: 323 states
After balancing: 1323 total samples

Starting training for 30 epochs...
Epoch 1/30 - Acc: 68.70% (Winner 0 states: 100.0%, Winner 1 states: 3.1%) - 1.11s
Epoch 2/30 - Acc: 74.00% (Winner 0 states: 99.1%, Winner 1 states: 21.4%) - 0.37s
Epoch 3/30 - Acc: 74.80% (Winner 0 states: 97.9%, Winner 1 states: 26.3%) - 0.37s
Epoch 4/30 - Acc: 75.50% (Winner 0 states: 96.8%, Winner 1 states: 31.0%) - 0.36s
Epoch 5/30 - Acc: 82.90% (Winner 0 states: 98.8%, Winner 1 states: 49.5%) - 0.36s
Epoch 6/30 - Acc: 76.20% (Winner 0 states: 99.4%, Winner 1 states: 27.6%) - 0.35s
Epoch 7/30 - Acc: 82.20% (Winner 0 states: 96.5%, Winner 1 states: 52.3%) - 0.34s
Epoch 8/30 - Acc: 84.20% (Winner 0 states: 95.1%, Winner 1 states: 61.3%) - 0.34s
Epoch 9/30 - Acc: 81.50% (Winner 0

{'ta_state': array([4294967295,      65536,          0, ..., 3221225471, 3220176895,
                 0], dtype=uint32),
 'message_ta_state': [],
 'clause_weights': array([  81,   49, -195, -254,   86,  114,  157,  101,  118,  -60,   30,
         160,  101,   79,  152,   60,   88,  129,  127,   87,  -34, -214,
        -129, -184,  -93,  104, -212,  121,   61,  106,   48, -268,   72,
          90, -214, -250,   46, -273,  -69,  105,  266, -248, -234,  187,
          50,   19, -175,   81,  170,  194,  173,  115, -195,   93,  155,
        -276,  201, -231, -239,   52,  -48,  -35,  165,  232, -102,  -75,
        -123, -172, -104,  -87,  -80, -137,  -67,   39, -140,  -61,  -81,
        -100, -140,  -57,  -14,  212,   94,  184,   73, -106,  230, -146,
         -54,  -89,  -84,  215, -129,  -74,  221,  256,    1,  250,   82,
         -53, -306,  213,  241, -169,  -48,   20,  179,   -6, -146, -146,
        -157, -147,  188, -100, -167,  267, -227,  228,  256,  -31],
       dtype=int32),
 'hype

## Load a Trained Model (Optional)

Use this to load a previously trained model.

In [460]:
def load_model(filepath="TsetlinMachine/hex_tm_model.pkl"):
    """Rebuild a saved model and return it with its metadata.

    The pickle at ``filepath`` is read to obtain the constructor parameters
    and metadata; a new ``MultiClassGraphTsetlinMachine`` is created from
    those parameters and then asked to ``load`` its state from the same file.

    Note: unpickling runs code embedded in the file, so only load files that
    come from a trusted source.

    Args:
        filepath: Path to a pickle file containing the model's state dict plus
            metadata.

    Returns:
        ``(tm, metadata)`` where ``metadata`` holds ``'board_dim'`` (11 when
        the file has none) and every key of the state dict that is not one of
        the known model fields.
    """
    print(f"Loading model from {filepath}...")

    # Load pickle file
    with open(filepath, 'rb') as handle:
        state_dict = pickle.load(handle)

    # Keys that belong to the model state itself (plus 'board_dim', which is
    # handled explicitly); anything else is treated as user metadata.
    standard_keys = {
        'ta_state', 'message_ta_state', 'clause_weights', 'hypervectors',
        'number_of_outputs', 'number_of_literals', 'number_of_message_literals',
        'min_y', 'max_y', 'negative_clauses', 'max_number_of_graph_nodes',
        'number_of_clauses', 'T', 's', 'q', 'max_included_literals',
        'boost_true_positive_feedback', 'number_of_state_bits', 'depth',
        'message_size', 'message_bits', 'double_hashing',
        'board_dim'
    }

    metadata = {'board_dim': state_dict.get('board_dim', 11)}
    metadata.update(
        (key, value) for key, value in state_dict.items()
        if key not in standard_keys
    )

    # Create model with saved parameters
    constructor_fields = (
        'number_of_clauses', 'T', 's', 'q', 'max_included_literals',
        'boost_true_positive_feedback', 'number_of_state_bits', 'depth',
        'message_size', 'message_bits', 'double_hashing',
    )
    tm = MultiClassGraphTsetlinMachine(
        **{field: state_dict[field] for field in constructor_fields}
    )

    # Load state using built-in load() method
    tm.load(fname=filepath)

    print("✓ Model loaded successfully")
    print(f"  Clauses: {state_dict['number_of_clauses']}, Depth: {state_dict['depth']}")

    return tm, metadata



In [461]:
from sklearn.metrics import confusion_matrix, classification_report, precision_score, recall_score, f1_score
from GraphTsetlinMachine.tm import MultiClassGraphTsetlinMachine
def evaluate_model(model_path="TsetlinMachine/hex_tm_model.pkl",
                   num_test_games=200, verbose=True):
    """Load a saved model and report its accuracy on freshly generated games.

    Steps: load the model and its ``board_dim`` metadata, generate
    ``num_test_games`` games with ``generate_game_data``, turn each into a
    final-position sample, encode the samples with ``prepare_gtm_data``,
    predict, and print accuracy, a confusion matrix and precision/recall/F1
    for both winner classes.

    NOTE(review): the test graphs are encoded independently of the graphs the
    model was trained on. The GraphTsetlinMachine examples build evaluation
    graphs with ``init_with=<training graphs>`` so both share one encoding;
    confirm whether that is required here. A model that predicts the same
    class for every test board would be consistent with mismatched encodings.

    Args:
        model_path: Pickle file written by ``save_model``.
        num_test_games: Number of games requested from the generator.
        verbose: When true, also print progress messages and the first ten
            individual predictions.

    Returns:
        Dict with ``'accuracy'`` (percent), ``'predictions'``, ``'labels'``,
        ``'confusion_matrix'`` (always 2x2, rows = actual, columns =
        predicted) and ``'metrics'`` (precision/recall/F1 per class).

    Raises:
        RuntimeError: If no test samples could be generated.
    """

    # Load model
    tm, metadata = load_model(model_path)
    board_dim = metadata['board_dim']

    if verbose:
        print(f"\n{'='*70}")
        print("MODEL EVALUATION")
        print(f"{'='*70}")

    # Generate test data
    if verbose:
        print(f"\nGenerating {num_test_games} test games...")
    test_games = generate_game_data(num_test_games)

    test_samples = []
    for g in test_games:
        samples = create_training_data_from_game(g["moves"], g["winner"], board_dim)
        test_samples.extend(samples)

    # generate_game_data returns [] on any failure; stop with a clear message
    # instead of failing later on an empty graph set.
    if not test_samples:
        raise RuntimeError("No test games generated! Check hex executable.")

    Y_test = np.array([s["label"] for s in test_samples], dtype=np.int32)

    if verbose:
        print(f"✓ Created {len(test_samples)} test samples")
        print(f"  Winner 0: {np.sum(Y_test==0)}, Winner 1: {np.sum(Y_test==1)}")

    # Prepare graphs
    if verbose:
        print(f"Preparing graphs...")

    graphs_test, _ = prepare_gtm_data(
        test_samples, board_dim=board_dim,
        hypervector_size=1024, hypervector_bits=2
    )

    if verbose:
        print(f"✓ Graphs prepared\nMaking predictions...")

    # Predict
    predictions = tm.predict(graphs_test).astype(int)

    # Calculate metrics
    overall_acc = 100 * (predictions == Y_test).mean()

    # Print results
    print(f"\n{'='*70}")
    print(f"RESULTS - {len(test_samples)} samples from {num_test_games} games")
    print(f"{'='*70}")
    print(f"\nOverall Accuracy: {overall_acc:.2f}%")

    # Per-class accuracy
    for winner in [0, 1]:
        mask = Y_test == winner
        if mask.any():
            hits = predictions[mask] == winner
            acc = 100 * hits.mean()
            correct = hits.sum()
            total = mask.sum()
            print(f"Winner {winner} Accuracy: {acc:.2f}% ({correct}/{total})")

    # Confusion matrix. labels=[0, 1] pins the shape to 2x2 even when only one
    # class occurs in both the labels and the predictions; without it the
    # matrix would be 1x1 and the indexing below would fail.
    cm = confusion_matrix(Y_test, predictions, labels=[0, 1])
    print(f"\nConfusion Matrix:")
    print(f"              Predicted")
    print(f"              0      1")
    print(f"Actual  0  [{cm[0,0]:4d}  {cm[0,1]:4d}]")
    print(f"        1  [{cm[1,0]:4d}  {cm[1,1]:4d}]")

    # Additional metrics, computed once with each class as the positive label.
    p0 = precision_score(Y_test, predictions, pos_label=0, zero_division=0)
    r0 = recall_score(Y_test, predictions, pos_label=0, zero_division=0)
    f1_0 = f1_score(Y_test, predictions, pos_label=0, zero_division=0)

    p1 = precision_score(Y_test, predictions, pos_label=1, zero_division=0)
    r1 = recall_score(Y_test, predictions, pos_label=1, zero_division=0)
    f1_1 = f1_score(Y_test, predictions, pos_label=1, zero_division=0)

    print(f"\nDetailed Metrics:")
    print(f"  Winner 0: Precision={p0:.3f}, Recall={r0:.3f}, F1={f1_0:.3f}")
    print(f"  Winner 1: Precision={p1:.3f}, Recall={r1:.3f}, F1={f1_1:.3f}")

    # Sample predictions
    if verbose:
        print(f"\nSample Predictions:")
        for i in range(min(10, len(predictions))):
            status = "✓" if predictions[i] == Y_test[i] else "✗"
            print(f"  {status} Actual={Y_test[i]}, Predicted={predictions[i]}")

    print(f"{'='*70}\n")

    return {
        'accuracy': overall_acc,
        'predictions': predictions,
        'labels': Y_test,
        'confusion_matrix': cm,
        'metrics': {'p0': p0, 'r0': r0, 'f1_0': f1_0, 'p1': p1, 'r1': r1, 'f1_1': f1_1}
    }

In [462]:
# load_model returns a (model, metadata) pair; unpack it so `tm` is the model
# itself rather than the tuple.
tm, metadata = load_model("TsetlinMachine/hex_tm_model.pkl")
# evaluate_model loads the model from its default path on its own.
results = evaluate_model(num_test_games=1000)


Loading model from TsetlinMachine/hex_tm_model.pkl...
Initialization of sparse structure.
Loading model from TsetlinMachine/hex_tm_model.pkl.
✓ Model loaded successfully
  Clauses: 60, Depth: 1
Loading model from TsetlinMachine/hex_tm_model.pkl...
Initialization of sparse structure.
Loading model from TsetlinMachine/hex_tm_model.pkl.
✓ Model loaded successfully
  Clauses: 60, Depth: 1

MODEL EVALUATION

Generating 1000 test games...
Generating 1000 games...
Successfully parsed 1000 games from output
✓ Created 1000 test samples
  Winner 0: 677, Winner 1: 323
Preparing graphs...
✓ Graphs prepared
Making predictions...

RESULTS - 1000 samples from 1000 games

Overall Accuracy: 32.30%
Winner 0 Accuracy: 0.00% (0/677)
Winner 1 Accuracy: 100.00% (323/323)

Confusion Matrix:
              Predicted
              0      1
Actual  0  [   0   677]
        1  [   0   323]

Detailed Metrics:
  Winner 0: Precision=0.000, Recall=0.000, F1=0.000
  Winner 1: Precision=0.323, Recall=1.000, F1=0.488

Sa