In [1]:
import numpy as np
import torch
import random
import os
from tqdm import tqdm
import matplotlib.pyplot as plt

# Import your modules
from model.model import TicTacToeCNN
from src.dataloader import load_dataset
from src.eval import evaluate_agents
from src.train import train_model, train_model_with_early_stopping

In [2]:
def set_seed(seed):
    """Sets the random seed for reproducibility."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed) # For multi-GPU setups
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


def load_mcts_data(data_dir="./monte_carlo_data"):
    """
    Loads pre-generated MCTS data from .npy files.
    Returns a dictionary mapping board state string to MCTS probabilities.
    """
    mcts_data = {}
    print(f"Loading MCTS data from {data_dir}...")
    for filename in tqdm(os.listdir(data_dir)):
        if filename.endswith(".npy"):
            file_path = os.path.join(data_dir, filename)
            data = np.load(file_path, allow_pickle=True)
            board_input = data[0]
            mcts_probs = data[1]

            # Reconstruct the board state string from the input array
            board_list = [' ' for _ in range(9)]
            for i in range(3):
                for j in range(3):
                    idx = i * 3 + j
                    if board_input[0, i, j] == 1:
                        board_list[idx] = 'O'
                    elif board_input[1, i, j] == 1:
                        board_list[idx] = 'X'
            state_key = ''.join(board_list)
            mcts_data[state_key] = mcts_probs
    print(f"Loaded {len(mcts_data)} MCTS states.")
    return mcts_data

In [3]:
# Hardcoded training parameters
epochs = 5 # Renamed to plural for clarity
optimizer_choice = "adam"
criterion_choice = "mse" # Or "kl_div"

# Define seeds for multiple runs
seeds = [42, 101, 202, 303, 404]

# Define default games for most evaluations and a reduced amount for MCTS
default_eval_games = 5000
mcts_eval_games = 5000 # Reduced games for MCTS comparison

all_evaluation_results = {}

# Load MCTS data once at the beginning
preloaded_mcts_data = load_mcts_data()

for seed in seeds:
    print(f"\n--- Running experiment with seed: {seed} ---")
    set_seed(seed) # Set seed for the current run

    # Define save directory for this seed
    seed_save_dir = os.path.join("results", f'seed_{seed}_{optimizer_choice}_{criterion_choice}_epoch_{epochs}')
    os.makedirs(seed_save_dir, exist_ok=True)

    models = {}
    # Train and save models
    for curriculum_type in ["easy_to_hard", "hard_to_easy", "random"]:
        print(f"Loading {curriculum_type} dataset...")
        # Use train_dataloader and test_dataloader for more detailed logging
        train_data_loader = load_dataset(curriculum_type)
        print(f"Training {curriculum_type} model...")
        model = TicTacToeCNN(kl_div=(criterion_choice == "kl_div"))
        train_model(model, train_data_loader, epochs=epochs, optimizer=optimizer_choice, criterion=criterion_choice, disable_wandb=True)

        model_name = f"model_{curriculum_type.replace('_to_', '_').replace('random', 'random_curriculum')}"
        torch.save(model.state_dict(), os.path.join(seed_save_dir, f"{model_name}.pth"))
        model.eval() # Set to eval mode for evaluation
        models[model_name] = model

        # Store results for this seed
        current_seed_results = {}

    print(f"\n--- Evaluating Models for Seed {seed} ---")

    # Comparisons between curriculum models
    print("\n--- Evaluating Curriculum Models Against Each Other ---")
    curriculum_model_names = list(models.keys())
    for i in range(len(curriculum_model_names)):
        for j in range(i + 1, len(curriculum_model_names)):
            name1 = curriculum_model_names[i]
            name2 = curriculum_model_names[j]
            results = evaluate_agents(models[name1], models[name2],
                                      games=default_eval_games,
                                      agent1_criterion=criterion_choice,
                                      agent2_criterion=criterion_choice)
            comparison_name = f"{name1.replace('model_', '')} vs {name2.replace('model_', '')}"
            current_seed_results[comparison_name] = results
            print(f"Results ({comparison_name}): {results}")

        # Comparisons with MCTS (using pre-computed data) and Random agents
        print("\n--- Evaluating Curriculum Models Against Pre-computed MCTS Agent ---")
    for model_name, model_obj in models.items():
        results = evaluate_agents(model_obj, 'mcts_data_agent', # Changed to 'mcts_data_agent'
                                  games=mcts_eval_games,
                                  agent1_criterion=criterion_choice,
                                  agent2_criterion=None, # MCTS doesn't have a criterion
                                  mcts_data=preloaded_mcts_data) # Pass the loaded data
        comparison_name = f"{model_name.replace('model_', '')} vs MCTS_data_agent"
        current_seed_results[comparison_name] = results
        print(f"Results ({comparison_name}): {results}")

        print("\n--- Evaluating Curriculum Models Against Pure Random Actions ---")
    for model_name, model_obj in models.items():
        results = evaluate_agents(model_obj, 'random_agent',
                                  games=default_eval_games,
                                  agent1_criterion=criterion_choice,
                                  agent2_criterion=None) # Random doesn't have a criterion
        comparison_name = f"{model_name.replace('model_', '')} vs Random_agent"
        current_seed_results[comparison_name] = results
        print(f"Results ({comparison_name}): {results}")


        all_evaluation_results[seed] = current_seed_results

    # Log results for the current seed to a file
    with open(os.path.join(seed_save_dir, "evaluation_results.txt"), "w") as f:
        f.write(f"--- Evaluation Results for Seed: {seed} ---\n\n")
        for comparison, res in current_seed_results.items():
            f.write(f"Results ({comparison}): {res}\n")
            f.write("\n")

print("\n--- All Experiments Complete ---")
print("\nSummary of all results (per seed):")
for seed, results_dict in all_evaluation_results.items():
    print(f"\nSeed {seed}:")
    for comparison, results in results_dict.items():
        print(f"  {comparison}: {results}")

Loading MCTS data from ./monte_carlo_data...


100%|████████████████████████████████████| 4520/4520 [00:00<00:00, 29639.78it/s]

Loaded 4520 MCTS states.

--- Running experiment with seed: 42 ---
Loading easy_to_hard dataset...
Training easy_to_hard model...



[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
 20%|█████████                                    | 1/5 [00:00<00:01,  2.43it/s]

Epoch 1, Average Loss: 0.0859


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.35it/s]

Epoch 2, Average Loss: 0.0425


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.32it/s]

Epoch 3, Average Loss: 0.0308


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.32it/s]

Epoch 4, Average Loss: 0.0274


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.32it/s]


Epoch 5, Average Loss: 0.0277
Loading hard_to_easy dataset...
Training hard_to_easy model...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.28it/s]

Epoch 1, Average Loss: 0.1132


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.30it/s]

Epoch 2, Average Loss: 0.0560


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.29it/s]

Epoch 3, Average Loss: 0.0338


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.28it/s]

Epoch 4, Average Loss: 0.0295


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.28it/s]


Epoch 5, Average Loss: 0.0283
Loading random dataset...
Training random model...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.31it/s]

Epoch 1, Average Loss: 0.0852


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.30it/s]

Epoch 2, Average Loss: 0.0326


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.30it/s]

Epoch 3, Average Loss: 0.0241


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.30it/s]

Epoch 4, Average Loss: 0.0181


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.29it/s]

Epoch 5, Average Loss: 0.0143

--- Evaluating Models for Seed 42 ---

--- Evaluating Curriculum Models Against Each Other ---



  raw_output = agent(torch.tensor([board], dtype=torch.float32)).squeeze().view(9)


Results (easy_hard vs hard_easy): {'agent1_wins': 2883, 'agent2_wins': 1327, 'draw': 790}
Results (easy_hard vs random_curriculum): {'agent1_wins': 2972, 'agent2_wins': 1281, 'draw': 747}

--- Evaluating Curriculum Models Against Pre-computed MCTS Agent ---
Results (hard_easy vs random_curriculum): {'agent1_wins': 2194, 'agent2_wins': 2177, 'draw': 629}

--- Evaluating Curriculum Models Against Pre-computed MCTS Agent ---

--- Evaluating Curriculum Models Against Pre-computed MCTS Agent ---
Results (easy_hard vs MCTS_data_agent): {'agent1_wins': 2501, 'agent2_wins': 1582, 'draw': 917}

--- Evaluating Curriculum Models Against Pure Random Actions ---
Results (hard_easy vs MCTS_data_agent): {'agent1_wins': 1608, 'agent2_wins': 2561, 'draw': 831}

--- Evaluating Curriculum Models Against Pure Random Actions ---
Results (random_curriculum vs MCTS_data_agent): {'agent1_wins': 1571, 'agent2_wins': 2648, 'draw': 781}

--- Evaluating Curriculum Models Against Pure Random Actions ---
Results (e

 20%|█████████                                    | 1/5 [00:00<00:01,  2.06it/s]

Epoch 1, Average Loss: 0.0867


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.16it/s]

Epoch 2, Average Loss: 0.0421


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.20it/s]

Epoch 3, Average Loss: 0.0334


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.19it/s]

Epoch 4, Average Loss: 0.0306


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.19it/s]


Epoch 5, Average Loss: 0.0282
Loading hard_to_easy dataset...
Training hard_to_easy model...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.20it/s]

Epoch 1, Average Loss: 0.1129


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.16it/s]

Epoch 2, Average Loss: 0.0554


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.16it/s]

Epoch 3, Average Loss: 0.0364


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.17it/s]

Epoch 4, Average Loss: 0.0326


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.18it/s]


Epoch 5, Average Loss: 0.0307
Loading random dataset...
Training random model...


 20%|█████████                                    | 1/5 [00:00<00:02,  1.66it/s]

Epoch 1, Average Loss: 0.0825


 40%|██████████████████                           | 2/5 [00:01<00:01,  1.96it/s]

Epoch 2, Average Loss: 0.0296


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.10it/s]

Epoch 3, Average Loss: 0.0200


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.16it/s]

Epoch 4, Average Loss: 0.0149


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.11it/s]

Epoch 5, Average Loss: 0.0124

--- Evaluating Models for Seed 101 ---

--- Evaluating Curriculum Models Against Each Other ---





Results (easy_hard vs hard_easy): {'agent1_wins': 2857, 'agent2_wins': 1364, 'draw': 779}
Results (easy_hard vs random_curriculum): {'agent1_wins': 2741, 'agent2_wins': 1508, 'draw': 751}

--- Evaluating Curriculum Models Against Pre-computed MCTS Agent ---
Results (hard_easy vs random_curriculum): {'agent1_wins': 2056, 'agent2_wins': 2251, 'draw': 693}

--- Evaluating Curriculum Models Against Pre-computed MCTS Agent ---

--- Evaluating Curriculum Models Against Pre-computed MCTS Agent ---
Results (easy_hard vs MCTS_data_agent): {'agent1_wins': 2464, 'agent2_wins': 1662, 'draw': 874}

--- Evaluating Curriculum Models Against Pure Random Actions ---
Results (hard_easy vs MCTS_data_agent): {'agent1_wins': 1608, 'agent2_wins': 2663, 'draw': 729}

--- Evaluating Curriculum Models Against Pure Random Actions ---
Results (random_curriculum vs MCTS_data_agent): {'agent1_wins': 1721, 'agent2_wins': 2532, 'draw': 747}

--- Evaluating Curriculum Models Against Pure Random Actions ---
Results (e

 20%|█████████                                    | 1/5 [00:00<00:01,  2.15it/s]

Epoch 1, Average Loss: 0.0846


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.17it/s]

Epoch 2, Average Loss: 0.0410


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.17it/s]

Epoch 3, Average Loss: 0.0324


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.18it/s]

Epoch 4, Average Loss: 0.0287


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.17it/s]


Epoch 5, Average Loss: 0.0285
Loading hard_to_easy dataset...
Training hard_to_easy model...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.18it/s]

Epoch 1, Average Loss: 0.1119


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.17it/s]

Epoch 2, Average Loss: 0.0548


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.18it/s]

Epoch 3, Average Loss: 0.0334


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.18it/s]

Epoch 4, Average Loss: 0.0308


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.18it/s]


Epoch 5, Average Loss: 0.0276
Loading random dataset...
Training random model...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.19it/s]

Epoch 1, Average Loss: 0.0819


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.17it/s]

Epoch 2, Average Loss: 0.0323


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.18it/s]

Epoch 3, Average Loss: 0.0237


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.17it/s]

Epoch 4, Average Loss: 0.0178


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.18it/s]

Epoch 5, Average Loss: 0.0148

--- Evaluating Models for Seed 202 ---

--- Evaluating Curriculum Models Against Each Other ---





Results (easy_hard vs hard_easy): {'agent1_wins': 2937, 'agent2_wins': 1220, 'draw': 843}
Results (easy_hard vs random_curriculum): {'agent1_wins': 2991, 'agent2_wins': 1204, 'draw': 805}

--- Evaluating Curriculum Models Against Pre-computed MCTS Agent ---
Results (hard_easy vs random_curriculum): {'agent1_wins': 2228, 'agent2_wins': 2162, 'draw': 610}

--- Evaluating Curriculum Models Against Pre-computed MCTS Agent ---

--- Evaluating Curriculum Models Against Pre-computed MCTS Agent ---
Results (easy_hard vs MCTS_data_agent): {'agent1_wins': 2535, 'agent2_wins': 1516, 'draw': 949}

--- Evaluating Curriculum Models Against Pure Random Actions ---
Results (hard_easy vs MCTS_data_agent): {'agent1_wins': 1669, 'agent2_wins': 2567, 'draw': 764}

--- Evaluating Curriculum Models Against Pure Random Actions ---
Results (random_curriculum vs MCTS_data_agent): {'agent1_wins': 1557, 'agent2_wins': 2656, 'draw': 787}

--- Evaluating Curriculum Models Against Pure Random Actions ---
Results (e

 20%|█████████                                    | 1/5 [00:00<00:01,  2.05it/s]

Epoch 1, Average Loss: 0.0823


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.09it/s]

Epoch 2, Average Loss: 0.0417


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.09it/s]

Epoch 3, Average Loss: 0.0321


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.10it/s]

Epoch 4, Average Loss: 0.0296


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.09it/s]


Epoch 5, Average Loss: 0.0282
Loading hard_to_easy dataset...
Training hard_to_easy model...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.10it/s]

Epoch 1, Average Loss: 0.1157


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.10it/s]

Epoch 2, Average Loss: 0.0481


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.11it/s]

Epoch 3, Average Loss: 0.0330


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.10it/s]

Epoch 4, Average Loss: 0.0304


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.09it/s]


Epoch 5, Average Loss: 0.0287
Loading random dataset...
Training random model...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.05it/s]

Epoch 1, Average Loss: 0.0816


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.07it/s]

Epoch 2, Average Loss: 0.0286


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.06it/s]

Epoch 3, Average Loss: 0.0200


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.07it/s]

Epoch 4, Average Loss: 0.0162


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.07it/s]

Epoch 5, Average Loss: 0.0140

--- Evaluating Models for Seed 303 ---

--- Evaluating Curriculum Models Against Each Other ---





Results (easy_hard vs hard_easy): {'agent1_wins': 2826, 'agent2_wins': 1373, 'draw': 801}
Results (easy_hard vs random_curriculum): {'agent1_wins': 2878, 'agent2_wins': 1392, 'draw': 730}

--- Evaluating Curriculum Models Against Pre-computed MCTS Agent ---
Results (hard_easy vs random_curriculum): {'agent1_wins': 2184, 'agent2_wins': 2149, 'draw': 667}

--- Evaluating Curriculum Models Against Pre-computed MCTS Agent ---

--- Evaluating Curriculum Models Against Pre-computed MCTS Agent ---
Results (easy_hard vs MCTS_data_agent): {'agent1_wins': 2458, 'agent2_wins': 1621, 'draw': 921}

--- Evaluating Curriculum Models Against Pure Random Actions ---
Results (hard_easy vs MCTS_data_agent): {'agent1_wins': 1620, 'agent2_wins': 2591, 'draw': 789}

--- Evaluating Curriculum Models Against Pure Random Actions ---
Results (random_curriculum vs MCTS_data_agent): {'agent1_wins': 1634, 'agent2_wins': 2602, 'draw': 764}

--- Evaluating Curriculum Models Against Pure Random Actions ---
Results (e

 20%|█████████                                    | 1/5 [00:00<00:01,  2.06it/s]

Epoch 1, Average Loss: 0.0869


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.07it/s]

Epoch 2, Average Loss: 0.0447


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.07it/s]

Epoch 3, Average Loss: 0.0338


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.07it/s]

Epoch 4, Average Loss: 0.0304


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.07it/s]


Epoch 5, Average Loss: 0.0286
Loading hard_to_easy dataset...
Training hard_to_easy model...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.07it/s]

Epoch 1, Average Loss: 0.1150


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.07it/s]

Epoch 2, Average Loss: 0.0525


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.07it/s]

Epoch 3, Average Loss: 0.0355


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.07it/s]

Epoch 4, Average Loss: 0.0333


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.07it/s]


Epoch 5, Average Loss: 0.0303
Loading random dataset...
Training random model...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.09it/s]

Epoch 1, Average Loss: 0.0805


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.06it/s]

Epoch 2, Average Loss: 0.0314


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.07it/s]

Epoch 3, Average Loss: 0.0216


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.03it/s]

Epoch 4, Average Loss: 0.0158


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.05it/s]

Epoch 5, Average Loss: 0.0130

--- Evaluating Models for Seed 404 ---

--- Evaluating Curriculum Models Against Each Other ---





Results (easy_hard vs hard_easy): {'agent1_wins': 2828, 'agent2_wins': 1433, 'draw': 739}
Results (easy_hard vs random_curriculum): {'agent1_wins': 2833, 'agent2_wins': 1415, 'draw': 752}

--- Evaluating Curriculum Models Against Pre-computed MCTS Agent ---
Results (hard_easy vs random_curriculum): {'agent1_wins': 2146, 'agent2_wins': 2254, 'draw': 600}

--- Evaluating Curriculum Models Against Pre-computed MCTS Agent ---

--- Evaluating Curriculum Models Against Pre-computed MCTS Agent ---
Results (easy_hard vs MCTS_data_agent): {'agent1_wins': 2340, 'agent2_wins': 1788, 'draw': 872}

--- Evaluating Curriculum Models Against Pure Random Actions ---
Results (hard_easy vs MCTS_data_agent): {'agent1_wins': 1559, 'agent2_wins': 2615, 'draw': 826}

--- Evaluating Curriculum Models Against Pure Random Actions ---
Results (random_curriculum vs MCTS_data_agent): {'agent1_wins': 1623, 'agent2_wins': 2595, 'draw': 782}

--- Evaluating Curriculum Models Against Pure Random Actions ---
Results (e

In [4]:
# --- New Experiment: Training on different data portions ---
print("\n--- Running Data Portion Experiment ---")
data_percentages = [0.1, 0.25, 0.5, 0.75, 1.0]
data_portion_results = {curriculum_type: {p: [] for p in data_percentages} for curriculum_type in ["easy_to_hard", "hard_to_easy", "random"]}

for seed in seeds:
    print(f"\n--- Data Portion Experiment with seed: {seed} ---")
    set_seed(seed)

    portion_save_dir = os.path.join("results", f'data_portion_seed_{seed}_{optimizer_choice}_{criterion_choice}_epoch_{epochs}')
    os.makedirs(portion_save_dir, exist_ok=True)

    for curriculum_type in ["easy_to_hard", "hard_to_easy", "random"]:
        # Load fixed test set once per curriculum type
        test_data_loader = load_dataset(curriculum_type, split='test', data_percentage=1.0)

        for percentage in data_percentages:
            print(f"Training {curriculum_type} model with {percentage*100}% of training data...")
            
            # Load training set with specified percentage
            train_data_loader = load_dataset(curriculum_type, split='train', data_percentage=percentage)

            model = TicTacToeCNN(kl_div=(criterion_choice == "kl_div"))
            
            # Use train_model_with_early_stopping for the data portion experiment
            train_model_with_early_stopping(
                model, train_data_loader, test_data_loader,
                epochs=epochs, optimizer=optimizer_choice,
                criterion=criterion_choice, patience=10,
                min_delta=0.0001, disable_wandb=True
            )
            model.eval()

            # Evaluate against MCTS data agent
            print(f"Evaluating {curriculum_type} model ({percentage*100}% data) against MCTS_data_agent...")
            results = evaluate_agents(
                model, 'mcts_data_agent',
                games=mcts_eval_games,
                agent1_criterion=criterion_choice,
                agent2_criterion=None,
                mcts_data=preloaded_mcts_data
            )

            a1_win = results["agent1_wins"]
            a2_win = results["agent2_wins"]
            print(a1_win, a2_win)
            win_rate = a1_win / (a1_win + a2_win)
            data_portion_results[curriculum_type][percentage].append(win_rate)
            print(f"Win rate for {curriculum_type} with {percentage*100}% training data: {win_rate:.4f}")

            # Save the model
            model_portion_name = f"model_{curriculum_type.replace('_to_', '_').replace('random', 'random_curriculum')}_portion_{int(percentage*100)}"
            torch.save(model.state_dict(), os.path.join(portion_save_dir, f"{model_portion_name}.pth"))


--- Running Data Portion Experiment ---

--- Data Portion Experiment with seed: 42 ---
Training easy_to_hard model with 10.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:00,  8.59it/s]

Epoch 1, Average Train Loss: 0.1508
Epoch 1, Average Test Loss: 0.1078


 40%|██████████████████                           | 2/5 [00:00<00:00,  8.68it/s]

Epoch 2, Average Train Loss: 0.1052
Epoch 2, Average Test Loss: 0.1075


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  8.77it/s]

Epoch 3, Average Train Loss: 0.1043
Epoch 3, Average Test Loss: 0.1028


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  8.78it/s]

Epoch 4, Average Train Loss: 0.0971
Epoch 4, Average Test Loss: 0.0999


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.75it/s]

Epoch 5, Average Train Loss: 0.0942
Epoch 5, Average Test Loss: 0.0936
Evaluating easy_to_hard model (10.0% data) against MCTS_data_agent...





1314 2945
Win rate for easy_to_hard with 10.0% training data: 0.3085
Training easy_to_hard model with 25.0% of training data...


  0%|                                                     | 0/5 [00:00<?, ?it/s]

Epoch 1, Average Train Loss: 0.1188


 20%|█████████                                    | 1/5 [00:00<00:00,  5.50it/s]

Epoch 1, Average Test Loss: 0.1054
Epoch 2, Average Train Loss: 0.1037


 40%|██████████████████                           | 2/5 [00:00<00:00,  5.54it/s]

Epoch 2, Average Test Loss: 0.0983


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  5.63it/s]

Epoch 3, Average Train Loss: 0.0906
Epoch 3, Average Test Loss: 0.0764
Epoch 4, Average Train Loss: 0.0683


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  5.69it/s]

Epoch 4, Average Test Loss: 0.0554
Epoch 5, Average Train Loss: 0.0516


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.66it/s]

Epoch 5, Average Test Loss: 0.0454
Evaluating easy_to_hard model (25.0% data) against MCTS_data_agent...





1323 2909
Win rate for easy_to_hard with 25.0% training data: 0.3126
Training easy_to_hard model with 50.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  3.46it/s]

Epoch 1, Average Train Loss: 0.1030
Epoch 1, Average Test Loss: 0.0977


 40%|██████████████████                           | 2/5 [00:00<00:00,  3.53it/s]

Epoch 2, Average Train Loss: 0.0686
Epoch 2, Average Test Loss: 0.0687


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  3.60it/s]

Epoch 3, Average Train Loss: 0.0528
Epoch 3, Average Test Loss: 0.0680


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  3.61it/s]

Epoch 4, Average Train Loss: 0.0488
Epoch 4, Average Test Loss: 0.0586


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.59it/s]

Epoch 5, Average Train Loss: 0.0450
Epoch 5, Average Test Loss: 0.0612
Evaluating easy_to_hard model (50.0% data) against MCTS_data_agent...





1605 2631
Win rate for easy_to_hard with 50.0% training data: 0.3789
Training easy_to_hard model with 75.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.54it/s]

Epoch 1, Average Train Loss: 0.0918
Epoch 1, Average Test Loss: 0.0584


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.62it/s]

Epoch 2, Average Train Loss: 0.0418
Epoch 2, Average Test Loss: 0.0404


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.65it/s]

Epoch 3, Average Train Loss: 0.0343
Epoch 3, Average Test Loss: 0.0398


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.65it/s]

Epoch 4, Average Train Loss: 0.0341
Epoch 4, Average Test Loss: 0.0489


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.63it/s]

Epoch 5, Average Train Loss: 0.0354
Epoch 5, Average Test Loss: 0.0707
Evaluating easy_to_hard model (75.0% data) against MCTS_data_agent...





2282 1887
Win rate for easy_to_hard with 75.0% training data: 0.5474
Training easy_to_hard model with 100.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.02it/s]

Epoch 1, Average Train Loss: 0.0926
Epoch 1, Average Test Loss: 0.0673


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.10it/s]

Epoch 2, Average Train Loss: 0.0467
Epoch 2, Average Test Loss: 0.0437


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.07it/s]

Epoch 3, Average Train Loss: 0.0380
Epoch 3, Average Test Loss: 0.0502


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.10it/s]

Epoch 4, Average Train Loss: 0.0367
Epoch 4, Average Test Loss: 0.0545


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.09it/s]

Epoch 5, Average Train Loss: 0.0362
Epoch 5, Average Test Loss: 0.0667
Evaluating easy_to_hard model (100.0% data) against MCTS_data_agent...





2125 2059
Win rate for easy_to_hard with 100.0% training data: 0.5079
Training hard_to_easy model with 10.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:00,  7.98it/s]

Epoch 1, Average Train Loss: 0.1367
Epoch 1, Average Test Loss: 0.1138


 40%|██████████████████                           | 2/5 [00:00<00:00,  8.22it/s]

Epoch 2, Average Train Loss: 0.1135
Epoch 2, Average Test Loss: 0.1058


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  8.30it/s]

Epoch 3, Average Train Loss: 0.1047
Epoch 3, Average Test Loss: 0.1021


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  8.36it/s]

Epoch 4, Average Train Loss: 0.0993
Epoch 4, Average Test Loss: 0.0981


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.37it/s]

Epoch 5, Average Train Loss: 0.0938
Epoch 5, Average Test Loss: 0.0917
Evaluating hard_to_easy model (10.0% data) against MCTS_data_agent...





1239 2961
Win rate for hard_to_easy with 10.0% training data: 0.2950
Training hard_to_easy model with 25.0% of training data...


  0%|                                                     | 0/5 [00:00<?, ?it/s]

Epoch 1, Average Train Loss: 0.1155


 20%|█████████                                    | 1/5 [00:00<00:00,  5.28it/s]

Epoch 1, Average Test Loss: 0.1033
Epoch 2, Average Train Loss: 0.1012


 40%|██████████████████                           | 2/5 [00:00<00:00,  5.38it/s]

Epoch 2, Average Test Loss: 0.0938


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  5.54it/s]

Epoch 3, Average Train Loss: 0.0852
Epoch 3, Average Test Loss: 0.0716
Epoch 4, Average Train Loss: 0.0613


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  5.59it/s]

Epoch 4, Average Test Loss: 0.0526
Epoch 5, Average Train Loss: 0.0472


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.56it/s]

Epoch 5, Average Test Loss: 0.0452
Evaluating hard_to_easy model (25.0% data) against MCTS_data_agent...





1389 2889
Win rate for hard_to_easy with 25.0% training data: 0.3247
Training hard_to_easy model with 50.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  3.49it/s]

Epoch 1, Average Train Loss: 0.1006
Epoch 1, Average Test Loss: 0.0857


 40%|██████████████████                           | 2/5 [00:00<00:00,  3.55it/s]

Epoch 2, Average Train Loss: 0.0638
Epoch 2, Average Test Loss: 0.0578


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  3.62it/s]

Epoch 3, Average Train Loss: 0.0492
Epoch 3, Average Test Loss: 0.0589


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  3.62it/s]

Epoch 4, Average Train Loss: 0.0472
Epoch 4, Average Test Loss: 0.0474


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.61it/s]

Epoch 5, Average Train Loss: 0.0413
Epoch 5, Average Test Loss: 0.0473
Evaluating hard_to_easy model (50.0% data) against MCTS_data_agent...





1586 2625
Win rate for hard_to_easy with 50.0% training data: 0.3766
Training hard_to_easy model with 75.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.53it/s]

Epoch 1, Average Train Loss: 0.0893
Epoch 1, Average Test Loss: 0.0545


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.65it/s]

Epoch 2, Average Train Loss: 0.0434
Epoch 2, Average Test Loss: 0.0391


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.68it/s]

Epoch 3, Average Train Loss: 0.0371
Epoch 3, Average Test Loss: 0.0396


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.68it/s]

Epoch 4, Average Train Loss: 0.0367
Epoch 4, Average Test Loss: 0.0505


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.68it/s]

Epoch 5, Average Train Loss: 0.0362
Epoch 5, Average Test Loss: 0.0569
Evaluating hard_to_easy model (75.0% data) against MCTS_data_agent...





1857 2264
Win rate for hard_to_easy with 75.0% training data: 0.4506
Training hard_to_easy model with 100.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.03it/s]

Epoch 1, Average Train Loss: 0.1195
Epoch 1, Average Test Loss: 0.0854


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.10it/s]

Epoch 2, Average Train Loss: 0.0638
Epoch 2, Average Test Loss: 0.0520


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.12it/s]

Epoch 3, Average Train Loss: 0.0405
Epoch 3, Average Test Loss: 0.0562


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.13it/s]

Epoch 4, Average Train Loss: 0.0349
Epoch 4, Average Test Loss: 0.0592


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.12it/s]

Epoch 5, Average Train Loss: 0.0379
Epoch 5, Average Test Loss: 0.0565
Evaluating hard_to_easy model (100.0% data) against MCTS_data_agent...





1524 2687
Win rate for hard_to_easy with 100.0% training data: 0.3619
Training random model with 10.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:00,  8.54it/s]

Epoch 1, Average Train Loss: 0.1428
Epoch 1, Average Test Loss: 0.1084


 40%|██████████████████                           | 2/5 [00:00<00:00,  8.52it/s]

Epoch 2, Average Train Loss: 0.1141
Epoch 2, Average Test Loss: 0.1064


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  8.48it/s]

Epoch 3, Average Train Loss: 0.1073
Epoch 3, Average Test Loss: 0.1018


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  8.56it/s]

Epoch 4, Average Train Loss: 0.0998
Epoch 4, Average Test Loss: 0.0969


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.56it/s]

Epoch 5, Average Train Loss: 0.0943
Epoch 5, Average Test Loss: 0.0891
Evaluating random model (10.0% data) against MCTS_data_agent...





1370 2847
Win rate for random with 10.0% training data: 0.3249
Training random model with 25.0% of training data...


  0%|                                                     | 0/5 [00:00<?, ?it/s]

Epoch 1, Average Train Loss: 0.1217


 20%|█████████                                    | 1/5 [00:00<00:00,  5.38it/s]

Epoch 1, Average Test Loss: 0.1040
Epoch 2, Average Train Loss: 0.1053


 40%|██████████████████                           | 2/5 [00:00<00:00,  5.46it/s]

Epoch 2, Average Test Loss: 0.0968


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  5.58it/s]

Epoch 3, Average Train Loss: 0.0909
Epoch 3, Average Test Loss: 0.0745
Epoch 4, Average Train Loss: 0.0659


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  5.61it/s]

Epoch 4, Average Test Loss: 0.0544
Epoch 5, Average Train Loss: 0.0495


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.61it/s]

Epoch 5, Average Test Loss: 0.0440
Evaluating random model (25.0% data) against MCTS_data_agent...





1341 2877
Win rate for random with 25.0% training data: 0.3179
Training random model with 50.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  3.38it/s]

Epoch 1, Average Train Loss: 0.1045
Epoch 1, Average Test Loss: 0.0982


 40%|██████████████████                           | 2/5 [00:00<00:00,  3.50it/s]

Epoch 2, Average Train Loss: 0.0743
Epoch 2, Average Test Loss: 0.0678


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  3.57it/s]

Epoch 3, Average Train Loss: 0.0531
Epoch 3, Average Test Loss: 0.0610


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  3.59it/s]

Epoch 4, Average Train Loss: 0.0480
Epoch 4, Average Test Loss: 0.0479


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.57it/s]

Epoch 5, Average Train Loss: 0.0442
Epoch 5, Average Test Loss: 0.0473
Evaluating random model (50.0% data) against MCTS_data_agent...





1665 2582
Win rate for random with 50.0% training data: 0.3920
Training random model with 75.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.38it/s]

Epoch 1, Average Train Loss: 0.0923
Epoch 1, Average Test Loss: 0.0568


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.52it/s]

Epoch 2, Average Train Loss: 0.0426
Epoch 2, Average Test Loss: 0.0414


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.62it/s]

Epoch 3, Average Train Loss: 0.0368
Epoch 3, Average Test Loss: 0.0399


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.65it/s]

Epoch 4, Average Train Loss: 0.0369
Epoch 4, Average Test Loss: 0.0520


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.62it/s]

Epoch 5, Average Train Loss: 0.0371
Epoch 5, Average Test Loss: 0.0655
Evaluating random model (75.0% data) against MCTS_data_agent...





2109 2058
Win rate for random with 75.0% training data: 0.5061
Training random model with 100.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.01it/s]

Epoch 1, Average Train Loss: 0.0879
Epoch 1, Average Test Loss: 0.0434


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.10it/s]

Epoch 2, Average Train Loss: 0.0332
Epoch 2, Average Test Loss: 0.0305


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.11it/s]

Epoch 3, Average Train Loss: 0.0253
Epoch 3, Average Test Loss: 0.0219


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.13it/s]

Epoch 4, Average Train Loss: 0.0195
Epoch 4, Average Test Loss: 0.0167


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.12it/s]

Epoch 5, Average Train Loss: 0.0161
Epoch 5, Average Test Loss: 0.0146
Evaluating random model (100.0% data) against MCTS_data_agent...





1535 2633
Win rate for random with 100.0% training data: 0.3683

--- Data Portion Experiment with seed: 101 ---
Training easy_to_hard model with 10.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:00,  8.12it/s]

Epoch 1, Average Train Loss: 0.1418
Epoch 1, Average Test Loss: 0.1087


 40%|██████████████████                           | 2/5 [00:00<00:00,  8.29it/s]

Epoch 2, Average Train Loss: 0.1055
Epoch 2, Average Test Loss: 0.1083


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  8.37it/s]

Epoch 3, Average Train Loss: 0.1031
Epoch 3, Average Test Loss: 0.1045


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  8.50it/s]

Epoch 4, Average Train Loss: 0.0960
Epoch 4, Average Test Loss: 0.1021


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.43it/s]

Epoch 5, Average Train Loss: 0.0954
Epoch 5, Average Test Loss: 0.0977
Evaluating easy_to_hard model (10.0% data) against MCTS_data_agent...





1290 2976
Win rate for easy_to_hard with 10.0% training data: 0.3024
Training easy_to_hard model with 25.0% of training data...


  0%|                                                     | 0/5 [00:00<?, ?it/s]

Epoch 1, Average Train Loss: 0.1191


 20%|█████████                                    | 1/5 [00:00<00:00,  5.34it/s]

Epoch 1, Average Test Loss: 0.1043
Epoch 2, Average Train Loss: 0.1039


 40%|██████████████████                           | 2/5 [00:00<00:00,  5.45it/s]

Epoch 2, Average Test Loss: 0.0992


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  5.59it/s]

Epoch 3, Average Train Loss: 0.0919
Epoch 3, Average Test Loss: 0.0776
Epoch 4, Average Train Loss: 0.0674


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  5.63it/s]

Epoch 4, Average Test Loss: 0.0552
Epoch 5, Average Train Loss: 0.0503


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.63it/s]

Epoch 5, Average Test Loss: 0.0420
Evaluating easy_to_hard model (25.0% data) against MCTS_data_agent...





1376 2895
Win rate for easy_to_hard with 25.0% training data: 0.3222
Training easy_to_hard model with 50.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  3.46it/s]

Epoch 1, Average Train Loss: 0.1013
Epoch 1, Average Test Loss: 0.0981


 40%|██████████████████                           | 2/5 [00:00<00:00,  3.53it/s]

Epoch 2, Average Train Loss: 0.0665
Epoch 2, Average Test Loss: 0.0570


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  3.60it/s]

Epoch 3, Average Train Loss: 0.0481
Epoch 3, Average Test Loss: 0.0630


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  3.64it/s]

Epoch 4, Average Train Loss: 0.0466
Epoch 4, Average Test Loss: 0.0576


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.62it/s]

Epoch 5, Average Train Loss: 0.0437
Epoch 5, Average Test Loss: 0.0529
Evaluating easy_to_hard model (50.0% data) against MCTS_data_agent...





1774 2393
Win rate for easy_to_hard with 50.0% training data: 0.4257
Training easy_to_hard model with 75.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.31it/s]

Epoch 1, Average Train Loss: 0.0918
Epoch 1, Average Test Loss: 0.0637


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.43it/s]

Epoch 2, Average Train Loss: 0.0424
Epoch 2, Average Test Loss: 0.0431


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.54it/s]

Epoch 3, Average Train Loss: 0.0356
Epoch 3, Average Test Loss: 0.0467


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.60it/s]

Epoch 4, Average Train Loss: 0.0372
Epoch 4, Average Test Loss: 0.0665


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.57it/s]

Epoch 5, Average Train Loss: 0.0389
Epoch 5, Average Test Loss: 0.0664
Evaluating easy_to_hard model (75.0% data) against MCTS_data_agent...





2199 1937
Win rate for easy_to_hard with 75.0% training data: 0.5317
Training easy_to_hard model with 100.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.01it/s]

Epoch 1, Average Train Loss: 0.0931
Epoch 1, Average Test Loss: 0.0642


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.09it/s]

Epoch 2, Average Train Loss: 0.0477
Epoch 2, Average Test Loss: 0.0483


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.12it/s]

Epoch 3, Average Train Loss: 0.0376
Epoch 3, Average Test Loss: 0.0538


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.12it/s]

Epoch 4, Average Train Loss: 0.0359
Epoch 4, Average Test Loss: 0.0656


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.11it/s]

Epoch 5, Average Train Loss: 0.0328
Epoch 5, Average Test Loss: 0.0684
Evaluating easy_to_hard model (100.0% data) against MCTS_data_agent...





2208 1969
Win rate for easy_to_hard with 100.0% training data: 0.5286
Training hard_to_easy model with 10.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:00,  8.64it/s]

Epoch 1, Average Train Loss: 0.1426
Epoch 1, Average Test Loss: 0.1101


 40%|██████████████████                           | 2/5 [00:00<00:00,  8.49it/s]

Epoch 2, Average Train Loss: 0.1167
Epoch 2, Average Test Loss: 0.1062


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  8.50it/s]

Epoch 3, Average Train Loss: 0.1097
Epoch 3, Average Test Loss: 0.1029


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  8.59it/s]

Epoch 4, Average Train Loss: 0.1050
Epoch 4, Average Test Loss: 0.0997


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.62it/s]

Epoch 5, Average Train Loss: 0.1008
Epoch 5, Average Test Loss: 0.0945
Evaluating hard_to_easy model (10.0% data) against MCTS_data_agent...





1262 3007
Win rate for hard_to_easy with 10.0% training data: 0.2956
Training hard_to_easy model with 25.0% of training data...


  0%|                                                     | 0/5 [00:00<?, ?it/s]

Epoch 1, Average Train Loss: 0.1187


 20%|█████████                                    | 1/5 [00:00<00:00,  5.46it/s]

Epoch 1, Average Test Loss: 0.1034
Epoch 2, Average Train Loss: 0.1024


 40%|██████████████████                           | 2/5 [00:00<00:00,  5.48it/s]

Epoch 2, Average Test Loss: 0.0950


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  5.63it/s]

Epoch 3, Average Train Loss: 0.0869
Epoch 3, Average Test Loss: 0.0706
Epoch 4, Average Train Loss: 0.0623


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  5.69it/s]

Epoch 4, Average Test Loss: 0.0503
Epoch 5, Average Train Loss: 0.0467


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.68it/s]

Epoch 5, Average Test Loss: 0.0392
Evaluating hard_to_easy model (25.0% data) against MCTS_data_agent...





1412 2826
Win rate for hard_to_easy with 25.0% training data: 0.3332
Training hard_to_easy model with 50.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  3.34it/s]

Epoch 1, Average Train Loss: 0.1006
Epoch 1, Average Test Loss: 0.0981


 40%|██████████████████                           | 2/5 [00:00<00:00,  3.12it/s]

Epoch 2, Average Train Loss: 0.0704
Epoch 2, Average Test Loss: 0.0622


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  3.34it/s]

Epoch 3, Average Train Loss: 0.0528
Epoch 3, Average Test Loss: 0.0632


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  3.46it/s]

Epoch 4, Average Train Loss: 0.0474
Epoch 4, Average Test Loss: 0.0528


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.44it/s]

Epoch 5, Average Train Loss: 0.0442
Epoch 5, Average Test Loss: 0.0572
Evaluating hard_to_easy model (50.0% data) against MCTS_data_agent...





1677 2493
Win rate for hard_to_easy with 50.0% training data: 0.4022
Training hard_to_easy model with 75.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.46it/s]

Epoch 1, Average Train Loss: 0.0906
Epoch 1, Average Test Loss: 0.0570


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.62it/s]

Epoch 2, Average Train Loss: 0.0410
Epoch 2, Average Test Loss: 0.0394


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.65it/s]

Epoch 3, Average Train Loss: 0.0348
Epoch 3, Average Test Loss: 0.0434


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.67it/s]

Epoch 4, Average Train Loss: 0.0338
Epoch 4, Average Test Loss: 0.0597


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.66it/s]

Epoch 5, Average Train Loss: 0.0352
Epoch 5, Average Test Loss: 0.0706
Evaluating hard_to_easy model (75.0% data) against MCTS_data_agent...





2086 2042
Win rate for hard_to_easy with 75.0% training data: 0.5053
Training hard_to_easy model with 100.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.03it/s]

Epoch 1, Average Train Loss: 0.1186
Epoch 1, Average Test Loss: 0.0924


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.09it/s]

Epoch 2, Average Train Loss: 0.0624
Epoch 2, Average Test Loss: 0.0494


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.12it/s]

Epoch 3, Average Train Loss: 0.0378
Epoch 3, Average Test Loss: 0.0555


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.12it/s]

Epoch 4, Average Train Loss: 0.0362
Epoch 4, Average Test Loss: 0.0577


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.12it/s]

Epoch 5, Average Train Loss: 0.0385
Epoch 5, Average Test Loss: 0.0536
Evaluating hard_to_easy model (100.0% data) against MCTS_data_agent...





1459 2743
Win rate for hard_to_easy with 100.0% training data: 0.3472
Training random model with 10.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:00,  8.57it/s]

Epoch 1, Average Train Loss: 0.1446
Epoch 1, Average Test Loss: 0.1108


 40%|██████████████████                           | 2/5 [00:00<00:00,  8.57it/s]

Epoch 2, Average Train Loss: 0.1164
Epoch 2, Average Test Loss: 0.1057


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  8.59it/s]

Epoch 3, Average Train Loss: 0.1076
Epoch 3, Average Test Loss: 0.1019


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  8.57it/s]

Epoch 4, Average Train Loss: 0.1003
Epoch 4, Average Test Loss: 0.0975


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.63it/s]

Epoch 5, Average Train Loss: 0.0949
Epoch 5, Average Test Loss: 0.0903
Evaluating random model (10.0% data) against MCTS_data_agent...





1342 2911
Win rate for random with 10.0% training data: 0.3155
Training random model with 25.0% of training data...


  0%|                                                     | 0/5 [00:00<?, ?it/s]

Epoch 1, Average Train Loss: 0.1198


 20%|█████████                                    | 1/5 [00:00<00:00,  4.28it/s]

Epoch 1, Average Test Loss: 0.1022
Epoch 2, Average Train Loss: 0.1030


 40%|██████████████████                           | 2/5 [00:00<00:00,  4.41it/s]

Epoch 2, Average Test Loss: 0.0926
Epoch 3, Average Train Loss: 0.0859
Epoch 3, Average Test Loss: 0.0668


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  4.66it/s]

Epoch 4, Average Train Loss: 0.0599


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  4.82it/s]

Epoch 4, Average Test Loss: 0.0510
Epoch 5, Average Train Loss: 0.0475


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  4.79it/s]

Epoch 5, Average Test Loss: 0.0413
Evaluating random model (25.0% data) against MCTS_data_agent...





1337 2908
Win rate for random with 25.0% training data: 0.3150
Training random model with 50.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  3.35it/s]

Epoch 1, Average Train Loss: 0.1014
Epoch 1, Average Test Loss: 0.0952


 40%|██████████████████                           | 2/5 [00:00<00:00,  3.27it/s]

Epoch 2, Average Train Loss: 0.0670
Epoch 2, Average Test Loss: 0.0630


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  3.26it/s]

Epoch 3, Average Train Loss: 0.0521
Epoch 3, Average Test Loss: 0.0667


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  3.03it/s]

Epoch 4, Average Train Loss: 0.0506
Epoch 4, Average Test Loss: 0.0515


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.86it/s]

Epoch 5, Average Train Loss: 0.0443
Epoch 5, Average Test Loss: 0.0465
Evaluating random model (50.0% data) against MCTS_data_agent...





1601 2590
Win rate for random with 50.0% training data: 0.3820
Training random model with 75.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.56it/s]

Epoch 1, Average Train Loss: 0.0875
Epoch 1, Average Test Loss: 0.0565


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.64it/s]

Epoch 2, Average Train Loss: 0.0411
Epoch 2, Average Test Loss: 0.0518


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.67it/s]

Epoch 3, Average Train Loss: 0.0379
Epoch 3, Average Test Loss: 0.0447


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.67it/s]

Epoch 4, Average Train Loss: 0.0366
Epoch 4, Average Test Loss: 0.0602


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.65it/s]

Epoch 5, Average Train Loss: 0.0365
Epoch 5, Average Test Loss: 0.0649
Evaluating random model (75.0% data) against MCTS_data_agent...





2147 1927
Win rate for random with 75.0% training data: 0.5270
Training random model with 100.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:02,  1.98it/s]

Epoch 1, Average Train Loss: 0.0926
Epoch 1, Average Test Loss: 0.0474


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.08it/s]

Epoch 2, Average Train Loss: 0.0351
Epoch 2, Average Test Loss: 0.0312


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.10it/s]

Epoch 3, Average Train Loss: 0.0265
Epoch 3, Average Test Loss: 0.0246


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.12it/s]

Epoch 4, Average Train Loss: 0.0207
Epoch 4, Average Test Loss: 0.0194


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.11it/s]

Epoch 5, Average Train Loss: 0.0170
Epoch 5, Average Test Loss: 0.0161
Evaluating random model (100.0% data) against MCTS_data_agent...





1581 2649
Win rate for random with 100.0% training data: 0.3738

--- Data Portion Experiment with seed: 202 ---
Training easy_to_hard model with 10.0% of training data...


  0%|                                                     | 0/5 [00:00<?, ?it/s]

Epoch 1, Average Train Loss: 0.1523


 40%|██████████████████                           | 2/5 [00:00<00:00,  4.46it/s]

Epoch 1, Average Test Loss: 0.1096
Epoch 2, Average Train Loss: 0.1083
Epoch 2, Average Test Loss: 0.1086
Epoch 3, Average Train Loss: 0.1070


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  6.65it/s]

Epoch 3, Average Test Loss: 0.1051
Epoch 4, Average Train Loss: 0.1001
Epoch 4, Average Test Loss: 0.1020
Epoch 5, Average Train Loss: 0.0983


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.94it/s]


Epoch 5, Average Test Loss: 0.0975
Evaluating easy_to_hard model (10.0% data) against MCTS_data_agent...
1298 2938
Win rate for easy_to_hard with 10.0% training data: 0.3064
Training easy_to_hard model with 25.0% of training data...


  0%|                                                     | 0/5 [00:00<?, ?it/s]

Epoch 1, Average Train Loss: 0.1171


 20%|█████████                                    | 1/5 [00:00<00:00,  4.95it/s]

Epoch 1, Average Test Loss: 0.1044
Epoch 2, Average Train Loss: 0.1012


 40%|██████████████████                           | 2/5 [00:00<00:00,  5.01it/s]

Epoch 2, Average Test Loss: 0.0996


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  5.34it/s]

Epoch 3, Average Train Loss: 0.0895
Epoch 3, Average Test Loss: 0.0766
Epoch 4, Average Train Loss: 0.0642


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  5.47it/s]

Epoch 4, Average Test Loss: 0.0557
Epoch 5, Average Train Loss: 0.0466


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.41it/s]

Epoch 5, Average Test Loss: 0.0434
Evaluating easy_to_hard model (25.0% data) against MCTS_data_agent...





1336 2882
Win rate for easy_to_hard with 25.0% training data: 0.3167
Training easy_to_hard model with 50.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  3.19it/s]

Epoch 1, Average Train Loss: 0.0992
Epoch 1, Average Test Loss: 0.0829


 40%|██████████████████                           | 2/5 [00:00<00:00,  3.34it/s]

Epoch 2, Average Train Loss: 0.0598
Epoch 2, Average Test Loss: 0.0590


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  3.45it/s]

Epoch 3, Average Train Loss: 0.0511
Epoch 3, Average Test Loss: 0.0674


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  3.52it/s]

Epoch 4, Average Train Loss: 0.0488
Epoch 4, Average Test Loss: 0.0512


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.50it/s]

Epoch 5, Average Train Loss: 0.0437
Epoch 5, Average Test Loss: 0.0522
Evaluating easy_to_hard model (50.0% data) against MCTS_data_agent...





1616 2557
Win rate for easy_to_hard with 50.0% training data: 0.3873
Training easy_to_hard model with 75.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.54it/s]

Epoch 1, Average Train Loss: 0.0878
Epoch 1, Average Test Loss: 0.0551


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.60it/s]

Epoch 2, Average Train Loss: 0.0423
Epoch 2, Average Test Loss: 0.0425


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.63it/s]

Epoch 3, Average Train Loss: 0.0361
Epoch 3, Average Test Loss: 0.0423


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.65it/s]

Epoch 4, Average Train Loss: 0.0367
Epoch 4, Average Test Loss: 0.0516


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.64it/s]

Epoch 5, Average Train Loss: 0.0369
Epoch 5, Average Test Loss: 0.0606
Evaluating easy_to_hard model (75.0% data) against MCTS_data_agent...





1996 2134
Win rate for easy_to_hard with 75.0% training data: 0.4833
Training easy_to_hard model with 100.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.02it/s]

Epoch 1, Average Train Loss: 0.0950
Epoch 1, Average Test Loss: 0.0679


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.09it/s]

Epoch 2, Average Train Loss: 0.0480
Epoch 2, Average Test Loss: 0.0466


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.12it/s]

Epoch 3, Average Train Loss: 0.0367
Epoch 3, Average Test Loss: 0.0490


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.13it/s]

Epoch 4, Average Train Loss: 0.0341
Epoch 4, Average Test Loss: 0.0617


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.12it/s]

Epoch 5, Average Train Loss: 0.0335
Epoch 5, Average Test Loss: 0.0727
Evaluating easy_to_hard model (100.0% data) against MCTS_data_agent...





2531 1673
Win rate for easy_to_hard with 100.0% training data: 0.6020
Training hard_to_easy model with 10.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:00,  8.34it/s]

Epoch 1, Average Train Loss: 0.1358
Epoch 1, Average Test Loss: 0.1122


 40%|██████████████████                           | 2/5 [00:00<00:00,  8.35it/s]

Epoch 2, Average Train Loss: 0.1108
Epoch 2, Average Test Loss: 0.1068


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  8.24it/s]

Epoch 3, Average Train Loss: 0.1028
Epoch 3, Average Test Loss: 0.1032


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  8.36it/s]

Epoch 4, Average Train Loss: 0.0968
Epoch 4, Average Test Loss: 0.1000


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.39it/s]

Epoch 5, Average Train Loss: 0.0935
Epoch 5, Average Test Loss: 0.0942
Evaluating hard_to_easy model (10.0% data) against MCTS_data_agent...





1287 2949
Win rate for hard_to_easy with 10.0% training data: 0.3038
Training hard_to_easy model with 25.0% of training data...


  0%|                                                     | 0/5 [00:00<?, ?it/s]

Epoch 1, Average Train Loss: 0.1222


 20%|█████████                                    | 1/5 [00:00<00:00,  5.18it/s]

Epoch 1, Average Test Loss: 0.1042
Epoch 2, Average Train Loss: 0.1065


 40%|██████████████████                           | 2/5 [00:00<00:00,  5.21it/s]

Epoch 2, Average Test Loss: 0.0981


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  5.40it/s]

Epoch 3, Average Train Loss: 0.0946
Epoch 3, Average Test Loss: 0.0792
Epoch 4, Average Train Loss: 0.0727


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  5.52it/s]

Epoch 4, Average Test Loss: 0.0597
Epoch 5, Average Train Loss: 0.0547


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.50it/s]

Epoch 5, Average Test Loss: 0.0470
Evaluating hard_to_easy model (25.0% data) against MCTS_data_agent...





1349 2868
Win rate for hard_to_easy with 25.0% training data: 0.3199
Training hard_to_easy model with 50.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  3.47it/s]

Epoch 1, Average Train Loss: 0.1018
Epoch 1, Average Test Loss: 0.0932


 40%|██████████████████                           | 2/5 [00:00<00:00,  3.50it/s]

Epoch 2, Average Train Loss: 0.0681
Epoch 2, Average Test Loss: 0.0671


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  3.56it/s]

Epoch 3, Average Train Loss: 0.0517
Epoch 3, Average Test Loss: 0.0639


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  3.60it/s]

Epoch 4, Average Train Loss: 0.0485
Epoch 4, Average Test Loss: 0.0535


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.57it/s]

Epoch 5, Average Train Loss: 0.0445
Epoch 5, Average Test Loss: 0.0557
Evaluating hard_to_easy model (50.0% data) against MCTS_data_agent...





1670 2580
Win rate for hard_to_easy with 50.0% training data: 0.3929
Training hard_to_easy model with 75.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.55it/s]

Epoch 1, Average Train Loss: 0.0921
Epoch 1, Average Test Loss: 0.0626


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.62it/s]

Epoch 2, Average Train Loss: 0.0430
Epoch 2, Average Test Loss: 0.0455


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.58it/s]

Epoch 3, Average Train Loss: 0.0348
Epoch 3, Average Test Loss: 0.0409


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.58it/s]

Epoch 4, Average Train Loss: 0.0341
Epoch 4, Average Test Loss: 0.0470


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.61it/s]

Epoch 5, Average Train Loss: 0.0343
Epoch 5, Average Test Loss: 0.0701
Evaluating hard_to_easy model (75.0% data) against MCTS_data_agent...





2201 1990
Win rate for hard_to_easy with 75.0% training data: 0.5252
Training hard_to_easy model with 100.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:02,  1.96it/s]

Epoch 1, Average Train Loss: 0.1214
Epoch 1, Average Test Loss: 0.0958


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.04it/s]

Epoch 2, Average Train Loss: 0.0697
Epoch 2, Average Test Loss: 0.0499


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.08it/s]

Epoch 3, Average Train Loss: 0.0398
Epoch 3, Average Test Loss: 0.0563


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.10it/s]

Epoch 4, Average Train Loss: 0.0372
Epoch 4, Average Test Loss: 0.0583


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.09it/s]

Epoch 5, Average Train Loss: 0.0353
Epoch 5, Average Test Loss: 0.0592
Evaluating hard_to_easy model (100.0% data) against MCTS_data_agent...





1525 2719
Win rate for hard_to_easy with 100.0% training data: 0.3593
Training random model with 10.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:00,  8.19it/s]

Epoch 1, Average Train Loss: 0.1461
Epoch 1, Average Test Loss: 0.1070
Epoch 2, Average Train Loss: 0.1050


 40%|██████████████████                           | 2/5 [00:00<00:00,  7.79it/s]

Epoch 2, Average Test Loss: 0.1070
Epoch 3, Average Train Loss: 0.1041


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  7.76it/s]

Epoch 3, Average Test Loss: 0.1019
Epoch 4, Average Train Loss: 0.0963


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  8.12it/s]

Epoch 4, Average Test Loss: 0.0980
Epoch 5, Average Train Loss: 0.0918


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.14it/s]

Epoch 5, Average Test Loss: 0.0902
Evaluating random model (10.0% data) against MCTS_data_agent...





1308 2954
Win rate for random with 10.0% training data: 0.3069
Training random model with 25.0% of training data...


  0%|                                                     | 0/5 [00:00<?, ?it/s]

Epoch 1, Average Train Loss: 0.1157


 20%|█████████                                    | 1/5 [00:00<00:00,  5.57it/s]

Epoch 1, Average Test Loss: 0.1052
Epoch 2, Average Train Loss: 0.1020


 40%|██████████████████                           | 2/5 [00:00<00:00,  5.57it/s]

Epoch 2, Average Test Loss: 0.1024


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  5.63it/s]

Epoch 3, Average Train Loss: 0.0923
Epoch 3, Average Test Loss: 0.0843
Epoch 4, Average Train Loss: 0.0714


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  5.60it/s]

Epoch 4, Average Test Loss: 0.0635
Epoch 5, Average Train Loss: 0.0545


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.62it/s]

Epoch 5, Average Test Loss: 0.0518
Evaluating random model (25.0% data) against MCTS_data_agent...





1373 2837
Win rate for random with 25.0% training data: 0.3261
Training random model with 50.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  3.43it/s]

Epoch 1, Average Train Loss: 0.0992
Epoch 1, Average Test Loss: 0.0961


 40%|██████████████████                           | 2/5 [00:00<00:00,  3.50it/s]

Epoch 2, Average Train Loss: 0.0716
Epoch 2, Average Test Loss: 0.0691


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  3.59it/s]

Epoch 3, Average Train Loss: 0.0550
Epoch 3, Average Test Loss: 0.0578


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  3.63it/s]

Epoch 4, Average Train Loss: 0.0493
Epoch 4, Average Test Loss: 0.0513


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.60it/s]

Epoch 5, Average Train Loss: 0.0451
Epoch 5, Average Test Loss: 0.0451
Evaluating random model (50.0% data) against MCTS_data_agent...





1644 2569
Win rate for random with 50.0% training data: 0.3902
Training random model with 75.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.48it/s]

Epoch 1, Average Train Loss: 0.0933
Epoch 1, Average Test Loss: 0.0656


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.62it/s]

Epoch 2, Average Train Loss: 0.0444
Epoch 2, Average Test Loss: 0.0485


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.64it/s]

Epoch 3, Average Train Loss: 0.0354
Epoch 3, Average Test Loss: 0.0439


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.66it/s]

Epoch 4, Average Train Loss: 0.0343
Epoch 4, Average Test Loss: 0.0551


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.65it/s]

Epoch 5, Average Train Loss: 0.0354
Epoch 5, Average Test Loss: 0.0636
Evaluating random model (75.0% data) against MCTS_data_agent...





2360 1829
Win rate for random with 75.0% training data: 0.5634
Training random model with 100.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.02it/s]

Epoch 1, Average Train Loss: 0.0870
Epoch 1, Average Test Loss: 0.0409


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.06it/s]

Epoch 2, Average Train Loss: 0.0349
Epoch 2, Average Test Loss: 0.0308


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.07it/s]

Epoch 3, Average Train Loss: 0.0277
Epoch 3, Average Test Loss: 0.0243


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.09it/s]

Epoch 4, Average Train Loss: 0.0219
Epoch 4, Average Test Loss: 0.0197


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.08it/s]

Epoch 5, Average Train Loss: 0.0175
Epoch 5, Average Test Loss: 0.0167
Evaluating random model (100.0% data) against MCTS_data_agent...





1484 2678
Win rate for random with 100.0% training data: 0.3566

--- Data Portion Experiment with seed: 303 ---
Training easy_to_hard model with 10.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:00,  8.26it/s]

Epoch 1, Average Train Loss: 0.1446
Epoch 1, Average Test Loss: 0.1065


 40%|██████████████████                           | 2/5 [00:00<00:00,  8.28it/s]

Epoch 2, Average Train Loss: 0.1120
Epoch 2, Average Test Loss: 0.1048


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  8.29it/s]

Epoch 3, Average Train Loss: 0.1059
Epoch 3, Average Test Loss: 0.1015


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  8.38it/s]

Epoch 4, Average Train Loss: 0.0984
Epoch 4, Average Test Loss: 0.0970


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.37it/s]

Epoch 5, Average Train Loss: 0.0937
Epoch 5, Average Test Loss: 0.0900
Evaluating easy_to_hard model (10.0% data) against MCTS_data_agent...





1273 2961
Win rate for easy_to_hard with 10.0% training data: 0.3007
Training easy_to_hard model with 25.0% of training data...


  0%|                                                     | 0/5 [00:00<?, ?it/s]

Epoch 1, Average Train Loss: 0.1210


 20%|█████████                                    | 1/5 [00:00<00:00,  5.12it/s]

Epoch 1, Average Test Loss: 0.1034
Epoch 2, Average Train Loss: 0.1017


 40%|██████████████████                           | 2/5 [00:00<00:00,  5.09it/s]

Epoch 2, Average Test Loss: 0.0926


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  5.24it/s]

Epoch 3, Average Train Loss: 0.0823
Epoch 3, Average Test Loss: 0.0652
Epoch 4, Average Train Loss: 0.0572


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  5.22it/s]

Epoch 4, Average Test Loss: 0.0488
Epoch 5, Average Train Loss: 0.0459


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.23it/s]

Epoch 5, Average Test Loss: 0.0428
Evaluating easy_to_hard model (25.0% data) against MCTS_data_agent...





1297 2934
Win rate for easy_to_hard with 25.0% training data: 0.3065
Training easy_to_hard model with 50.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  3.30it/s]

Epoch 1, Average Train Loss: 0.0996
Epoch 1, Average Test Loss: 0.0961


 40%|██████████████████                           | 2/5 [00:00<00:00,  3.44it/s]

Epoch 2, Average Train Loss: 0.0723
Epoch 2, Average Test Loss: 0.0708


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  3.54it/s]

Epoch 3, Average Train Loss: 0.0537
Epoch 3, Average Test Loss: 0.0630


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  3.56it/s]

Epoch 4, Average Train Loss: 0.0491
Epoch 4, Average Test Loss: 0.0455


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.54it/s]

Epoch 5, Average Train Loss: 0.0434
Epoch 5, Average Test Loss: 0.0424
Evaluating easy_to_hard model (50.0% data) against MCTS_data_agent...





1552 2629
Win rate for easy_to_hard with 50.0% training data: 0.3712
Training easy_to_hard model with 75.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.53it/s]

Epoch 1, Average Train Loss: 0.0906
Epoch 1, Average Test Loss: 0.0612


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.62it/s]

Epoch 2, Average Train Loss: 0.0433
Epoch 2, Average Test Loss: 0.0506


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.66it/s]

Epoch 3, Average Train Loss: 0.0385
Epoch 3, Average Test Loss: 0.0485


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.69it/s]

Epoch 4, Average Train Loss: 0.0372
Epoch 4, Average Test Loss: 0.0620


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.67it/s]

Epoch 5, Average Train Loss: 0.0386
Epoch 5, Average Test Loss: 0.0614
Evaluating easy_to_hard model (75.0% data) against MCTS_data_agent...





2154 2019
Win rate for easy_to_hard with 75.0% training data: 0.5162
Training easy_to_hard model with 100.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.01it/s]

Epoch 1, Average Train Loss: 0.0897
Epoch 1, Average Test Loss: 0.0630


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.09it/s]

Epoch 2, Average Train Loss: 0.0452
Epoch 2, Average Test Loss: 0.0459


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.11it/s]

Epoch 3, Average Train Loss: 0.0373
Epoch 3, Average Test Loss: 0.0518


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.13it/s]

Epoch 4, Average Train Loss: 0.0356
Epoch 4, Average Test Loss: 0.0650


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.12it/s]

Epoch 5, Average Train Loss: 0.0363
Epoch 5, Average Test Loss: 0.0667
Evaluating easy_to_hard model (100.0% data) against MCTS_data_agent...





2238 1979
Win rate for easy_to_hard with 100.0% training data: 0.5307
Training hard_to_easy model with 10.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:00,  8.34it/s]

Epoch 1, Average Train Loss: 0.1459
Epoch 1, Average Test Loss: 0.1082


 40%|██████████████████                           | 2/5 [00:00<00:00,  8.03it/s]

Epoch 2, Average Train Loss: 0.1100
Epoch 2, Average Test Loss: 0.1063
Epoch 3, Average Train Loss: 0.1062


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  8.26it/s]

Epoch 3, Average Test Loss: 0.1036
Epoch 4, Average Train Loss: 0.0997
Epoch 4, Average Test Loss: 0.0994
Epoch 5, Average Train Loss: 0.0956


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.18it/s]


Epoch 5, Average Test Loss: 0.0931
Evaluating hard_to_easy model (10.0% data) against MCTS_data_agent...
1295 2957
Win rate for hard_to_easy with 10.0% training data: 0.3046
Training hard_to_easy model with 25.0% of training data...


  0%|                                                     | 0/5 [00:00<?, ?it/s]

Epoch 1, Average Train Loss: 0.1154


 20%|█████████                                    | 1/5 [00:00<00:00,  5.49it/s]

Epoch 1, Average Test Loss: 0.1048
Epoch 2, Average Train Loss: 0.1000


 40%|██████████████████                           | 2/5 [00:00<00:00,  5.42it/s]

Epoch 2, Average Test Loss: 0.0979


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  5.58it/s]

Epoch 3, Average Train Loss: 0.0868
Epoch 3, Average Test Loss: 0.0772
Epoch 4, Average Train Loss: 0.0656


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  5.61it/s]

Epoch 4, Average Test Loss: 0.0577
Epoch 5, Average Train Loss: 0.0512


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.59it/s]

Epoch 5, Average Test Loss: 0.0464
Evaluating hard_to_easy model (25.0% data) against MCTS_data_agent...





1436 2752
Win rate for hard_to_easy with 25.0% training data: 0.3429
Training hard_to_easy model with 50.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  3.49it/s]

Epoch 1, Average Train Loss: 0.1016
Epoch 1, Average Test Loss: 0.0964


 40%|██████████████████                           | 2/5 [00:00<00:00,  3.53it/s]

Epoch 2, Average Train Loss: 0.0748
Epoch 2, Average Test Loss: 0.0688


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  3.57it/s]

Epoch 3, Average Train Loss: 0.0562
Epoch 3, Average Test Loss: 0.0628


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  3.59it/s]

Epoch 4, Average Train Loss: 0.0513
Epoch 4, Average Test Loss: 0.0474


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.59it/s]

Epoch 5, Average Train Loss: 0.0454
Epoch 5, Average Test Loss: 0.0420
Evaluating hard_to_easy model (50.0% data) against MCTS_data_agent...





1516 2684
Win rate for hard_to_easy with 50.0% training data: 0.3610
Training hard_to_easy model with 75.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.31it/s]

Epoch 1, Average Train Loss: 0.0890
Epoch 1, Average Test Loss: 0.0561


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.50it/s]

Epoch 2, Average Train Loss: 0.0408
Epoch 2, Average Test Loss: 0.0416


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.57it/s]

Epoch 3, Average Train Loss: 0.0364
Epoch 3, Average Test Loss: 0.0433


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.63it/s]

Epoch 4, Average Train Loss: 0.0396
Epoch 4, Average Test Loss: 0.0733


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.60it/s]

Epoch 5, Average Train Loss: 0.0407
Epoch 5, Average Test Loss: 0.0650
Evaluating hard_to_easy model (75.0% data) against MCTS_data_agent...





1982 2227
Win rate for hard_to_easy with 75.0% training data: 0.4709
Training hard_to_easy model with 100.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:02,  1.89it/s]

Epoch 1, Average Train Loss: 0.1173
Epoch 1, Average Test Loss: 0.0906


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.04it/s]

Epoch 2, Average Train Loss: 0.0643
Epoch 2, Average Test Loss: 0.0509


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.05it/s]

Epoch 3, Average Train Loss: 0.0408
Epoch 3, Average Test Loss: 0.0505


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.09it/s]

Epoch 4, Average Train Loss: 0.0332
Epoch 4, Average Test Loss: 0.0573


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.07it/s]

Epoch 5, Average Train Loss: 0.0345
Epoch 5, Average Test Loss: 0.0561
Evaluating hard_to_easy model (100.0% data) against MCTS_data_agent...





1483 2750
Win rate for hard_to_easy with 100.0% training data: 0.3503
Training random model with 10.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:00,  8.24it/s]

Epoch 1, Average Train Loss: 0.1596
Epoch 1, Average Test Loss: 0.1122


 40%|██████████████████                           | 2/5 [00:00<00:00,  8.30it/s]

Epoch 2, Average Train Loss: 0.1108
Epoch 2, Average Test Loss: 0.1085


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  8.29it/s]

Epoch 3, Average Train Loss: 0.1119
Epoch 3, Average Test Loss: 0.1041


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  8.42it/s]

Epoch 4, Average Train Loss: 0.1044
Epoch 4, Average Test Loss: 0.1003


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.42it/s]

Epoch 5, Average Train Loss: 0.1014
Epoch 5, Average Test Loss: 0.0948
Evaluating random model (10.0% data) against MCTS_data_agent...





1339 2927
Win rate for random with 10.0% training data: 0.3139
Training random model with 25.0% of training data...


  0%|                                                     | 0/5 [00:00<?, ?it/s]

Epoch 1, Average Train Loss: 0.1188


 20%|█████████                                    | 1/5 [00:00<00:00,  5.43it/s]

Epoch 1, Average Test Loss: 0.1037
Epoch 2, Average Train Loss: 0.1048


 40%|██████████████████                           | 2/5 [00:00<00:00,  5.46it/s]

Epoch 2, Average Test Loss: 0.0989


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  5.58it/s]

Epoch 3, Average Train Loss: 0.0944
Epoch 3, Average Test Loss: 0.0791
Epoch 4, Average Train Loss: 0.0697


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  5.56it/s]

Epoch 4, Average Test Loss: 0.0548
Epoch 5, Average Train Loss: 0.0499


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.55it/s]

Epoch 5, Average Test Loss: 0.0430
Evaluating random model (25.0% data) against MCTS_data_agent...





1304 2985
Win rate for random with 25.0% training data: 0.3040
Training random model with 50.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  3.15it/s]

Epoch 1, Average Train Loss: 0.1005
Epoch 1, Average Test Loss: 0.0921


 40%|██████████████████                           | 2/5 [00:00<00:00,  3.29it/s]

Epoch 2, Average Train Loss: 0.0649
Epoch 2, Average Test Loss: 0.0694


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  3.38it/s]

Epoch 3, Average Train Loss: 0.0528
Epoch 3, Average Test Loss: 0.0641


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  3.43it/s]

Epoch 4, Average Train Loss: 0.0482
Epoch 4, Average Test Loss: 0.0498


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.41it/s]

Epoch 5, Average Train Loss: 0.0434
Epoch 5, Average Test Loss: 0.0514
Evaluating random model (50.0% data) against MCTS_data_agent...





1698 2526
Win rate for random with 50.0% training data: 0.4020
Training random model with 75.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.31it/s]

Epoch 1, Average Train Loss: 0.0911
Epoch 1, Average Test Loss: 0.0578


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.48it/s]

Epoch 2, Average Train Loss: 0.0413
Epoch 2, Average Test Loss: 0.0408


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.57it/s]

Epoch 3, Average Train Loss: 0.0344
Epoch 3, Average Test Loss: 0.0462


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.62it/s]

Epoch 4, Average Train Loss: 0.0348
Epoch 4, Average Test Loss: 0.0536


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.58it/s]

Epoch 5, Average Train Loss: 0.0366
Epoch 5, Average Test Loss: 0.0594
Evaluating random model (75.0% data) against MCTS_data_agent...





2149 1968
Win rate for random with 75.0% training data: 0.5220
Training random model with 100.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:02,  2.00it/s]

Epoch 1, Average Train Loss: 0.0906
Epoch 1, Average Test Loss: 0.0467


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.09it/s]

Epoch 2, Average Train Loss: 0.0353
Epoch 2, Average Test Loss: 0.0329


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.12it/s]

Epoch 3, Average Train Loss: 0.0273
Epoch 3, Average Test Loss: 0.0254


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.14it/s]

Epoch 4, Average Train Loss: 0.0209
Epoch 4, Average Test Loss: 0.0193


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.12it/s]

Epoch 5, Average Train Loss: 0.0164
Epoch 5, Average Test Loss: 0.0157
Evaluating random model (100.0% data) against MCTS_data_agent...





1554 2646
Win rate for random with 100.0% training data: 0.3700

--- Data Portion Experiment with seed: 404 ---
Training easy_to_hard model with 10.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:00,  8.05it/s]

Epoch 1, Average Train Loss: 0.1359
Epoch 1, Average Test Loss: 0.1113
Epoch 2, Average Train Loss: 0.1118


 40%|██████████████████                           | 2/5 [00:00<00:00,  7.71it/s]

Epoch 2, Average Test Loss: 0.1073
Epoch 3, Average Train Loss: 0.1045


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  7.77it/s]

Epoch 3, Average Test Loss: 0.1033
Epoch 4, Average Train Loss: 0.0986


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  7.56it/s]

Epoch 4, Average Test Loss: 0.1009
Epoch 5, Average Train Loss: 0.0960


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  7.68it/s]

Epoch 5, Average Test Loss: 0.0954
Evaluating easy_to_hard model (10.0% data) against MCTS_data_agent...





1346 2908
Win rate for easy_to_hard with 10.0% training data: 0.3164
Training easy_to_hard model with 25.0% of training data...


  0%|                                                     | 0/5 [00:00<?, ?it/s]

Epoch 1, Average Train Loss: 0.1182


 20%|█████████                                    | 1/5 [00:00<00:00,  5.32it/s]

Epoch 1, Average Test Loss: 0.1046
Epoch 2, Average Train Loss: 0.1023


 40%|██████████████████                           | 2/5 [00:00<00:00,  5.44it/s]

Epoch 2, Average Test Loss: 0.0982


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  5.55it/s]

Epoch 3, Average Train Loss: 0.0892
Epoch 3, Average Test Loss: 0.0763
Epoch 4, Average Train Loss: 0.0646


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  5.57it/s]

Epoch 4, Average Test Loss: 0.0555
Epoch 5, Average Train Loss: 0.0491


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.57it/s]

Epoch 5, Average Test Loss: 0.0451
Evaluating easy_to_hard model (25.0% data) against MCTS_data_agent...





1432 2779
Win rate for easy_to_hard with 25.0% training data: 0.3401
Training easy_to_hard model with 50.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  3.48it/s]

Epoch 1, Average Train Loss: 0.0999
Epoch 1, Average Test Loss: 0.0969


 40%|██████████████████                           | 2/5 [00:00<00:00,  3.54it/s]

Epoch 2, Average Train Loss: 0.0690
Epoch 2, Average Test Loss: 0.0709


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  3.58it/s]

Epoch 3, Average Train Loss: 0.0541
Epoch 3, Average Test Loss: 0.0671


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  3.62it/s]

Epoch 4, Average Train Loss: 0.0495
Epoch 4, Average Test Loss: 0.0571


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.60it/s]

Epoch 5, Average Train Loss: 0.0447
Epoch 5, Average Test Loss: 0.0577
Evaluating easy_to_hard model (50.0% data) against MCTS_data_agent...





1608 2617
Win rate for easy_to_hard with 50.0% training data: 0.3806
Training easy_to_hard model with 75.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.55it/s]

Epoch 1, Average Train Loss: 0.0908
Epoch 1, Average Test Loss: 0.0602


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.63it/s]

Epoch 2, Average Train Loss: 0.0431
Epoch 2, Average Test Loss: 0.0519


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.67it/s]

Epoch 3, Average Train Loss: 0.0388
Epoch 3, Average Test Loss: 0.0516


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.69it/s]

Epoch 4, Average Train Loss: 0.0391
Epoch 4, Average Test Loss: 0.0597


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.67it/s]

Epoch 5, Average Train Loss: 0.0384
Epoch 5, Average Test Loss: 0.0591
Evaluating easy_to_hard model (75.0% data) against MCTS_data_agent...





2095 2138
Win rate for easy_to_hard with 75.0% training data: 0.4949
Training easy_to_hard model with 100.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.01it/s]

Epoch 1, Average Train Loss: 0.0914
Epoch 1, Average Test Loss: 0.0666


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.08it/s]

Epoch 2, Average Train Loss: 0.0463
Epoch 2, Average Test Loss: 0.0510


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.10it/s]

Epoch 3, Average Train Loss: 0.0382
Epoch 3, Average Test Loss: 0.0590


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.12it/s]

Epoch 4, Average Train Loss: 0.0371
Epoch 4, Average Test Loss: 0.0685


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.10it/s]

Epoch 5, Average Train Loss: 0.0362
Epoch 5, Average Test Loss: 0.0651
Evaluating easy_to_hard model (100.0% data) against MCTS_data_agent...





2115 2090
Win rate for easy_to_hard with 100.0% training data: 0.5030
Training hard_to_easy model with 10.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:00,  7.89it/s]

Epoch 1, Average Train Loss: 0.1559
Epoch 1, Average Test Loss: 0.1103
Epoch 2, Average Train Loss: 0.1045


 40%|██████████████████                           | 2/5 [00:00<00:00,  7.50it/s]

Epoch 2, Average Test Loss: 0.1094
Epoch 3, Average Train Loss: 0.1062


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  7.56it/s]

Epoch 3, Average Test Loss: 0.1028
Epoch 4, Average Train Loss: 0.0977


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  7.93it/s]

Epoch 4, Average Test Loss: 0.0989
Epoch 5, Average Train Loss: 0.0944


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  7.96it/s]

Epoch 5, Average Test Loss: 0.0933
Evaluating hard_to_easy model (10.0% data) against MCTS_data_agent...





1312 2934
Win rate for hard_to_easy with 10.0% training data: 0.3090
Training hard_to_easy model with 25.0% of training data...


  0%|                                                     | 0/5 [00:00<?, ?it/s]

Epoch 1, Average Train Loss: 0.1182


 20%|█████████                                    | 1/5 [00:00<00:00,  5.46it/s]

Epoch 1, Average Test Loss: 0.1041
Epoch 2, Average Train Loss: 0.1040


 40%|██████████████████                           | 2/5 [00:00<00:00,  5.36it/s]

Epoch 2, Average Test Loss: 0.0974


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  5.33it/s]

Epoch 3, Average Train Loss: 0.0912
Epoch 3, Average Test Loss: 0.0750
Epoch 4, Average Train Loss: 0.0678


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  5.22it/s]

Epoch 4, Average Test Loss: 0.0546
Epoch 5, Average Train Loss: 0.0512


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.30it/s]

Epoch 5, Average Test Loss: 0.0424
Evaluating hard_to_easy model (25.0% data) against MCTS_data_agent...





1395 2849
Win rate for hard_to_easy with 25.0% training data: 0.3287
Training hard_to_easy model with 50.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  3.32it/s]

Epoch 1, Average Train Loss: 0.0986
Epoch 1, Average Test Loss: 0.0894


 40%|██████████████████                           | 2/5 [00:00<00:00,  3.45it/s]

Epoch 2, Average Train Loss: 0.0621
Epoch 2, Average Test Loss: 0.0607


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  3.53it/s]

Epoch 3, Average Train Loss: 0.0485
Epoch 3, Average Test Loss: 0.0582


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  3.56it/s]

Epoch 4, Average Train Loss: 0.0472
Epoch 4, Average Test Loss: 0.0452


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.52it/s]

Epoch 5, Average Train Loss: 0.0432
Epoch 5, Average Test Loss: 0.0484
Evaluating hard_to_easy model (50.0% data) against MCTS_data_agent...





1648 2562
Win rate for hard_to_easy with 50.0% training data: 0.3914
Training hard_to_easy model with 75.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.58it/s]

Epoch 1, Average Train Loss: 0.0949
Epoch 1, Average Test Loss: 0.0665


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.64it/s]

Epoch 2, Average Train Loss: 0.0415
Epoch 2, Average Test Loss: 0.0426


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.67it/s]

Epoch 3, Average Train Loss: 0.0323
Epoch 3, Average Test Loss: 0.0403


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.67it/s]

Epoch 4, Average Train Loss: 0.0319
Epoch 4, Average Test Loss: 0.0482


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.67it/s]

Epoch 5, Average Train Loss: 0.0339
Epoch 5, Average Test Loss: 0.0702
Evaluating hard_to_easy model (75.0% data) against MCTS_data_agent...





2461 1666
Win rate for hard_to_easy with 75.0% training data: 0.5963
Training hard_to_easy model with 100.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:02,  1.97it/s]

Epoch 1, Average Train Loss: 0.1174
Epoch 1, Average Test Loss: 0.0859


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.05it/s]

Epoch 2, Average Train Loss: 0.0596
Epoch 2, Average Test Loss: 0.0526


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.06it/s]

Epoch 3, Average Train Loss: 0.0391
Epoch 3, Average Test Loss: 0.0553


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.07it/s]

Epoch 4, Average Train Loss: 0.0359
Epoch 4, Average Test Loss: 0.0566


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  2.07it/s]

Epoch 5, Average Train Loss: 0.0376
Epoch 5, Average Test Loss: 0.0558
Evaluating hard_to_easy model (100.0% data) against MCTS_data_agent...





1422 2763
Win rate for hard_to_easy with 100.0% training data: 0.3398
Training random model with 10.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:00,  8.25it/s]

Epoch 1, Average Train Loss: 0.1422
Epoch 1, Average Test Loss: 0.1151


 40%|██████████████████                           | 2/5 [00:00<00:00,  8.41it/s]

Epoch 2, Average Train Loss: 0.1069
Epoch 2, Average Test Loss: 0.1073


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  8.43it/s]

Epoch 3, Average Train Loss: 0.1029
Epoch 3, Average Test Loss: 0.1020


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  8.47it/s]

Epoch 4, Average Train Loss: 0.0956
Epoch 4, Average Test Loss: 0.0983


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  8.48it/s]

Epoch 5, Average Train Loss: 0.0900
Epoch 5, Average Test Loss: 0.0911
Evaluating random model (10.0% data) against MCTS_data_agent...





1356 2854
Win rate for random with 10.0% training data: 0.3221
Training random model with 25.0% of training data...


  0%|                                                     | 0/5 [00:00<?, ?it/s]

Epoch 1, Average Train Loss: 0.1211


 20%|█████████                                    | 1/5 [00:00<00:00,  5.47it/s]

Epoch 1, Average Test Loss: 0.1053
Epoch 2, Average Train Loss: 0.1054


 40%|██████████████████                           | 2/5 [00:00<00:00,  5.51it/s]

Epoch 2, Average Test Loss: 0.1013


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  5.59it/s]

Epoch 3, Average Train Loss: 0.0959
Epoch 3, Average Test Loss: 0.0848
Epoch 4, Average Train Loss: 0.0738


 80%|████████████████████████████████████         | 4/5 [00:00<00:00,  5.61it/s]

Epoch 4, Average Test Loss: 0.0614
Epoch 5, Average Train Loss: 0.0542


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00,  5.59it/s]

Epoch 5, Average Test Loss: 0.0478
Evaluating random model (25.0% data) against MCTS_data_agent...





1345 2896
Win rate for random with 25.0% training data: 0.3171
Training random model with 50.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  3.45it/s]

Epoch 1, Average Train Loss: 0.1002
Epoch 1, Average Test Loss: 0.0931


 40%|██████████████████                           | 2/5 [00:00<00:00,  3.54it/s]

Epoch 2, Average Train Loss: 0.0687
Epoch 2, Average Test Loss: 0.0578


 60%|███████████████████████████                  | 3/5 [00:00<00:00,  3.60it/s]

Epoch 3, Average Train Loss: 0.0510
Epoch 3, Average Test Loss: 0.0601


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  3.62it/s]

Epoch 4, Average Train Loss: 0.0482
Epoch 4, Average Test Loss: 0.0557


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  3.60it/s]

Epoch 5, Average Train Loss: 0.0448
Epoch 5, Average Test Loss: 0.0467
Evaluating random model (50.0% data) against MCTS_data_agent...





1572 2593
Win rate for random with 50.0% training data: 0.3774
Training random model with 75.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.52it/s]

Epoch 1, Average Train Loss: 0.0921
Epoch 1, Average Test Loss: 0.0630


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.63it/s]

Epoch 2, Average Train Loss: 0.0451
Epoch 2, Average Test Loss: 0.0389


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.66it/s]

Epoch 3, Average Train Loss: 0.0343
Epoch 3, Average Test Loss: 0.0356


 80%|████████████████████████████████████         | 4/5 [00:01<00:00,  2.67it/s]

Epoch 4, Average Train Loss: 0.0324
Epoch 4, Average Test Loss: 0.0440


100%|█████████████████████████████████████████████| 5/5 [00:01<00:00,  2.66it/s]

Epoch 5, Average Train Loss: 0.0351
Epoch 5, Average Test Loss: 0.0692
Evaluating random model (75.0% data) against MCTS_data_agent...





2325 1828
Win rate for random with 75.0% training data: 0.5598
Training random model with 100.0% of training data...


 20%|█████████                                    | 1/5 [00:00<00:01,  2.03it/s]

Epoch 1, Average Train Loss: 0.0834
Epoch 1, Average Test Loss: 0.0409


 40%|██████████████████                           | 2/5 [00:00<00:01,  2.10it/s]

Epoch 2, Average Train Loss: 0.0354
Epoch 2, Average Test Loss: 0.0314


 60%|███████████████████████████                  | 3/5 [00:01<00:00,  2.12it/s]

Epoch 3, Average Train Loss: 0.0287
Epoch 3, Average Test Loss: 0.0258


 80%|████████████████████████████████████         | 4/5 [00:02<00:00,  1.87it/s]

Epoch 4, Average Train Loss: 0.0225
Epoch 4, Average Test Loss: 0.0206


100%|█████████████████████████████████████████████| 5/5 [00:02<00:00,  1.98it/s]

Epoch 5, Average Train Loss: 0.0176
Epoch 5, Average Test Loss: 0.0162
Evaluating random model (100.0% data) against MCTS_data_agent...





1565 2675
Win rate for random with 100.0% training data: 0.3691


In [5]:
# --- Plotting ---
print("\n--- Plotting Data Portion Results ---")
plot_save_dir = os.path.join("results", "data_portion_plots")
os.makedirs(plot_save_dir, exist_ok=True)

for curriculum_type, percentages_data in data_portion_results.items():
    avg_win_rates = []
    std_dev_win_rates = []
    for percentage in data_percentages:
        rates = percentages_data[percentage]
        avg_win_rates.append(np.mean(rates))
        std_dev_win_rates.append(np.std(rates))

    plt.figure(figsize=(10, 6))
    plt.errorbar(data_percentages, avg_win_rates, yerr=std_dev_win_rates, fmt='-o', capsize=5)
    plt.title(f'Win Rate Against MCTS Data Agent for {curriculum_type} Curriculum')
    plt.xlabel('Percentage of Data Used for Training')
    plt.ylabel('Average Win Rate')
    plt.ylim(0, 1)
    plt.grid(True)
    plot_filename = os.path.join(plot_save_dir, f'{curriculum_type}_win_rate_vs_data_portion.png')
    plt.savefig(plot_filename)
    print(f"Saved plot: {plot_filename}")
    plt.close()

print("\nData portion experiment complete and plots generated.")


--- Plotting Data Portion Results ---
Saved plot: results/data_portion_plots/easy_to_hard_win_rate_vs_data_portion.png
Saved plot: results/data_portion_plots/hard_to_easy_win_rate_vs_data_portion.png
Saved plot: results/data_portion_plots/random_win_rate_vs_data_portion.png

Data portion experiment complete and plots generated.


In [6]:
# --- New Experiment: Random Perturbation of Model Weights ---
print("\n--- Running Model Perturbation Experiment ---")
perturbation_strength = 0.01 # Define the strength of the perturbation
perturbation_results = {curriculum_type: [] for curriculum_type in ["easy_to_hard", "hard_to_easy", "random"]}

def perturb_model_weights(model, strength):
    """Applies random perturbation to model weights."""
    perturbed_model = TicTacToeCNN(kl_div=(criterion_choice == "kl_div"))
    perturbed_model.load_state_dict(model.state_dict()) # Start with original weights
    with torch.no_grad():
        for param in perturbed_model.parameters():
            param.add_(torch.randn(param.size()) * strength)
        return perturbed_model

    # Load original models once for each curriculum type (from the first seed's results)
    print("\n--- Loading base models for perturbation experiment (from first seed) ---")
base_models = {}
base_seed_for_loading = seeds[0] # Use the first seed's trained models as base
base_model_dir = os.path.join("results", f'seed_{base_seed_for_loading}_{optimizer_choice}_{criterion_choice}_epoch_{epochs}')
for curriculum_type in ["easy_to_hard", "hard_to_easy", "random"]:
    model_name = f"model_{curriculum_type.replace('_to_', '_').replace('random', 'random_curriculum')}"
    model_path = os.path.join(base_model_dir, f"{model_name}.pth")

    model = TicTacToeCNN(kl_div=(criterion_choice == "kl_div"))
    model.load_state_dict(torch.load(model_path))
    model.eval()
    base_models[curriculum_type] = model
    print("Base models loaded.")

for seed in seeds: # This seed now controls the randomness of the perturbation
    print(f"\n--- Model Perturbation Experiment with perturbation seed: {seed} ---")
    set_seed(seed) # Set seed for the perturbation generation

    perturb_save_dir = os.path.join("results", f'perturbation_seed_{seed}_{optimizer_choice}_{criterion_choice}_epoch_{epochs}')
    os.makedirs(perturb_save_dir, exist_ok=True)

    for curriculum_type in ["easy_to_hard", "hard_to_easy", "random"]:
        original_model = base_models[curriculum_type]

        print(f"Perturbing and evaluating {curriculum_type} model with seed {seed}...")
        perturbed_model = perturb_model_weights(original_model, perturbation_strength)

        results = evaluate_agents(
            perturbed_model, 'mcts_data_agent',
            games=mcts_eval_games,
            agent1_criterion=criterion_choice,
            agent2_criterion=None,
            mcts_data=preloaded_mcts_data
        )

        a1_win = results["agent1_wins"]
        a2_win = results["agent2_wins"]
        win_rate = a1_win / (a1_win + a2_win) if (a1_win + a2_win) > 0 else 0
        perturbation_results[curriculum_type].append(win_rate)
        print(f"Win rate for perturbed {curriculum_type} model (perturbation seed {seed}): {win_rate:.4f}")

        # Optionally save the perturbed model (if needed for debugging/analysis)
        # model_name = f"model_{curriculum_type.replace('_to_', '_').replace('random', 'random_curriculum')}"
        # torch.save(perturbed_model.state_dict(), os.path.join(perturb_save_dir, f"{model_name}_perturbed_seed_{seed}.pth"))

    # Calculate average win rates and plot for perturbation experiment
    print("\n--- Plotting Perturbation Results ---")
perturb_plot_save_dir = os.path.join("results", "perturbation_plots")
os.makedirs(perturb_plot_save_dir, exist_ok=True)

avg_perturb_win_rates = []
std_perturb_win_rates = []
curriculum_labels = ["easy_to_hard", "hard_to_easy", "random"]

for ct in curriculum_labels:
    rates = perturbation_results[ct]
    avg_perturb_win_rates.append(np.mean(rates))
    std_perturb_win_rates.append(np.std(rates))

    x = np.arange(len(curriculum_labels))
width = 0.35

fig, ax = plt.subplots(figsize=(10, 6))
rects = ax.bar(x, avg_perturb_win_rates, width, yerr=std_perturb_win_rates, capsize=5)

ax.set_ylabel('Average Win Rate Against MCTS Data Agent')
ax.set_title(f'Win Rate of Perturbed Models (Strength: {perturbation_strength})')
ax.set_xticks(x)
ax.set_xticklabels([label.replace('_', ' ').title() for label in curriculum_labels]) # Nicer labels
ax.set_ylim(0, 1)
ax.grid(axis='y')

def autolabel(rects):
    for rect in rects:
        height = rect.get_height()
        ax.annotate(f'{height:.2f}',
                    xy=(rect.get_x() + rect.get_width() / 2, height),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom')

    autolabel(rects)

plot_filename = os.path.join(perturb_plot_save_dir, f'perturbed_models_win_rate_strength_{perturbation_strength}.png')
plt.savefig(plot_filename)
print(f"Saved plot: {plot_filename}")
plt.close()

print("\nModel perturbation experiment complete and plots generated.")


--- Running Model Perturbation Experiment ---
Base models loaded.
Base models loaded.
Base models loaded.

--- Model Perturbation Experiment with perturbation seed: 42 ---
Perturbing and evaluating easy_to_hard model with seed 42...


  model.load_state_dict(torch.load(model_path))


Win rate for perturbed easy_to_hard model (perturbation seed 42): 0.6144
Perturbing and evaluating hard_to_easy model with seed 42...
Win rate for perturbed hard_to_easy model (perturbation seed 42): 0.3900
Perturbing and evaluating random model with seed 42...
Win rate for perturbed random model (perturbation seed 42): 0.3603

--- Plotting Perturbation Results ---

--- Model Perturbation Experiment with perturbation seed: 101 ---
Perturbing and evaluating easy_to_hard model with seed 101...
Win rate for perturbed easy_to_hard model (perturbation seed 101): 0.6276
Perturbing and evaluating hard_to_easy model with seed 101...
Win rate for perturbed hard_to_easy model (perturbation seed 101): 0.3828
Perturbing and evaluating random model with seed 101...
Win rate for perturbed random model (perturbation seed 101): 0.3812

--- Plotting Perturbation Results ---

--- Model Perturbation Experiment with perturbation seed: 202 ---
Perturbing and evaluating easy_to_hard model with seed 202...
W