Chess RL + Expert Learning + Weirdness 

In [1]:
import wandb
import gym
import chess
import os, sys, copy
import torch
from torch.utils.data import Dataset, DataLoader, random_split, Subset
from torch.cuda.amp import GradScaler
from torch.cuda.amp import autocast
from tqdm import tqdm
import numpy as np
import time
import adversarial_gym
from adversarial_gym.chess_env import ChessEnv

from OBM_ChessNetwork import Chess42069NetworkSimple

sys.path.append('../../chess_utils')
from chess_dataset import ChessDataset
from utils import RunningAverage

  from .autonotebook import tqdm as notebook_tqdm


Model Gameplay Functions

In [2]:
def play_game(env, white, black, perspective=None, sample_n=1, duel=False):
    """Play one full game in ``env`` and collect the moves of one side.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` and
            ``board.legal_moves``.
        white: Player whose ``get_action`` is used on even plies. When
            ``perspective`` is None it is used on every ply.
        black: Player whose ``get_action`` is used on odd plies. It is not
            consulted when ``perspective`` is None.
        perspective: ``chess.WHITE`` (1), ``chess.BLACK`` (0), or None for
            self-play. Only moves made by the chosen side are recorded; in
            self-play every move is recorded.
        sample_n: Forwarded unchanged to ``get_action``.
        duel: Accepted for call compatibility; not used by this function.

    Returns:
        ``(observations, actions, log_probs, reward)`` where the three lists
        hold the recorded plies and ``reward`` is the value returned by the
        final ``env.step`` call, without any per-colour adjustment.
    """
    observations, actions, log_probs = [], [], []
    self_play_mode = perspective is None

    obs = env.reset()[0]
    ply = 0
    finished = False
    while not finished:
        board_state = obs[0]

        # Decide who moves on this ply and whether the move is recorded.
        if self_play_mode:
            # White and black are expected to be the same model here.
            mover, keep = white, True
        elif ply % 2 == 0:
            mover, keep = white, perspective == chess.WHITE
        else:
            mover, keep = black, perspective == chess.BLACK

        action, log_prob = mover.get_action(board_state, env.board.legal_moves, sample_n=sample_n)
        if keep:
            observations.append(board_state)
            actions.append(action)
            log_probs.append(log_prob)

        obs, reward, finished, _, info = env.step(action)
        ply += 1

    # NOTE(review): the reward is returned exactly as the environment reported
    # it; callers that play as black may need to flip its sign - confirm
    # against the environment's reward convention.
    return observations, actions, log_probs, reward


def duel(env, old_model, new_model, num_rounds):
    """Duel ``new_model`` against ``old_model`` and return the averaged score.

    Each round consists of two games: one with ``new_model`` as white and one
    with ``new_model`` as black. The raw rewards returned by ``play_game`` for
    both games are summed and divided by the number of games played.

    Args:
        env: Environment passed through to ``play_game``.
        old_model: The reigning model. Its train/eval mode is not changed here.
        new_model: The challenger. It is put into eval mode for the duel and
            switched back to train mode afterwards, even if a game raises.
        num_rounds: Number of two-game rounds; must be positive.

    Returns:
        ``sum(rewards) / (2 * num_rounds)``.

    Raises:
        ValueError: If ``num_rounds`` is not positive.
    """
    if num_rounds <= 0:
        raise ValueError(f"num_rounds must be a positive integer, got {num_rounds!r}")

    wins = 0
    new_model.eval()
    try:
        with torch.no_grad():
            for _ in range(num_rounds):
                _, _, _, r_w = play_game(env, new_model, old_model, perspective=chess.WHITE, sample_n=2, duel=True)
                _, _, _, r_b = play_game(env, old_model, new_model, perspective=chess.BLACK, sample_n=2, duel=True)

                # NOTE(review): rewards are added as returned. If the
                # environment reports results from white's point of view, a
                # win as black would count negatively here - confirm the
                # reward convention before trusting this ratio.
                wins += r_w + r_b
    finally:
        # Hand the challenger back in training mode no matter how the games ended.
        new_model.train()
    return wins / (2 * num_rounds)


def self_play(env, model, num_games):
    """Let ``model`` play ``num_games`` games against itself and gather the data.

    Every game is run through ``play_game`` with ``perspective=None`` so that
    all moves are recorded.

    Returns:
        ``(observations, actions, log_probs, rewards)``: four lists with one
        entry per game. The first three entries are the per-game lists from
        ``play_game``; ``rewards`` holds each game's final reward.
    """
    # TODO: check if numpy array of shape (num_games, 4) is faster, each row could be output of play_game
    games = [play_game(env, model, model, perspective=None) for _ in range(num_games)]

    # Split the per-game 4-tuples into four parallel lists.
    observations = [game[0] for game in games]
    actions = [game[1] for game in games]
    log_probs = [game[2] for game in games]
    rewards = [game[3] for game in games]
    return observations, actions, log_probs, rewards

Expert Learning

In [3]:
def run_validation(model, val_loader, stats):
    """Evaluate ``model`` on ``val_loader`` and return the mean validation loss.

    The loss of each batch is the sum of ``model.policy_loss`` and
    ``model.val_loss``; it is accumulated under the ``"val_loss"`` key of
    ``stats`` (which is reset first). The model is evaluated in eval mode with
    gradients disabled, and its previous train/eval mode is restored before
    returning so that callers can keep training afterwards.

    Args:
        model: Network returning ``(policy_output, value_output)`` and
            providing ``policy_loss`` / ``val_loss``.
        val_loader: Iterable yielding ``(state, action, result)`` batches.
        stats: Accumulator providing ``reset``, ``update`` and ``get_average``.

    Returns:
        ``stats.get_average('val_loss')`` after the pass.
    """
    # Prefer the model's own device (as expert_study does); fall back to the
    # previous "cuda if available" rule when the model does not expose one.
    device = getattr(model, "device", None)
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    was_training = model.training
    model.eval()
    stats.reset("val_loss")
    t1 = time.perf_counter()
    try:
        with torch.no_grad():
            for state, action, result in val_loader:
                state = state.float().to(device)
                action = action.to(device)
                result = result.float().to(device)

                # The extra axis inserted at dim 1 is what the network's
                # forward pass is fed here.
                policy_output, value_output = model(state.unsqueeze(1))
                policy_loss = model.policy_loss(policy_output.squeeze(), action)
                value_loss = model.val_loss(value_output.squeeze(), result)

                loss = policy_loss + value_loss
                stats.update("val_loss", loss.item())
    finally:
        # Do not leak eval mode into the caller's training loop.
        model.train(was_training)

    mean_loss = stats.get_average('val_loss')
    print(f"Mean Validation Loss: {mean_loss}, time elapsed: {time.perf_counter()-t1} seconds")
    return mean_loss


def expert_study(model, dataset, percent_dataset=0.1):
    """Run one supervised (behaviour cloning) pass on a random slice of ``dataset``.

    A random subset holding ``percent_dataset`` of the samples is drawn from
    the whole dataset without replacement, split 90/10 into train and
    validation parts, trained on for a single pass with mixed precision and
    gradient clipping, and finally validated with ``run_validation``.

    Args:
        model: Network exposing ``device``, ``optimizer``, ``grad_scaler``,
            ``policy_loss`` and ``val_loss``.
        dataset: Map-style dataset yielding ``(state, action, result)``.
        percent_dataset: Fraction of ``dataset`` to study. Fractions that
            would exceed the dataset are clamped to its full size.

    Returns:
        The mean validation loss reported by ``run_validation``.
    """
    # Sample distinct indices across the *entire* dataset. Sampling without
    # replacement also keeps the train and validation splits disjoint.
    dataset_size = len(dataset)
    study_size = min(int(percent_dataset * dataset_size), dataset_size)
    random_indices = np.random.choice(dataset_size, size=study_size, replace=False).tolist()
    study_dataset = Subset(dataset, random_indices)

    train_ratio = 0.9
    train_size = int(train_ratio * study_size)
    val_size = study_size - train_size
    train_dataset, val_dataset = random_split(study_dataset, [train_size, val_size])

    # Create data loaders for the training and validation sets
    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True,
                              pin_memory=False, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False,
                            pin_memory=False, num_workers=2)

    stats = RunningAverage()
    stats.add(["train_loss", "val_loss", "train_p_loss", "train_v_loss"])

    model.train()
    for state, action, result in train_loader:
        state = state.float().to(model.device)
        action = action.to(model.device)
        result = result.float().to(model.device)

        with autocast():
            policy_output, value_output = model(state.unsqueeze(1))
            # NOTE(review): squeeze() without a dim also removes the batch axis
            # when a batch holds a single sample - confirm the loss functions
            # tolerate that, or squeeze an explicit dimension.
            policy_loss = model.policy_loss(policy_output.squeeze(), action)
            value_loss = model.val_loss(value_output.squeeze(), result)
            loss = policy_loss + value_loss

        # AMP with gradient clipping: gradients are unscaled before clipping
        # so that max_norm is applied to their true magnitude.
        model.optimizer.zero_grad()
        model.grad_scaler.scale(loss).backward()
        model.grad_scaler.unscale_(model.optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        model.grad_scaler.step(model.optimizer)
        model.grad_scaler.update()

        stats.update({
            "train_loss": loss.item(),
            "train_p_loss": policy_loss.item(),
            "train_v_loss": value_loss.item()
            })

    print(f"Study Train Loss: {stats.get_average('train_loss')}")
    valid_loss = run_validation(model, val_loader, stats)
    return valid_loss


Load Models

In [4]:
# Load Model
# `model` is the network being trained; `best_model` is the frozen reference
# it plays and duels against. Both start from the same checkpoint when one exists.
MODEL_PATH = '/home/kage/chess_workspace/simpler_SwinChessNet42069.pt'

model = Chess42069NetworkSimple(hidden_dim=256, device='cuda')
best_model = Chess42069NetworkSimple(hidden_dim=256, device='cuda')

if os.path.exists(MODEL_PATH):
    print(f"Loading model at: {MODEL_PATH}")
    # Read the checkpoint from disk once; load_state_dict copies the values
    # into each network, so sharing the loaded dict is safe.
    state_dict = torch.load(MODEL_PATH)
    model.load_state_dict(state_dict)
    best_model.load_state_dict(state_dict)

best_model.eval()

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Loading model at: {MODEL_PATH}


Chess42069NetworkSimple(
  (swin_transformer): SwinTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(1, 96, kernel_size=(1, 1), stride=(1, 1))
      (norm): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
    )
    (layers): Sequential(
      (0): SwinTransformerStage(
        (downsample): Identity()
        (blocks): Sequential(
          (0): SwinTransformerBlock(
            (norm1): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
            (attn): WindowAttention(
              (qkv): Linear(in_features=96, out_features=288, bias=True)
              (attn_drop): Dropout(p=0.0, inplace=False)
              (proj): Linear(in_features=96, out_features=96, bias=True)
              (proj_drop): Dropout(p=0.0, inplace=False)
              (softmax): Softmax(dim=-1)
            )
            (drop_path): Identity()
            (norm2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
            (mlp): Mlp(
              (fc1): Linear(in_features=96, out_fe

Train - VPG with Self-Play and Dueling

In [5]:
# Train Params
# PGN_FILE: game archive handed to ChessDataset for the expert-study passes.
# MODEL_SAVEPATH: destination of the state dict saved by the training loop.
PGN_FILE = '/home/kage/chess_workspace/PGN-data/alphazero_stockfish_all/alphazero_vs_stockfish_all.pgn'
MODEL_SAVEPATH = '/home/kage/chess_workspace/WACKY_RL_MODEL.pt'

# NUM_EPOCHS: number of iterations of the training loop (one game each).
# STUDY_EVERY: intended interval, in loop iterations, between expert-study passes.
# DUEL_EVERY: interval, in loop iterations, between duels against the best model.
NUM_EPOCHS = 100
STUDY_EVERY = 1 
DUEL_EVERY = 10

# Shared dataset and environment used by the training loop.
# NOTE(review): 'Chess-v0' is presumably registered by importing adversarial_gym - confirm.
chess_dataset = ChessDataset(PGN_FILE)
env = gym.make('Chess-v0')

  logger.warn(


In [6]:
# Training loop: each iteration plays one game against the current best model,
# applies a policy-gradient update, runs an expert-study pass and periodically
# duels the best model to decide whether it should be replaced.
REWARD_DISCOUNT = 0.995  # per-move decay, applied backwards from the final move

for i in tqdm(range(NUM_EPOCHS)):
    # Play games as white or black against the previous best model
    if i % 2 == 0:
        obs, _, log_probs, reward = play_game(env, model, best_model, perspective=chess.WHITE, sample_n=3)
    else:
        obs, _, log_probs, reward = play_game(env, best_model, model, perspective=chess.BLACK, sample_n=3)

    # Discount the reward backwards and update: the last recorded move gets the
    # full reward and every earlier move is scaled down by REWARD_DISCOUNT.
    # NOTE(review): only reward == 1 is rewarded, for both colours. If the
    # environment reports results from white's point of view this reinforces
    # losses when the model plays black - confirm the reward convention.
    num_moves = len(log_probs)
    if reward == 1:
        rewards = [(REWARD_DISCOUNT ** (num_moves - 1 - k)) * reward for k in range(num_moves)]
    else:
        rewards = [0] * num_moves

    model.update_policy(log_probs, rewards)

    # Expert Study
    if i % STUDY_EVERY == 0:
        expert_study(model, chess_dataset, percent_dataset=0.05)

    # Duel to the death
    if i % DUEL_EVERY == 0:
        win_ratio = duel(env, best_model, model, num_rounds=10)
        print(f"Model win ratio: {win_ratio}")
        if win_ratio > 0.6:
            print("Best model was defeated!")
            # Promote a frozen snapshot of the challenger and persist its weights.
            best_model = copy.deepcopy(model)
            torch.save(model.state_dict(), MODEL_SAVEPATH)
            best_model.eval()

    # TODO: optional self-play update, e.g. play_game(env, model, model, perspective=None)
    # followed by model.update_policy(log_probs, rewards).

  0%|          | 0/100 [00:00<?, ?it/s]

GAME DRAWN!!
Study Train Loss: 4.31906755470935
Mean Validation Loss: 4.089915487501356, time elapsed: 0.5763793049991364 seconds
GAME WON!!
GAME WON!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME WON!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME WON!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME WON!!
GAME DRAWN!!
GAME DRAWN!!


  1%|          | 1/100 [01:29<2:27:10, 89.20s/it]

GAME DRAWN!!
Model win ratio: 0.1
GAME WON!!
Study Train Loss: 4.281427089078927


  2%|▏         | 2/100 [01:40<1:11:13, 43.61s/it]

Mean Validation Loss: 4.030757586161297, time elapsed: 0.573311432999617 seconds
GAME DRAWN!!
Study Train Loss: 4.11402584888317


  3%|▎         | 3/100 [01:56<49:34, 30.67s/it]  

Mean Validation Loss: 3.8428086704678006, time elapsed: 0.5150717810029164 seconds
GAME DRAWN!!
Study Train Loss: 3.9242285357581252


  4%|▍         | 4/100 [02:15<41:48, 26.13s/it]

Mean Validation Loss: 3.8204457759857178, time elapsed: 0.5046848749989294 seconds
GAME DRAWN!!
Study Train Loss: 3.8297720750172926


  5%|▌         | 5/100 [02:43<42:21, 26.75s/it]

Mean Validation Loss: 3.561065196990967, time elapsed: 0.5765838110019104 seconds
GAME DRAWN!!
Study Train Loss: 3.7133101857738726


  6%|▌         | 6/100 [03:10<42:25, 27.08s/it]

Mean Validation Loss: 3.6076747046576605, time elapsed: 0.6635855580025236 seconds
GAME DRAWN!!
Study Train Loss: 3.732717902572066


  7%|▋         | 7/100 [03:30<38:14, 24.67s/it]

Mean Validation Loss: 3.8381338119506836, time elapsed: 0.6656231399974786 seconds
GAME WON!!
Study Train Loss: 3.851238512698515


  8%|▊         | 8/100 [03:47<33:59, 22.17s/it]

Mean Validation Loss: 3.756683429082235, time elapsed: 0.6773788070058799 seconds
GAME DRAWN!!
Study Train Loss: 4.089818972128408


  9%|▉         | 9/100 [04:14<36:02, 23.77s/it]

Mean Validation Loss: 4.3730265564388695, time elapsed: 0.7049951380031416 seconds
GAME DRAWN!!
Study Train Loss: 4.405362514801967


 10%|█         | 10/100 [04:36<34:31, 23.02s/it]

Mean Validation Loss: 4.340428537792629, time elapsed: 0.6333502720008255 seconds
GAME WON!!
Study Train Loss: 4.601099653008544
Mean Validation Loss: 4.412098566691081, time elapsed: 0.6896796439978061 seconds
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!


 11%|█         | 11/100 [06:17<1:09:37, 46.94s/it]

GAME WON!!
Model win ratio: 0.05
GAME DRAWN!!
Study Train Loss: 4.474071217171939


 12%|█▏        | 12/100 [06:36<56:36, 38.60s/it]  

Mean Validation Loss: 4.34232227007548, time elapsed: 0.7025515610002913 seconds
GAME DRAWN!!
Study Train Loss: 4.376022162260831


 13%|█▎        | 13/100 [06:57<48:18, 33.32s/it]

Mean Validation Loss: 4.190015580919054, time elapsed: 0.73049589600123 seconds
GAME WON!!
Study Train Loss: 4.383427649368475


 14%|█▍        | 14/100 [07:10<38:55, 27.15s/it]

Mean Validation Loss: 4.255653381347656, time elapsed: 0.6611443180008791 seconds
GAME DRAWN!!
Study Train Loss: 4.269827822108327


 15%|█▌        | 15/100 [07:31<35:50, 25.30s/it]

Mean Validation Loss: 4.101264105902778, time elapsed: 0.6530303039980936 seconds
GAME WON!!
Study Train Loss: 4.3024989646158085


 16%|█▌        | 16/100 [07:43<29:51, 21.33s/it]

Mean Validation Loss: 4.1130041281382255, time elapsed: 0.6234130800003186 seconds
GAME DRAWN!!
Study Train Loss: 4.226616994834241


 17%|█▋        | 17/100 [08:03<28:36, 20.68s/it]

Mean Validation Loss: 4.076146999994914, time elapsed: 0.6300492639929871 seconds
GAME DRAWN!!
Study Train Loss: 4.231685635484295


 18%|█▊        | 18/100 [08:26<29:16, 21.43s/it]

Mean Validation Loss: 4.081437799665663, time elapsed: 0.7195842010041815 seconds
GAME DRAWN!!
Study Train Loss: 4.1899677559181505


 19%|█▉        | 19/100 [08:52<30:59, 22.96s/it]

Mean Validation Loss: 3.9958567089504666, time elapsed: 0.639725567001733 seconds
GAME DRAWN!!
Study Train Loss: 4.168882976343603


 20%|██        | 20/100 [09:19<32:08, 24.11s/it]

Mean Validation Loss: 4.123417245017158, time elapsed: 0.6524948619990028 seconds
GAME DRAWN!!
Study Train Loss: 4.093833796771956
Mean Validation Loss: 4.1003346178266735, time elapsed: 0.6699526360025629 seconds
GAME DRAWN!!
GAME DRAWN!!
GAME WON!!
GAME DRAWN!!
GAME WON!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!


 21%|██        | 21/100 [10:43<55:16, 41.98s/it]

GAME DRAWN!!
Model win ratio: 0.0
GAME DRAWN!!
Study Train Loss: 4.136401718045461


 22%|██▏       | 22/100 [11:16<51:04, 39.29s/it]

Mean Validation Loss: 3.9639554818471274, time elapsed: 0.7388132630003383 seconds
GAME DRAWN!!
Study Train Loss: 4.082086954587771


 23%|██▎       | 23/100 [11:55<50:18, 39.20s/it]

Mean Validation Loss: 3.9202361901601157, time elapsed: 0.762118468999688 seconds
GAME DRAWN!!
Study Train Loss: 4.008814682195215


 24%|██▍       | 24/100 [12:13<41:39, 32.88s/it]

Mean Validation Loss: 3.968998220231798, time elapsed: 0.5649591170003987 seconds
GAME DRAWN!!
Study Train Loss: 4.038962729183245


 25%|██▌       | 25/100 [12:44<40:32, 32.44s/it]

Mean Validation Loss: 3.7577611340416803, time elapsed: 0.6121175290027168 seconds
GAME DRAWN!!
Study Train Loss: 3.936007520298899


 26%|██▌       | 26/100 [12:58<33:08, 26.88s/it]

Mean Validation Loss: 3.9107540713416205, time elapsed: 0.6391940060057095 seconds
GAME DRAWN!!
Study Train Loss: 3.9297888426133136


 27%|██▋       | 27/100 [13:19<30:35, 25.15s/it]

Mean Validation Loss: 3.6771852705213757, time elapsed: 0.8034696440008702 seconds
GAME DRAWN!!
Study Train Loss: 3.942133835804316


 28%|██▊       | 28/100 [13:37<27:22, 22.82s/it]

Mean Validation Loss: 3.8432903024885388, time elapsed: 0.660912264000217 seconds
GAME DRAWN!!
Study Train Loss: 3.8663691238120745


 29%|██▉       | 29/100 [14:06<29:19, 24.78s/it]

Mean Validation Loss: 3.6336344612969294, time elapsed: 0.7967703550020815 seconds
GAME DRAWN!!
Study Train Loss: 3.8257728918099114


 30%|███       | 30/100 [14:33<29:38, 25.40s/it]

Mean Validation Loss: 3.9297505484686956, time elapsed: 0.6159773079998558 seconds
GAME DRAWN!!
Study Train Loss: 3.8653494104926973
Mean Validation Loss: 3.6916041374206543, time elapsed: 0.6583252700002049 seconds
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME WON!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!


 31%|███       | 31/100 [15:51<47:29, 41.29s/it]

GAME WON!!
Model win ratio: 0.1
GAME DRAWN!!
Study Train Loss: 3.768473978395816


 32%|███▏      | 32/100 [16:15<40:51, 36.05s/it]

Mean Validation Loss: 3.5593880547417536, time elapsed: 0.5590081669943174 seconds
GAME DRAWN!!
Study Train Loss: 3.733806710184356


 33%|███▎      | 33/100 [16:33<34:18, 30.72s/it]

Mean Validation Loss: 3.6530706617567272, time elapsed: 0.5459816779984976 seconds
GAME DRAWN!!
Study Train Loss: 3.771535955829385


 34%|███▍      | 34/100 [16:50<29:09, 26.50s/it]

Mean Validation Loss: 3.61055228445265, time elapsed: 0.5470019380009035 seconds
GAME DRAWN!!
Study Train Loss: 3.7677975201312397


 35%|███▌      | 35/100 [17:00<23:25, 21.63s/it]

Mean Validation Loss: 3.313575797610813, time elapsed: 0.5314979830000084 seconds
GAME DRAWN!!
Study Train Loss: 3.707277068385371


 36%|███▌      | 36/100 [17:21<22:38, 21.23s/it]

Mean Validation Loss: 3.5288211504618325, time elapsed: 0.6063991839982918 seconds
GAME WON!!
Study Train Loss: 3.6271736445250338


 37%|███▋      | 37/100 [17:32<19:04, 18.16s/it]

Mean Validation Loss: 3.6398036744859485, time elapsed: 0.54029311100021 seconds
GAME WON!!
Study Train Loss: 3.6421766163390354


 38%|███▊      | 38/100 [17:40<15:47, 15.29s/it]

Mean Validation Loss: 3.542807181676229, time elapsed: 0.6138969499952509 seconds
GAME DRAWN!!
Study Train Loss: 3.5927691753999693


 39%|███▉      | 39/100 [17:53<14:44, 14.50s/it]

Mean Validation Loss: 3.5117511219448514, time elapsed: 0.5785063750008703 seconds
GAME WON!!
Study Train Loss: 3.6380597249961193


 40%|████      | 40/100 [18:05<13:53, 13.90s/it]

Mean Validation Loss: 3.3470612631903753, time elapsed: 0.6364815650013043 seconds
GAME WON!!
Study Train Loss: 3.5310141657605585
Mean Validation Loss: 3.431538661321005, time elapsed: 0.7313856740001938 seconds
GAME DRAWN!!
GAME WON!!
GAME DRAWN!!
GAME DRAWN!!
GAME WON!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME WON!!
GAME WON!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!


 41%|████      | 41/100 [19:30<34:25, 35.01s/it]

GAME WON!!
Model win ratio: 0.15
GAME DRAWN!!
Study Train Loss: 3.5393664807449152


 42%|████▏     | 42/100 [19:54<30:38, 31.71s/it]

Mean Validation Loss: 3.4111664030287003, time elapsed: 0.7907685530008166 seconds
GAME DRAWN!!
Study Train Loss: 3.532204828144592


 43%|████▎     | 43/100 [20:06<24:40, 25.97s/it]

Mean Validation Loss: 3.3257098727756076, time elapsed: 0.804274827001791 seconds
GAME WON!!
Study Train Loss: 3.50891261924932


 44%|████▍     | 44/100 [20:19<20:32, 22.01s/it]

Mean Validation Loss: 3.15462244881524, time elapsed: 0.7427437939986703 seconds
GAME DRAWN!!
Study Train Loss: 3.4791917771468928


 45%|████▌     | 45/100 [20:51<22:54, 24.99s/it]

Mean Validation Loss: 3.250693294737074, time elapsed: 0.7002988169988384 seconds
GAME DRAWN!!
Study Train Loss: 3.4813458801787585


 46%|████▌     | 46/100 [21:11<21:08, 23.50s/it]

Mean Validation Loss: 3.5787561734517417, time elapsed: 0.8337386050043278 seconds
GAME DRAWN!!
Study Train Loss: 3.4785122783095748


 47%|████▋     | 47/100 [21:36<21:16, 24.09s/it]

Mean Validation Loss: 3.2325797080993652, time elapsed: 0.598090093000792 seconds
GAME DRAWN!!
Study Train Loss: 3.4606732497980564


 48%|████▊     | 48/100 [22:05<21:55, 25.31s/it]

Mean Validation Loss: 3.2344265249040394, time elapsed: 0.5644815470004687 seconds
GAME DRAWN!!
Study Train Loss: 3.434528094750864


 49%|████▉     | 49/100 [22:21<19:21, 22.77s/it]

Mean Validation Loss: 3.2852923605177136, time elapsed: 0.6287653819963452 seconds
GAME WON!!
Study Train Loss: 3.382026130770457


 50%|█████     | 50/100 [22:37<17:14, 20.69s/it]

Mean Validation Loss: 3.187950902514988, time elapsed: 0.6192941660046927 seconds
GAME DRAWN!!
Study Train Loss: 3.306833791144101
Mean Validation Loss: 3.0728099346160893, time elapsed: 0.6350240200044937 seconds
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME WON!!
GAME DRAWN!!
GAME DRAWN!!


 51%|█████     | 51/100 [24:13<35:22, 43.31s/it]

GAME DRAWN!!
Model win ratio: 0.0
GAME DRAWN!!
Study Train Loss: 3.437899845617788


 52%|█████▏    | 52/100 [24:34<29:18, 36.64s/it]

Mean Validation Loss: 3.0887071821424694, time elapsed: 0.6436160869998275 seconds
GAME DRAWN!!
Study Train Loss: 3.363352163338367


 53%|█████▎    | 53/100 [24:49<23:34, 30.09s/it]

Mean Validation Loss: 3.146557569503784, time elapsed: 0.6254166060025454 seconds
GAME DRAWN!!
Study Train Loss: 3.3070176883980076


 54%|█████▍    | 54/100 [25:16<22:18, 29.10s/it]

Mean Validation Loss: 3.10556607776218, time elapsed: 0.6230615410022438 seconds
GAME DRAWN!!
Study Train Loss: 3.329554066245937


 55%|█████▌    | 55/100 [25:35<19:38, 26.19s/it]

Mean Validation Loss: 3.17511608865526, time elapsed: 0.6282673519963282 seconds
GAME WON!!
Study Train Loss: 3.2408355194845315


 56%|█████▌    | 56/100 [25:46<15:52, 21.65s/it]

Mean Validation Loss: 3.217780696021186, time elapsed: 0.6709776529969531 seconds
GAME DRAWN!!
Study Train Loss: 3.2506632186748363


 57%|█████▋    | 57/100 [26:07<15:18, 21.36s/it]

Mean Validation Loss: 3.009931961695353, time elapsed: 0.7143355210064328 seconds
GAME DRAWN!!
Study Train Loss: 3.2399378970817283


 58%|█████▊    | 58/100 [26:28<14:55, 21.31s/it]

Mean Validation Loss: 3.192462205886841, time elapsed: 0.7386746769989259 seconds
GAME DRAWN!!
Study Train Loss: 3.219036008104866


 59%|█████▉    | 59/100 [26:51<14:46, 21.62s/it]

Mean Validation Loss: 3.027945068147447, time elapsed: 0.5246492560036131 seconds
GAME DRAWN!!
Study Train Loss: 3.2360381020439997


 60%|██████    | 60/100 [27:14<14:49, 22.24s/it]

Mean Validation Loss: 3.1365944014655214, time elapsed: 0.5979850430012448 seconds
GAME DRAWN!!
Study Train Loss: 3.23448779259199
Mean Validation Loss: 2.964666578504774, time elapsed: 0.6428910380054731 seconds
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME WON!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME WON!!
GAME DRAWN!!
GAME WON!!
GAME DRAWN!!
GAME DRAWN!!


 61%|██████    | 61/100 [28:41<26:56, 41.44s/it]

GAME DRAWN!!
Model win ratio: 0.05
GAME DRAWN!!
Study Train Loss: 3.2078578060055953


 62%|██████▏   | 62/100 [28:54<20:56, 33.07s/it]

Mean Validation Loss: 3.299382209777832, time elapsed: 0.5557886070018867 seconds
GAME DRAWN!!
Study Train Loss: 3.1276647367595167


 63%|██████▎   | 63/100 [29:10<17:12, 27.90s/it]

Mean Validation Loss: 3.0902777512868247, time elapsed: 0.5793064530007541 seconds
GAME WON!!
Study Train Loss: 3.1261061827341705


 64%|██████▍   | 64/100 [29:20<13:28, 22.45s/it]

Mean Validation Loss: 2.928077644772, time elapsed: 0.5785120099972119 seconds
GAME DRAWN!!
Study Train Loss: 3.111450739848761


 65%|██████▌   | 65/100 [29:39<12:33, 21.52s/it]

Mean Validation Loss: 2.9420832792917886, time elapsed: 0.3006833679974079 seconds
GAME DRAWN!!
Study Train Loss: 3.111619395974242


 66%|██████▌   | 66/100 [30:01<12:11, 21.51s/it]

Mean Validation Loss: 2.7914616796705456, time elapsed: 0.4995485350009403 seconds
GAME DRAWN!!
Study Train Loss: 3.125414038881843


 67%|██████▋   | 67/100 [30:15<10:38, 19.35s/it]

Mean Validation Loss: 3.0079349411858454, time elapsed: 0.5837266919988906 seconds
GAME DRAWN!!
Study Train Loss: 3.038360734044769


 68%|██████▊   | 68/100 [30:40<11:15, 21.12s/it]

Mean Validation Loss: 2.9303864902920194, time elapsed: 0.5517666460000328 seconds
GAME DRAWN!!
Study Train Loss: 3.0730804484567518


 69%|██████▉   | 69/100 [30:55<10:00, 19.36s/it]

Mean Validation Loss: 2.875299745135837, time elapsed: 0.54984855800285 seconds
GAME DRAWN!!
Study Train Loss: 3.0209083380522563


 70%|███████   | 70/100 [31:11<09:08, 18.28s/it]

Mean Validation Loss: 3.0406245390574136, time elapsed: 0.5316947649989743 seconds
GAME DRAWN!!
Study Train Loss: 3.0590053899788563
Mean Validation Loss: 2.932081937789917, time elapsed: 0.5763289320020704 seconds
GAME DRAWN!!
GAME DRAWN!!
GAME WON!!
GAME WON!!
GAME DRAWN!!
GAME WON!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME WON!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME WON!!


 71%|███████   | 71/100 [32:32<17:57, 37.15s/it]

GAME WON!!
Model win ratio: 0.2
GAME WON!!
Study Train Loss: 3.032985163323673


 72%|███████▏  | 72/100 [32:44<13:43, 29.43s/it]

Mean Validation Loss: 2.810455057356093, time elapsed: 0.6297539070001221 seconds
GAME DRAWN!!
Study Train Loss: 2.9952516379179777


 73%|███████▎  | 73/100 [32:54<10:38, 23.65s/it]

Mean Validation Loss: 2.9641937414805093, time elapsed: 0.691476659005275 seconds
GAME DRAWN!!
Study Train Loss: 2.977288458082411


 74%|███████▍  | 74/100 [33:16<10:01, 23.14s/it]

Mean Validation Loss: 2.726882881588406, time elapsed: 0.6695245199953206 seconds
GAME DRAWN!!
Study Train Loss: 2.991537205966902


 75%|███████▌  | 75/100 [33:40<09:48, 23.52s/it]

Mean Validation Loss: 2.876691155963474, time elapsed: 0.6499478690020624 seconds
GAME WON!!
Study Train Loss: 2.9280013361094914


 76%|███████▌  | 76/100 [33:58<08:42, 21.79s/it]

Mean Validation Loss: 2.84123248524136, time elapsed: 0.7046052169971517 seconds
GAME DRAWN!!
Study Train Loss: 2.930615745944742


 77%|███████▋  | 77/100 [34:23<08:45, 22.86s/it]

Mean Validation Loss: 2.6840368111928306, time elapsed: 0.6339149900013581 seconds
GAME DRAWN!!
Study Train Loss: 2.9177232200716765


 78%|███████▊  | 78/100 [34:49<08:44, 23.83s/it]

Mean Validation Loss: 2.6756590207417807, time elapsed: 0.6669480670025223 seconds
GAME DRAWN!!
Study Train Loss: 2.906307750278048


 79%|███████▉  | 79/100 [35:07<07:42, 22.00s/it]

Mean Validation Loss: 2.7790476746029324, time elapsed: 0.5885795220019645 seconds
GAME DRAWN!!
Study Train Loss: 2.8317406560167844


 80%|████████  | 80/100 [35:32<07:39, 23.00s/it]

Mean Validation Loss: 2.636542664633857, time elapsed: 0.6727622889957274 seconds
GAME DRAWN!!
Study Train Loss: 2.89491938072958
Mean Validation Loss: 2.7820810741848416, time elapsed: 0.7031255740002962 seconds
GAME DRAWN!!
GAME WON!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME WON!!
GAME DRAWN!!
GAME WON!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!


 81%|████████  | 81/100 [37:31<16:21, 51.65s/it]

GAME WON!!
Model win ratio: 0.2
GAME WON!!
Study Train Loss: 2.865816160484597


 82%|████████▏ | 82/100 [37:41<11:46, 39.27s/it]

Mean Validation Loss: 2.6466915607452393, time elapsed: 0.6950926060017082 seconds
GAME DRAWN!!
Study Train Loss: 2.825404523331442


 83%|████████▎ | 83/100 [37:53<08:45, 30.90s/it]

Mean Validation Loss: 2.638578176498413, time elapsed: 0.7028089579980588 seconds
GAME WON!!
Study Train Loss: 2.7850247871728584


 84%|████████▍ | 84/100 [38:00<06:23, 23.95s/it]

Mean Validation Loss: 2.7046921253204346, time elapsed: 0.6889952509955037 seconds
GAME WON!!
Study Train Loss: 2.8197515584804385


 85%|████████▌ | 85/100 [38:11<04:58, 19.92s/it]

Mean Validation Loss: 2.873505062527127, time elapsed: 0.7010876709973672 seconds
GAME DRAWN!!
Study Train Loss: 2.883064908745848


 86%|████████▌ | 86/100 [38:32<04:45, 20.36s/it]

Mean Validation Loss: 2.751083427005344, time elapsed: 0.7032586759960395 seconds
GAME DRAWN!!
Study Train Loss: 2.8338919598379246


 87%|████████▋ | 87/100 [38:56<04:38, 21.44s/it]

Mean Validation Loss: 2.6160704029930963, time elapsed: 0.7074426070030313 seconds
GAME DRAWN!!
Study Train Loss: 2.750749952999162


 88%|████████▊ | 88/100 [39:14<04:03, 20.26s/it]

Mean Validation Loss: 2.6705658965640597, time elapsed: 0.6999074999985169 seconds
GAME DRAWN!!
Study Train Loss: 2.834833392390499


 89%|████████▉ | 89/100 [39:41<04:05, 22.28s/it]

Mean Validation Loss: 2.367328855726454, time elapsed: 0.6823768370013568 seconds
GAME DRAWN!!
Study Train Loss: 2.6765351604532315


 90%|█████████ | 90/100 [40:06<03:52, 23.22s/it]

Mean Validation Loss: 2.5924169222513833, time elapsed: 0.6553051950031659 seconds
GAME WON!!
Study Train Loss: 2.732386318253882
Mean Validation Loss: 2.744493007659912, time elapsed: 0.7191358440031763 seconds
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!
GAME WON!!
GAME DRAWN!!
GAME DRAWN!!
GAME DRAWN!!


 91%|█████████ | 91/100 [41:45<06:53, 45.97s/it]

GAME DRAWN!!
Model win ratio: 0.05
GAME DRAWN!!
Study Train Loss: 2.789892017105479


 92%|█████████▏| 92/100 [42:05<05:05, 38.21s/it]

Mean Validation Loss: 2.4997832245296903, time elapsed: 0.6104411119958968 seconds
GAME DRAWN!!
Study Train Loss: 2.7466884130313067


 93%|█████████▎| 93/100 [42:25<03:48, 32.60s/it]

Mean Validation Loss: 2.567388508054945, time elapsed: 0.8197006469999906 seconds
GAME DRAWN!!
Study Train Loss: 2.694831071076569


 94%|█████████▍| 94/100 [42:54<03:08, 31.47s/it]

Mean Validation Loss: 2.4596804512871633, time elapsed: 0.7396834360042703 seconds
GAME DRAWN!!
Study Train Loss: 2.6580091552969867


 95%|█████████▌| 95/100 [43:16<02:23, 28.73s/it]

Mean Validation Loss: 2.595542430877685, time elapsed: 0.7861987750002299 seconds
GAME DRAWN!!
Study Train Loss: 2.669498863043608


 96%|█████████▌| 96/100 [43:40<01:49, 27.36s/it]

Mean Validation Loss: 2.5999159945382013, time elapsed: 0.7055649530011578 seconds
GAME WON!!
Study Train Loss: 2.7157588476016197


 97%|█████████▋| 97/100 [43:53<01:08, 22.93s/it]

Mean Validation Loss: 2.345960643556383, time elapsed: 0.6949995739996666 seconds
GAME WON!!
Study Train Loss: 2.6276948569733407


 98%|█████████▊| 98/100 [44:04<00:39, 19.55s/it]

Mean Validation Loss: 2.4579917589823403, time elapsed: 0.7240242990010302 seconds
GAME DRAWN!!
Study Train Loss: 2.61080711123384


 99%|█████████▉| 99/100 [44:18<00:17, 17.75s/it]

Mean Validation Loss: 2.5171647601657443, time elapsed: 0.7098035700037144 seconds
GAME DRAWN!!
Study Train Loss: 2.6274128033791055


100%|██████████| 100/100 [44:44<00:00, 26.84s/it]

Mean Validation Loss: 2.344901031917996, time elapsed: 0.7098856519951369 seconds



