## Build Model from weights files

In [1]:
## To train the adversary we need a full model file, not just a weights file, so we convert the weights first

from rnn_learner import RNNAgent

# Architecture settings, inferred from the weights file.
n_actions = 2
state_size = 0
n_cells = 5

# Weights checkpoint to convert. To convert another learner, change ONLY this path.
#learner_model_path = 'trained_model/RNN_learner_single/gemini/cells_5_gemini/model-49900.weights.h5'
learner_model_path = 'trained_model/RNN_learner_single/cells_5gpt/model-10000.weights.h5'

# Derive the output path from the input path instead of hard-coding it a second
# time: with two independent literals, switching the checkpoint above would
# silently overwrite the full-model file of a different learner.
weights_suffix = '.weights.h5'
if not learner_model_path.endswith(weights_suffix):
    raise ValueError(
        "Expected a '{}' checkpoint, got: {}".format(weights_suffix, learner_model_path))
full_model_path = learner_model_path[:-len(weights_suffix)] + '.weights_final.h5'

# Build the agent from the weights checkpoint and show the resulting architecture.
agent = RNNAgent(n_actions=n_actions, state_size=state_size, n_cells=n_cells, model_path=learner_model_path)
model = agent.model
model.summary()

# Save the full model (architecture + weights) next to the weights checkpoint.
model.save(full_model_path)

2025-07-16 00:14:49,719 - ADV - DEBUG - Weights loaded from trained_model/RNN_learner_single/cells_5gpt/model-10000.weights.h5
2025-07-16 00:14:49,720 - ADV - DEBUG - Model created with 2 actions and 5 cells




## Training the Adversary

In [2]:
import os
import json
import h5py
import numpy as np
import multiprocessing

from tensorflow.keras.models import load_model, Model
from learner_env import LearnverEnv
from ddqn import DQNAgent
from util.logger import LogFile, DLogger


def load_h5_model_patch_input_layer(path):
    """Load an HDF5 Keras model, renaming ``batch_shape`` to ``batch_input_shape``.

    The ``model_config`` attribute stored in the file is parsed as JSON and
    every layer config that carries a ``batch_shape`` key has that key renamed
    to ``batch_input_shape``. The file on disk is rewritten only when at least
    one key was renamed, so an already-patched (or read-only) file is left
    untouched and is opened for reading only.

    Args:
        path: Path to the ``.h5`` model file. The file is modified in place
            when patching is required.

    Returns:
        The model returned by ``load_model(path, compile=False, ...)``.

    Raises:
        ValueError: If the file has no ``model_config`` attribute.
    """
    # Inspect the stored config without requesting write access.
    with h5py.File(path, 'r') as f:
        model_config = f.attrs.get('model_config')
        if model_config is None:
            raise ValueError("No model config found in file.")
        if isinstance(model_config, bytes):
            model_config = model_config.decode('utf-8')
    model_config_json = json.loads(model_config)

    patched = False
    for layer in model_config_json['config']['layers']:
        config = layer['config']
        if 'batch_shape' in config:
            config['batch_input_shape'] = config.pop('batch_shape')
            patched = True

    # Touch the file only when something actually changed.
    if patched:
        with h5py.File(path, 'r+') as f:
            f.attrs.modify('model_config', json.dumps(model_config_json).encode('utf-8'))

    # 'Functional' is mapped to Model so the class name stored in the config resolves.
    return load_model(path, compile=False, custom_objects={'Functional': Model})


# Hyper-parameter grid: every combination of replay-buffer size ('b'),
# learning rate ('lr') and epsilon ('eps'). Buffer size varies slowest and
# epsilon fastest, so a given index always maps to the same combination.
configs = [
    dict(b=buffer_size, lr=learning_rate, eps=epsilon)
    for buffer_size in (200000, 400000)
    for learning_rate in (0.001, 0.0001, 1e-5)
    for epsilon in (0.01, 0.1, 0.2)
]


def run_adv(config_index=9):
    """Train a DQN adversary against the saved learner model.

    Picks one hyper-parameter combination from ``configs``, creates the output
    directory ``<cwd>/trained_model/test_adv_RL_<b>_eps_<eps>_lr_<lr>/gpt``,
    loads the learner model, wraps it in a ``LearnverEnv`` and trains a
    ``DQNAgent`` on that environment. Logging goes through ``LogFile`` with
    ``run.log`` inside the output directory.

    Args:
        config_index: Index into ``configs``. Defaults to 9, the combination
            this function previously hard-coded, so ``run_adv()`` behaves as
            before. Accepting the index also makes the function usable as the
            ``f`` argument of ``run``, which calls ``f(i)`` with a config index.
    """
    np.set_printoptions(precision=3)

    config = configs[config_index]
    buf = config['b']
    lr = config['lr']
    eps = config['eps']

    learner_model_path = 'trained_model/RNN_learner_single/cells_5gpt/model-10000.weights_final.h5'

    # Build an absolute output path rooted at the current working directory.
    base_dir = os.getcwd()
    output_path = os.path.join(base_dir, 'trained_model', f'test_adv_RL_{buf}_eps_{eps}_lr_{lr}', 'gpt')
    os.makedirs(output_path, exist_ok=True)
    print(f'[INFO] Output path: {output_path}')

    with LogFile(output_path, 'run.log'):
        DLogger.logger().debug("Learner model loaded from path {}".format(learner_model_path))
        model = load_h5_model_patch_input_layer(learner_model_path)

        # summary() prints the table itself and returns None; wrapping it in
        # print() only added a stray "None" line to the output.
        model.summary()

        # NOTE(review): the meaning of the positional arguments 2 and 1000 is
        # defined by LearnverEnv, which is not visible here - confirm there.
        le = LearnverEnv(model, 2, 1000)
        le.reset()

        # The agent's state size is taken from the environment's observation space.
        agent = DQNAgent(
            state_size=le.observation_space.shape[0],
            action_size=4,
            buffer_size=buf,
            epsilon=eps,
            lr=lr
        )

        agent.train(env=le, output_path=output_path, batch_size=1000, total_episodes=int(1e10))


def run(f, n_proc, chunk):
    """Run ``f`` over one chunk of config indices in a process pool.

    The indices of ``configs`` are split into consecutive chunks of ``n_proc``
    entries; chunk numbers are 1-based. ``f`` is called once per index in the
    selected chunk, with the index as its single positional argument. Both
    ends of the range are clamped to ``len(configs)``, so a chunk past the end
    of the grid maps over an empty range.

    Args:
        f: Callable taking one config index.
        n_proc: Number of worker processes, and also the chunk size.
        chunk: 1-based chunk number to execute.
    """
    start = min(len(configs), (chunk - 1) * n_proc)
    end = min(len(configs), chunk * n_proc)

    # The context manager guarantees the workers are torn down even when
    # f raises inside map(); previously close()/join() were skipped in that
    # case and the worker processes were left behind.
    with multiprocessing.Pool(n_proc) as pool:
        pool.map(f, range(start, end))
        pool.close()
        pool.join()


# Entry point: start a single adversary-training run when this code is
# executed as the main module.
if __name__ == '__main__':
    run_adv()


[INFO] Output path: C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt


2025-07-16 00:17:06,630 - ADV - DEBUG - version control: (None, None)
DEBUG:ADV:version control: (None, None)
2025-07-16 00:17:06,633 - ADV - DEBUG - Learner model loaded from path trained_model/RNN_learner_single/cells_5gpt/model-10000.weights_final.h5
DEBUG:ADV:Learner model loaded from path trained_model/RNN_learner_single/cells_5gpt/model-10000.weights_final.h5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


None


2025-07-16 00:17:07,017 - ADV - DEBUG - Trained model saved to: C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-0.keras
DEBUG:ADV:Trained model saved to: C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-0.keras


dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-0.keras


2025-07-16 00:17:13,584 - ADV - DEBUG - episode: 99/10000000000, score: 0, e: 0.01, reward: 29.527, seudo-rew:0.0 max: 0.8500117659568787 init-act: [0.392 1.421 0.522 0.458]
DEBUG:ADV:episode: 99/10000000000, score: 0, e: 0.01, reward: 29.527, seudo-rew:0.0 max: 0.8500117659568787 init-act: [0.392 1.421 0.522 0.458]
2025-07-16 00:17:19,853 - ADV - DEBUG - episode: 199/10000000000, score: 0, e: 0.01, reward: 30.265, seudo-rew:0.0 max: 1.3253772258758545 init-act: [0.786 2.172 1.09  0.997]
DEBUG:ADV:episode: 199/10000000000, score: 0, e: 0.01, reward: 30.265, seudo-rew:0.0 max: 1.3253772258758545 init-act: [0.786 2.172 1.09  0.997]
2025-07-16 00:17:25,647 - ADV - DEBUG - episode: 299/10000000000, score: 0, e: 0.01, reward: 30.838, seudo-rew:0.0 max: 1.9609593152999878 init-act: [1.298 2.854 1.601 1.569]
DEBUG:ADV:episode: 299/10000000000, score: 0, e: 0.01, reward: 30.838, seudo-rew:0.0 max: 1.9609593152999878 init-act: [1.298 2.854 1.601 1.569]
2025-07-16 00:17:31,443 - ADV - DEBUG - ep

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-2000.keras


2025-07-16 00:20:26,528 - ADV - DEBUG - episode: 2099/10000000000, score: 0, e: 0.01, reward: 42.164, seudo-rew:0.0 max: 19.800533294677734 init-act: [14.395 20.504 13.681 19.311]
DEBUG:ADV:episode: 2099/10000000000, score: 0, e: 0.01, reward: 42.164, seudo-rew:0.0 max: 19.800533294677734 init-act: [14.395 20.504 13.681 19.311]
2025-07-16 00:20:35,089 - ADV - DEBUG - episode: 2199/10000000000, score: 0, e: 0.01, reward: 40.032, seudo-rew:0.0 max: 20.926790237426758 init-act: [14.526 21.623 14.331 20.221]
DEBUG:ADV:episode: 2199/10000000000, score: 0, e: 0.01, reward: 40.032, seudo-rew:0.0 max: 20.926790237426758 init-act: [14.526 21.623 14.331 20.221]
2025-07-16 00:20:43,822 - ADV - DEBUG - episode: 2299/10000000000, score: 0, e: 0.01, reward: 41.224, seudo-rew:0.0 max: 21.94673728942871 init-act: [15.262 22.607 15.076 21.563]
DEBUG:ADV:episode: 2299/10000000000, score: 0, e: 0.01, reward: 41.224, seudo-rew:0.0 max: 21.94673728942871 init-act: [15.262 22.607 15.076 21.563]
2025-07-16 0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-4000.keras


2025-07-16 00:23:34,776 - ADV - DEBUG - episode: 4099/10000000000, score: 0, e: 0.01, reward: 42.445, seudo-rew:0.0 max: 33.93815231323242 init-act: [19.059 34.449 18.375 35.   ]
DEBUG:ADV:episode: 4099/10000000000, score: 0, e: 0.01, reward: 42.445, seudo-rew:0.0 max: 33.93815231323242 init-act: [19.059 34.449 18.375 35.   ]
2025-07-16 00:23:45,152 - ADV - DEBUG - episode: 4199/10000000000, score: 0, e: 0.01, reward: 42.595, seudo-rew:0.0 max: 34.488555908203125 init-act: [19.095 34.764 18.758 35.488]
DEBUG:ADV:episode: 4199/10000000000, score: 0, e: 0.01, reward: 42.595, seudo-rew:0.0 max: 34.488555908203125 init-act: [19.095 34.764 18.758 35.488]
2025-07-16 00:23:53,835 - ADV - DEBUG - episode: 4299/10000000000, score: 0, e: 0.01, reward: 43.112, seudo-rew:0.0 max: 34.83128356933594 init-act: [19.52  35.164 19.346 35.796]
DEBUG:ADV:episode: 4299/10000000000, score: 0, e: 0.01, reward: 43.112, seudo-rew:0.0 max: 34.83128356933594 init-act: [19.52  35.164 19.346 35.796]
2025-07-16 00:

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-6000.keras


2025-07-16 00:26:50,545 - ADV - DEBUG - episode: 6099/10000000000, score: 0, e: 0.01, reward: 44.23, seudo-rew:0.0 max: 38.25739288330078 init-act: [22.184 39.141 22.849 39.058]
DEBUG:ADV:episode: 6099/10000000000, score: 0, e: 0.01, reward: 44.23, seudo-rew:0.0 max: 38.25739288330078 init-act: [22.184 39.141 22.849 39.058]
2025-07-16 00:27:00,454 - ADV - DEBUG - episode: 6199/10000000000, score: 0, e: 0.01, reward: 44.028, seudo-rew:0.0 max: 38.52640151977539 init-act: [22.704 39.27  23.076 39.142]
DEBUG:ADV:episode: 6199/10000000000, score: 0, e: 0.01, reward: 44.028, seudo-rew:0.0 max: 38.52640151977539 init-act: [22.704 39.27  23.076 39.142]
2025-07-16 00:27:10,341 - ADV - DEBUG - episode: 6299/10000000000, score: 0, e: 0.01, reward: 44.051, seudo-rew:0.0 max: 38.634910583496094 init-act: [22.861 39.692 22.986 39.474]
DEBUG:ADV:episode: 6299/10000000000, score: 0, e: 0.01, reward: 44.051, seudo-rew:0.0 max: 38.634910583496094 init-act: [22.861 39.692 22.986 39.474]
2025-07-16 00:27

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-8000.keras


2025-07-16 00:29:59,934 - ADV - DEBUG - episode: 8099/10000000000, score: 0, e: 0.01, reward: 44.493, seudo-rew:0.0 max: 42.02609634399414 init-act: [26.492 43.087 26.291 42.91 ]
DEBUG:ADV:episode: 8099/10000000000, score: 0, e: 0.01, reward: 44.493, seudo-rew:0.0 max: 42.02609634399414 init-act: [26.492 43.087 26.291 42.91 ]
2025-07-16 00:30:10,831 - ADV - DEBUG - episode: 8199/10000000000, score: 0, e: 0.01, reward: 44.702, seudo-rew:0.0 max: 42.093177795410156 init-act: [27.17  42.727 26.301 42.685]
DEBUG:ADV:episode: 8199/10000000000, score: 0, e: 0.01, reward: 44.702, seudo-rew:0.0 max: 42.093177795410156 init-act: [27.17  42.727 26.301 42.685]
2025-07-16 00:30:20,341 - ADV - DEBUG - episode: 8299/10000000000, score: 0, e: 0.01, reward: 44.525, seudo-rew:0.0 max: 42.27355194091797 init-act: [27.255 43.358 26.423 43.272]
DEBUG:ADV:episode: 8299/10000000000, score: 0, e: 0.01, reward: 44.525, seudo-rew:0.0 max: 42.27355194091797 init-act: [27.255 43.358 26.423 43.272]
2025-07-16 00:

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-10000.keras


2025-07-16 00:33:06,626 - ADV - DEBUG - episode: 10099/10000000000, score: 0, e: 0.01, reward: 43.971, seudo-rew:0.0 max: 44.550960540771484 init-act: [28.108 45.49  27.957 45.524]
DEBUG:ADV:episode: 10099/10000000000, score: 0, e: 0.01, reward: 43.971, seudo-rew:0.0 max: 44.550960540771484 init-act: [28.108 45.49  27.957 45.524]
2025-07-16 00:33:14,783 - ADV - DEBUG - episode: 10199/10000000000, score: 0, e: 0.01, reward: 44.482, seudo-rew:0.0 max: 44.72782897949219 init-act: [28.267 45.552 28.057 45.644]
DEBUG:ADV:episode: 10199/10000000000, score: 0, e: 0.01, reward: 44.482, seudo-rew:0.0 max: 44.72782897949219 init-act: [28.267 45.552 28.057 45.644]
2025-07-16 00:33:23,005 - ADV - DEBUG - episode: 10299/10000000000, score: 0, e: 0.01, reward: 44.324, seudo-rew:0.0 max: 44.73317337036133 init-act: [28.779 45.646 27.963 45.663]
DEBUG:ADV:episode: 10299/10000000000, score: 0, e: 0.01, reward: 44.324, seudo-rew:0.0 max: 44.73317337036133 init-act: [28.779 45.646 27.963 45.663]
2025-07-

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-12000.keras


2025-07-16 00:36:02,147 - ADV - DEBUG - episode: 12099/10000000000, score: 0, e: 0.01, reward: 44.809, seudo-rew:0.0 max: 46.094688415527344 init-act: [28.514 46.961 28.514 46.855]
DEBUG:ADV:episode: 12099/10000000000, score: 0, e: 0.01, reward: 44.809, seudo-rew:0.0 max: 46.094688415527344 init-act: [28.514 46.961 28.514 46.855]
2025-07-16 00:36:10,560 - ADV - DEBUG - episode: 12199/10000000000, score: 0, e: 0.01, reward: 45.165, seudo-rew:0.0 max: 46.00224304199219 init-act: [28.393 46.568 28.162 46.633]
DEBUG:ADV:episode: 12199/10000000000, score: 0, e: 0.01, reward: 45.165, seudo-rew:0.0 max: 46.00224304199219 init-act: [28.393 46.568 28.162 46.633]
2025-07-16 00:36:19,038 - ADV - DEBUG - episode: 12299/10000000000, score: 0, e: 0.01, reward: 45.072, seudo-rew:0.0 max: 45.948570251464844 init-act: [28.143 47.034 28.064 46.888]
DEBUG:ADV:episode: 12299/10000000000, score: 0, e: 0.01, reward: 45.072, seudo-rew:0.0 max: 45.948570251464844 init-act: [28.143 47.034 28.064 46.888]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-14000.keras


2025-07-16 00:38:50,955 - ADV - DEBUG - episode: 14099/10000000000, score: 0, e: 0.01, reward: 44.901, seudo-rew:0.0 max: 46.58794021606445 init-act: [28.238 46.538 28.105 46.761]
DEBUG:ADV:episode: 14099/10000000000, score: 0, e: 0.01, reward: 44.901, seudo-rew:0.0 max: 46.58794021606445 init-act: [28.238 46.538 28.105 46.761]
2025-07-16 00:39:01,464 - ADV - DEBUG - episode: 14199/10000000000, score: 0, e: 0.01, reward: 45.347, seudo-rew:0.0 max: 46.69413375854492 init-act: [28.344 47.633 28.295 47.449]
DEBUG:ADV:episode: 14199/10000000000, score: 0, e: 0.01, reward: 45.347, seudo-rew:0.0 max: 46.69413375854492 init-act: [28.344 47.633 28.295 47.449]
2025-07-16 00:39:10,263 - ADV - DEBUG - episode: 14299/10000000000, score: 0, e: 0.01, reward: 45.187, seudo-rew:0.0 max: 46.638587951660156 init-act: [28.148 47.133 28.104 46.829]
DEBUG:ADV:episode: 14299/10000000000, score: 0, e: 0.01, reward: 45.187, seudo-rew:0.0 max: 46.638587951660156 init-act: [28.148 47.133 28.104 46.829]
2025-07-

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-16000.keras


2025-07-16 00:41:49,645 - ADV - DEBUG - episode: 16099/10000000000, score: 0, e: 0.01, reward: 45.243, seudo-rew:0.0 max: 46.00700759887695 init-act: [28.496 47.144 28.162 46.8  ]
DEBUG:ADV:episode: 16099/10000000000, score: 0, e: 0.01, reward: 45.243, seudo-rew:0.0 max: 46.00700759887695 init-act: [28.496 47.144 28.162 46.8  ]
2025-07-16 00:41:57,738 - ADV - DEBUG - episode: 16199/10000000000, score: 0, e: 0.01, reward: 45.679, seudo-rew:0.0 max: 46.00068283081055 init-act: [28.435 46.824 28.277 46.664]
DEBUG:ADV:episode: 16199/10000000000, score: 0, e: 0.01, reward: 45.679, seudo-rew:0.0 max: 46.00068283081055 init-act: [28.435 46.824 28.277 46.664]
2025-07-16 00:42:05,859 - ADV - DEBUG - episode: 16299/10000000000, score: 0, e: 0.01, reward: 45.305, seudo-rew:0.0 max: 46.2138786315918 init-act: [28.335 46.892 28.297 47.29 ]
DEBUG:ADV:episode: 16299/10000000000, score: 0, e: 0.01, reward: 45.305, seudo-rew:0.0 max: 46.2138786315918 init-act: [28.335 46.892 28.297 47.29 ]
2025-07-16 0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-18000.keras


2025-07-16 00:44:51,026 - ADV - DEBUG - episode: 18099/10000000000, score: 0, e: 0.01, reward: 45.786, seudo-rew:0.0 max: 46.51250457763672 init-act: [28.311 47.064 29.125 47.477]
DEBUG:ADV:episode: 18099/10000000000, score: 0, e: 0.01, reward: 45.786, seudo-rew:0.0 max: 46.51250457763672 init-act: [28.311 47.064 29.125 47.477]
2025-07-16 00:45:00,192 - ADV - DEBUG - episode: 18199/10000000000, score: 0, e: 0.01, reward: 46.075, seudo-rew:0.0 max: 46.57267761230469 init-act: [28.332 47.383 28.994 47.691]
DEBUG:ADV:episode: 18199/10000000000, score: 0, e: 0.01, reward: 46.075, seudo-rew:0.0 max: 46.57267761230469 init-act: [28.332 47.383 28.994 47.691]
2025-07-16 00:45:09,351 - ADV - DEBUG - episode: 18299/10000000000, score: 0, e: 0.01, reward: 45.465, seudo-rew:0.0 max: 46.67726135253906 init-act: [28.414 47.556 28.865 47.941]
DEBUG:ADV:episode: 18299/10000000000, score: 0, e: 0.01, reward: 45.465, seudo-rew:0.0 max: 46.67726135253906 init-act: [28.414 47.556 28.865 47.941]
2025-07-16

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-20000.keras


2025-07-16 00:47:57,827 - ADV - DEBUG - episode: 20099/10000000000, score: 0, e: 0.01, reward: 46.103, seudo-rew:0.0 max: 46.71207046508789 init-act: [28.321 47.747 28.225 47.477]
DEBUG:ADV:episode: 20099/10000000000, score: 0, e: 0.01, reward: 46.103, seudo-rew:0.0 max: 46.71207046508789 init-act: [28.321 47.747 28.225 47.477]
2025-07-16 00:48:07,055 - ADV - DEBUG - episode: 20199/10000000000, score: 0, e: 0.01, reward: 45.907, seudo-rew:0.0 max: 46.835105895996094 init-act: [28.325 47.551 28.318 47.246]
DEBUG:ADV:episode: 20199/10000000000, score: 0, e: 0.01, reward: 45.907, seudo-rew:0.0 max: 46.835105895996094 init-act: [28.325 47.551 28.318 47.246]
2025-07-16 00:48:16,295 - ADV - DEBUG - episode: 20299/10000000000, score: 0, e: 0.01, reward: 46.013, seudo-rew:0.0 max: 46.89080047607422 init-act: [27.954 46.908 28.043 46.868]
DEBUG:ADV:episode: 20299/10000000000, score: 0, e: 0.01, reward: 46.013, seudo-rew:0.0 max: 46.89080047607422 init-act: [27.954 46.908 28.043 46.868]
2025-07-

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-22000.keras


2025-07-16 00:50:53,227 - ADV - DEBUG - episode: 22099/10000000000, score: 0, e: 0.01, reward: 45.991, seudo-rew:0.0 max: 46.878807067871094 init-act: [28.353 47.996 28.189 47.962]
DEBUG:ADV:episode: 22099/10000000000, score: 0, e: 0.01, reward: 45.991, seudo-rew:0.0 max: 46.878807067871094 init-act: [28.353 47.996 28.189 47.962]
2025-07-16 00:51:01,796 - ADV - DEBUG - episode: 22199/10000000000, score: 0, e: 0.01, reward: 45.983, seudo-rew:0.0 max: 46.758975982666016 init-act: [28.168 47.664 27.949 47.724]
DEBUG:ADV:episode: 22199/10000000000, score: 0, e: 0.01, reward: 45.983, seudo-rew:0.0 max: 46.758975982666016 init-act: [28.168 47.664 27.949 47.724]
2025-07-16 00:51:10,386 - ADV - DEBUG - episode: 22299/10000000000, score: 0, e: 0.01, reward: 46.064, seudo-rew:0.0 max: 46.831485748291016 init-act: [28.151 47.258 27.919 47.512]
DEBUG:ADV:episode: 22299/10000000000, score: 0, e: 0.01, reward: 46.064, seudo-rew:0.0 max: 46.831485748291016 init-act: [28.151 47.258 27.919 47.512]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-24000.keras


2025-07-16 00:53:39,338 - ADV - DEBUG - episode: 24099/10000000000, score: 0, e: 0.01, reward: 46.278, seudo-rew:0.0 max: 46.65253829956055 init-act: [29.631 47.421 29.457 47.457]
DEBUG:ADV:episode: 24099/10000000000, score: 0, e: 0.01, reward: 46.278, seudo-rew:0.0 max: 46.65253829956055 init-act: [29.631 47.421 29.457 47.457]
2025-07-16 00:53:47,520 - ADV - DEBUG - episode: 24199/10000000000, score: 0, e: 0.01, reward: 46.618, seudo-rew:0.0 max: 46.60029220581055 init-act: [29.398 47.651 29.367 47.465]
DEBUG:ADV:episode: 24199/10000000000, score: 0, e: 0.01, reward: 46.618, seudo-rew:0.0 max: 46.60029220581055 init-act: [29.398 47.651 29.367 47.465]
2025-07-16 00:53:55,812 - ADV - DEBUG - episode: 24299/10000000000, score: 0, e: 0.01, reward: 46.738, seudo-rew:0.0 max: 46.5443229675293 init-act: [29.294 47.565 29.014 47.321]
DEBUG:ADV:episode: 24299/10000000000, score: 0, e: 0.01, reward: 46.738, seudo-rew:0.0 max: 46.5443229675293 init-act: [29.294 47.565 29.014 47.321]
2025-07-16 0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-26000.keras


2025-07-16 00:56:26,070 - ADV - DEBUG - episode: 26099/10000000000, score: 0, e: 0.01, reward: 46.906, seudo-rew:0.0 max: 47.1270751953125 init-act: [28.903 47.763 28.702 47.405]
DEBUG:ADV:episode: 26099/10000000000, score: 0, e: 0.01, reward: 46.906, seudo-rew:0.0 max: 47.1270751953125 init-act: [28.903 47.763 28.702 47.405]
2025-07-16 00:56:34,465 - ADV - DEBUG - episode: 26199/10000000000, score: 0, e: 0.01, reward: 46.931, seudo-rew:0.0 max: 47.16667938232422 init-act: [29.34  48.076 29.398 47.892]
DEBUG:ADV:episode: 26199/10000000000, score: 0, e: 0.01, reward: 46.931, seudo-rew:0.0 max: 47.16667938232422 init-act: [29.34  48.076 29.398 47.892]
2025-07-16 00:56:42,695 - ADV - DEBUG - episode: 26299/10000000000, score: 0, e: 0.01, reward: 46.743, seudo-rew:0.0 max: 47.48442459106445 init-act: [29.607 48.567 29.449 48.301]
DEBUG:ADV:episode: 26299/10000000000, score: 0, e: 0.01, reward: 46.743, seudo-rew:0.0 max: 47.48442459106445 init-act: [29.607 48.567 29.449 48.301]
2025-07-16 0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-28000.keras


2025-07-16 00:59:15,571 - ADV - DEBUG - episode: 28099/10000000000, score: 0, e: 0.01, reward: 47.415, seudo-rew:0.0 max: 48.1761360168457 init-act: [30.223 48.862 30.075 49.15 ]
DEBUG:ADV:episode: 28099/10000000000, score: 0, e: 0.01, reward: 47.415, seudo-rew:0.0 max: 48.1761360168457 init-act: [30.223 48.862 30.075 49.15 ]
2025-07-16 00:59:23,833 - ADV - DEBUG - episode: 28199/10000000000, score: 0, e: 0.01, reward: 47.129, seudo-rew:0.0 max: 48.096527099609375 init-act: [30.109 48.148 30.085 49.038]
DEBUG:ADV:episode: 28199/10000000000, score: 0, e: 0.01, reward: 47.129, seudo-rew:0.0 max: 48.096527099609375 init-act: [30.109 48.148 30.085 49.038]
2025-07-16 00:59:32,833 - ADV - DEBUG - episode: 28299/10000000000, score: 0, e: 0.01, reward: 47.456, seudo-rew:0.0 max: 48.07341766357422 init-act: [30.404 48.406 30.246 49.203]
DEBUG:ADV:episode: 28299/10000000000, score: 0, e: 0.01, reward: 47.456, seudo-rew:0.0 max: 48.07341766357422 init-act: [30.404 48.406 30.246 49.203]
2025-07-16

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-30000.keras


2025-07-16 01:02:06,018 - ADV - DEBUG - episode: 30099/10000000000, score: 0, e: 0.01, reward: 47.951, seudo-rew:0.0 max: 48.229042053222656 init-act: [30.518 48.309 30.664 48.306]
DEBUG:ADV:episode: 30099/10000000000, score: 0, e: 0.01, reward: 47.951, seudo-rew:0.0 max: 48.229042053222656 init-act: [30.518 48.309 30.664 48.306]
2025-07-16 01:02:14,515 - ADV - DEBUG - episode: 30199/10000000000, score: 0, e: 0.01, reward: 47.699, seudo-rew:0.0 max: 48.26889419555664 init-act: [32.811 48.753 33.846 48.625]
DEBUG:ADV:episode: 30199/10000000000, score: 0, e: 0.01, reward: 47.699, seudo-rew:0.0 max: 48.26889419555664 init-act: [32.811 48.753 33.846 48.625]
2025-07-16 01:02:22,834 - ADV - DEBUG - episode: 30299/10000000000, score: 0, e: 0.01, reward: 47.539, seudo-rew:0.0 max: 48.793216705322266 init-act: [31.32  49.415 32.652 49.261]
DEBUG:ADV:episode: 30299/10000000000, score: 0, e: 0.01, reward: 47.539, seudo-rew:0.0 max: 48.793216705322266 init-act: [31.32  49.415 32.652 49.261]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-32000.keras


2025-07-16 01:04:53,533 - ADV - DEBUG - episode: 32099/10000000000, score: 0, e: 0.01, reward: 47.757, seudo-rew:0.0 max: 48.372459411621094 init-act: [30.289 49.047 30.161 49.483]
DEBUG:ADV:episode: 32099/10000000000, score: 0, e: 0.01, reward: 47.757, seudo-rew:0.0 max: 48.372459411621094 init-act: [30.289 49.047 30.161 49.483]
2025-07-16 01:05:01,784 - ADV - DEBUG - episode: 32199/10000000000, score: 0, e: 0.01, reward: 47.548, seudo-rew:0.0 max: 48.60084915161133 init-act: [30.285 49.527 30.344 49.328]
DEBUG:ADV:episode: 32199/10000000000, score: 0, e: 0.01, reward: 47.548, seudo-rew:0.0 max: 48.60084915161133 init-act: [30.285 49.527 30.344 49.328]
2025-07-16 01:05:10,052 - ADV - DEBUG - episode: 32299/10000000000, score: 0, e: 0.01, reward: 47.469, seudo-rew:0.0 max: 48.45353317260742 init-act: [29.964 49.007 29.804 49.197]
DEBUG:ADV:episode: 32299/10000000000, score: 0, e: 0.01, reward: 47.469, seudo-rew:0.0 max: 48.45353317260742 init-act: [29.964 49.007 29.804 49.197]
2025-07-

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-34000.keras


2025-07-16 01:07:40,741 - ADV - DEBUG - episode: 34099/10000000000, score: 0, e: 0.01, reward: 47.824, seudo-rew:0.0 max: 48.05912780761719 init-act: [30.584 48.831 30.213 48.634]
DEBUG:ADV:episode: 34099/10000000000, score: 0, e: 0.01, reward: 47.824, seudo-rew:0.0 max: 48.05912780761719 init-act: [30.584 48.831 30.213 48.634]
2025-07-16 01:07:49,094 - ADV - DEBUG - episode: 34199/10000000000, score: 0, e: 0.01, reward: 48.089, seudo-rew:0.0 max: 47.86115646362305 init-act: [30.361 48.133 30.057 48.272]
DEBUG:ADV:episode: 34199/10000000000, score: 0, e: 0.01, reward: 48.089, seudo-rew:0.0 max: 47.86115646362305 init-act: [30.361 48.133 30.057 48.272]
2025-07-16 01:07:57,394 - ADV - DEBUG - episode: 34299/10000000000, score: 0, e: 0.01, reward: 48.107, seudo-rew:0.0 max: 47.88081359863281 init-act: [30.291 48.617 29.822 48.466]
DEBUG:ADV:episode: 34299/10000000000, score: 0, e: 0.01, reward: 48.107, seudo-rew:0.0 max: 47.88081359863281 init-act: [30.291 48.617 29.822 48.466]
2025-07-16

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-36000.keras


2025-07-16 01:10:44,969 - ADV - DEBUG - episode: 36099/10000000000, score: 0, e: 0.01, reward: 47.654, seudo-rew:0.0 max: 48.20387268066406 init-act: [30.465 48.698 30.158 48.735]
DEBUG:ADV:episode: 36099/10000000000, score: 0, e: 0.01, reward: 47.654, seudo-rew:0.0 max: 48.20387268066406 init-act: [30.465 48.698 30.158 48.735]
2025-07-16 01:10:56,241 - ADV - DEBUG - episode: 36199/10000000000, score: 0, e: 0.01, reward: 47.332, seudo-rew:0.0 max: 48.183387756347656 init-act: [30.693 49.246 30.189 48.914]
DEBUG:ADV:episode: 36199/10000000000, score: 0, e: 0.01, reward: 47.332, seudo-rew:0.0 max: 48.183387756347656 init-act: [30.693 49.246 30.189 48.914]
2025-07-16 01:11:08,303 - ADV - DEBUG - episode: 36299/10000000000, score: 0, e: 0.01, reward: 47.409, seudo-rew:0.0 max: 48.21895980834961 init-act: [30.929 48.968 30.44  49.008]
DEBUG:ADV:episode: 36299/10000000000, score: 0, e: 0.01, reward: 47.409, seudo-rew:0.0 max: 48.21895980834961 init-act: [30.929 48.968 30.44  49.008]
2025-07-

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-38000.keras


2025-07-16 01:13:49,222 - ADV - DEBUG - episode: 38099/10000000000, score: 0, e: 0.01, reward: 47.916, seudo-rew:0.0 max: 48.204803466796875 init-act: [31.394 49.128 32.939 48.992]
DEBUG:ADV:episode: 38099/10000000000, score: 0, e: 0.01, reward: 47.916, seudo-rew:0.0 max: 48.204803466796875 init-act: [31.394 49.128 32.939 48.992]
2025-07-16 01:13:57,599 - ADV - DEBUG - episode: 38199/10000000000, score: 0, e: 0.01, reward: 47.91, seudo-rew:0.0 max: 48.240196228027344 init-act: [30.297 49.224 31.92  48.876]
DEBUG:ADV:episode: 38199/10000000000, score: 0, e: 0.01, reward: 47.91, seudo-rew:0.0 max: 48.240196228027344 init-act: [30.297 49.224 31.92  48.876]
2025-07-16 01:14:05,897 - ADV - DEBUG - episode: 38299/10000000000, score: 0, e: 0.01, reward: 47.39, seudo-rew:0.0 max: 48.35655212402344 init-act: [30.578 48.848 31.687 48.836]
DEBUG:ADV:episode: 38299/10000000000, score: 0, e: 0.01, reward: 47.39, seudo-rew:0.0 max: 48.35655212402344 init-act: [30.578 48.848 31.687 48.836]
2025-07-16

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-40000.keras


2025-07-16 01:16:40,563 - ADV - DEBUG - episode: 40099/10000000000, score: 0, e: 0.01, reward: 48.284, seudo-rew:0.0 max: 48.167842864990234 init-act: [31.635 48.976 31.059 49.016]
DEBUG:ADV:episode: 40099/10000000000, score: 0, e: 0.01, reward: 48.284, seudo-rew:0.0 max: 48.167842864990234 init-act: [31.635 48.976 31.059 49.016]
2025-07-16 01:16:49,998 - ADV - DEBUG - episode: 40199/10000000000, score: 0, e: 0.01, reward: 47.445, seudo-rew:0.0 max: 48.19551086425781 init-act: [31.608 49.254 31.154 48.914]
DEBUG:ADV:episode: 40199/10000000000, score: 0, e: 0.01, reward: 47.445, seudo-rew:0.0 max: 48.19551086425781 init-act: [31.608 49.254 31.154 48.914]
2025-07-16 01:16:58,330 - ADV - DEBUG - episode: 40299/10000000000, score: 0, e: 0.01, reward: 47.861, seudo-rew:0.0 max: 48.31079864501953 init-act: [31.779 49.223 31.355 49.061]
DEBUG:ADV:episode: 40299/10000000000, score: 0, e: 0.01, reward: 47.861, seudo-rew:0.0 max: 48.31079864501953 init-act: [31.779 49.223 31.355 49.061]
2025-07-

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-42000.keras


2025-07-16 01:19:48,369 - ADV - DEBUG - episode: 42099/10000000000, score: 0, e: 0.01, reward: 47.49, seudo-rew:0.0 max: 48.28105926513672 init-act: [32.498 49.082 32.172 49.186]
DEBUG:ADV:episode: 42099/10000000000, score: 0, e: 0.01, reward: 47.49, seudo-rew:0.0 max: 48.28105926513672 init-act: [32.498 49.082 32.172 49.186]
2025-07-16 01:19:56,724 - ADV - DEBUG - episode: 42199/10000000000, score: 0, e: 0.01, reward: 47.602, seudo-rew:0.0 max: 48.48185729980469 init-act: [32.668 49.488 32.048 49.245]
DEBUG:ADV:episode: 42199/10000000000, score: 0, e: 0.01, reward: 47.602, seudo-rew:0.0 max: 48.48185729980469 init-act: [32.668 49.488 32.048 49.245]
2025-07-16 01:20:05,031 - ADV - DEBUG - episode: 42299/10000000000, score: 0, e: 0.01, reward: 47.798, seudo-rew:0.0 max: 48.48101043701172 init-act: [32.156 49.137 31.592 49.   ]
DEBUG:ADV:episode: 42299/10000000000, score: 0, e: 0.01, reward: 47.798, seudo-rew:0.0 max: 48.48101043701172 init-act: [32.156 49.137 31.592 49.   ]
2025-07-16 0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-44000.keras


2025-07-16 01:22:57,320 - ADV - DEBUG - episode: 44099/10000000000, score: 0, e: 0.01, reward: 47.816, seudo-rew:0.0 max: 48.86355209350586 init-act: [31.441 49.317 31.986 49.564]
DEBUG:ADV:episode: 44099/10000000000, score: 0, e: 0.01, reward: 47.816, seudo-rew:0.0 max: 48.86355209350586 init-act: [31.441 49.317 31.986 49.564]
2025-07-16 01:23:05,687 - ADV - DEBUG - episode: 44199/10000000000, score: 0, e: 0.01, reward: 47.971, seudo-rew:0.0 max: 48.93025207519531 init-act: [31.555 49.429 31.736 49.655]
DEBUG:ADV:episode: 44199/10000000000, score: 0, e: 0.01, reward: 47.971, seudo-rew:0.0 max: 48.93025207519531 init-act: [31.555 49.429 31.736 49.655]
2025-07-16 01:23:13,965 - ADV - DEBUG - episode: 44299/10000000000, score: 0, e: 0.01, reward: 47.457, seudo-rew:0.0 max: 48.90958023071289 init-act: [31.508 49.301 31.707 49.604]
DEBUG:ADV:episode: 44299/10000000000, score: 0, e: 0.01, reward: 47.457, seudo-rew:0.0 max: 48.90958023071289 init-act: [31.508 49.301 31.707 49.604]
2025-07-16

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-46000.keras


2025-07-16 01:25:44,078 - ADV - DEBUG - episode: 46099/10000000000, score: 0, e: 0.01, reward: 47.417, seudo-rew:0.0 max: 49.153900146484375 init-act: [31.707 49.255 31.779 49.647]
DEBUG:ADV:episode: 46099/10000000000, score: 0, e: 0.01, reward: 47.417, seudo-rew:0.0 max: 49.153900146484375 init-act: [31.707 49.255 31.779 49.647]
2025-07-16 01:25:52,891 - ADV - DEBUG - episode: 46199/10000000000, score: 0, e: 0.01, reward: 47.664, seudo-rew:0.0 max: 48.967613220214844 init-act: [32.295 49.645 31.994 49.878]
DEBUG:ADV:episode: 46199/10000000000, score: 0, e: 0.01, reward: 47.664, seudo-rew:0.0 max: 48.967613220214844 init-act: [32.295 49.645 31.994 49.878]
2025-07-16 01:26:01,210 - ADV - DEBUG - episode: 46299/10000000000, score: 0, e: 0.01, reward: 47.898, seudo-rew:0.0 max: 49.11114501953125 init-act: [31.979 50.176 31.969 49.707]
DEBUG:ADV:episode: 46299/10000000000, score: 0, e: 0.01, reward: 47.898, seudo-rew:0.0 max: 49.11114501953125 init-act: [31.979 50.176 31.969 49.707]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-48000.keras


2025-07-16 01:29:19,897 - ADV - DEBUG - episode: 48099/10000000000, score: 0, e: 0.01, reward: 48.174, seudo-rew:0.0 max: 49.247352600097656 init-act: [32.412 49.708 31.893 49.461]
DEBUG:ADV:episode: 48099/10000000000, score: 0, e: 0.01, reward: 48.174, seudo-rew:0.0 max: 49.247352600097656 init-act: [32.412 49.708 31.893 49.461]
2025-07-16 01:29:28,772 - ADV - DEBUG - episode: 48199/10000000000, score: 0, e: 0.01, reward: 48.071, seudo-rew:0.0 max: 49.026451110839844 init-act: [32.016 50.029 31.652 49.738]
DEBUG:ADV:episode: 48199/10000000000, score: 0, e: 0.01, reward: 48.071, seudo-rew:0.0 max: 49.026451110839844 init-act: [32.016 50.029 31.652 49.738]
2025-07-16 01:29:37,407 - ADV - DEBUG - episode: 48299/10000000000, score: 0, e: 0.01, reward: 47.808, seudo-rew:0.0 max: 48.96543884277344 init-act: [31.656 49.942 31.676 49.7  ]
DEBUG:ADV:episode: 48299/10000000000, score: 0, e: 0.01, reward: 47.808, seudo-rew:0.0 max: 48.96543884277344 init-act: [31.656 49.942 31.676 49.7  ]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-50000.keras


2025-07-16 01:32:44,158 - ADV - DEBUG - episode: 50099/10000000000, score: 0, e: 0.01, reward: 48.218, seudo-rew:0.0 max: 48.672664642333984 init-act: [32.512 49.887 31.999 49.531]
DEBUG:ADV:episode: 50099/10000000000, score: 0, e: 0.01, reward: 48.218, seudo-rew:0.0 max: 48.672664642333984 init-act: [32.512 49.887 31.999 49.531]
2025-07-16 01:32:55,520 - ADV - DEBUG - episode: 50199/10000000000, score: 0, e: 0.01, reward: 47.926, seudo-rew:0.0 max: 48.87065887451172 init-act: [32.365 49.465 32.103 49.215]
DEBUG:ADV:episode: 50199/10000000000, score: 0, e: 0.01, reward: 47.926, seudo-rew:0.0 max: 48.87065887451172 init-act: [32.365 49.465 32.103 49.215]
2025-07-16 01:33:06,732 - ADV - DEBUG - episode: 50299/10000000000, score: 0, e: 0.01, reward: 48.067, seudo-rew:0.0 max: 48.81980514526367 init-act: [32.527 49.902 32.068 49.696]
DEBUG:ADV:episode: 50299/10000000000, score: 0, e: 0.01, reward: 48.067, seudo-rew:0.0 max: 48.81980514526367 init-act: [32.527 49.902 32.068 49.696]
2025-07-

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-52000.keras


2025-07-16 01:36:27,304 - ADV - DEBUG - episode: 52099/10000000000, score: 0, e: 0.01, reward: 47.954, seudo-rew:0.0 max: 49.28197479248047 init-act: [31.551 50.289 31.125 49.997]
DEBUG:ADV:episode: 52099/10000000000, score: 0, e: 0.01, reward: 47.954, seudo-rew:0.0 max: 49.28197479248047 init-act: [31.551 50.289 31.125 49.997]
2025-07-16 01:36:37,717 - ADV - DEBUG - episode: 52199/10000000000, score: 0, e: 0.01, reward: 47.581, seudo-rew:0.0 max: 49.5151481628418 init-act: [33.441 50.258 34.021 50.305]
DEBUG:ADV:episode: 52199/10000000000, score: 0, e: 0.01, reward: 47.581, seudo-rew:0.0 max: 49.5151481628418 init-act: [33.441 50.258 34.021 50.305]
2025-07-16 01:36:48,755 - ADV - DEBUG - episode: 52299/10000000000, score: 0, e: 0.01, reward: 47.475, seudo-rew:0.0 max: 49.327632904052734 init-act: [32.651 50.363 32.742 50.184]
DEBUG:ADV:episode: 52299/10000000000, score: 0, e: 0.01, reward: 47.475, seudo-rew:0.0 max: 49.327632904052734 init-act: [32.651 50.363 32.742 50.184]
2025-07-16

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-54000.keras


2025-07-16 01:39:53,531 - ADV - DEBUG - episode: 54099/10000000000, score: 0, e: 0.01, reward: 47.764, seudo-rew:0.0 max: 49.47368621826172 init-act: [33.564 50.445 34.246 50.242]
DEBUG:ADV:episode: 54099/10000000000, score: 0, e: 0.01, reward: 47.764, seudo-rew:0.0 max: 49.47368621826172 init-act: [33.564 50.445 34.246 50.242]
2025-07-16 01:40:03,904 - ADV - DEBUG - episode: 54199/10000000000, score: 0, e: 0.01, reward: 48.141, seudo-rew:0.0 max: 49.31484603881836 init-act: [32.682 49.768 32.888 49.453]
DEBUG:ADV:episode: 54199/10000000000, score: 0, e: 0.01, reward: 48.141, seudo-rew:0.0 max: 49.31484603881836 init-act: [32.682 49.768 32.888 49.453]
2025-07-16 01:40:14,193 - ADV - DEBUG - episode: 54299/10000000000, score: 0, e: 0.01, reward: 48.105, seudo-rew:0.0 max: 49.1212043762207 init-act: [32.352 49.478 32.596 49.336]
DEBUG:ADV:episode: 54299/10000000000, score: 0, e: 0.01, reward: 48.105, seudo-rew:0.0 max: 49.1212043762207 init-act: [32.352 49.478 32.596 49.336]
2025-07-16 0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-56000.keras


2025-07-16 01:43:09,566 - ADV - DEBUG - episode: 56099/10000000000, score: 0, e: 0.01, reward: 48.079, seudo-rew:0.0 max: 48.75263595581055 init-act: [31.431 49.219 31.035 49.117]
DEBUG:ADV:episode: 56099/10000000000, score: 0, e: 0.01, reward: 48.079, seudo-rew:0.0 max: 48.75263595581055 init-act: [31.431 49.219 31.035 49.117]
2025-07-16 01:43:19,238 - ADV - DEBUG - episode: 56199/10000000000, score: 0, e: 0.01, reward: 48.104, seudo-rew:0.0 max: 48.49168014526367 init-act: [31.532 49.43  31.162 48.879]
DEBUG:ADV:episode: 56199/10000000000, score: 0, e: 0.01, reward: 48.104, seudo-rew:0.0 max: 48.49168014526367 init-act: [31.532 49.43  31.162 48.879]
2025-07-16 01:43:29,016 - ADV - DEBUG - episode: 56299/10000000000, score: 0, e: 0.01, reward: 47.963, seudo-rew:0.0 max: 48.490596771240234 init-act: [31.586 49.458 31.219 49.251]
DEBUG:ADV:episode: 56299/10000000000, score: 0, e: 0.01, reward: 47.963, seudo-rew:0.0 max: 48.490596771240234 init-act: [31.586 49.458 31.219 49.251]
2025-07-

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-58000.keras


2025-07-16 01:46:22,534 - ADV - DEBUG - episode: 58099/10000000000, score: 0, e: 0.01, reward: 48.104, seudo-rew:0.0 max: 48.727294921875 init-act: [32.113 49.344 31.627 49.308]
DEBUG:ADV:episode: 58099/10000000000, score: 0, e: 0.01, reward: 48.104, seudo-rew:0.0 max: 48.727294921875 init-act: [32.113 49.344 31.627 49.308]
2025-07-16 01:46:31,774 - ADV - DEBUG - episode: 58199/10000000000, score: 0, e: 0.01, reward: 47.958, seudo-rew:0.0 max: 48.60255432128906 init-act: [32.002 49.578 31.507 49.41 ]
DEBUG:ADV:episode: 58199/10000000000, score: 0, e: 0.01, reward: 47.958, seudo-rew:0.0 max: 48.60255432128906 init-act: [32.002 49.578 31.507 49.41 ]
2025-07-16 01:46:41,081 - ADV - DEBUG - episode: 58299/10000000000, score: 0, e: 0.01, reward: 47.514, seudo-rew:0.0 max: 48.62559127807617 init-act: [31.959 49.419 31.438 49.082]
DEBUG:ADV:episode: 58299/10000000000, score: 0, e: 0.01, reward: 47.514, seudo-rew:0.0 max: 48.62559127807617 init-act: [31.959 49.419 31.438 49.082]
2025-07-16 01:

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-60000.keras


2025-07-16 01:49:28,630 - ADV - DEBUG - episode: 60099/10000000000, score: 0, e: 0.01, reward: 48.129, seudo-rew:0.0 max: 48.78434371948242 init-act: [32.442 49.559 31.844 49.543]
DEBUG:ADV:episode: 60099/10000000000, score: 0, e: 0.01, reward: 48.129, seudo-rew:0.0 max: 48.78434371948242 init-act: [32.442 49.559 31.844 49.543]
2025-07-16 01:49:38,400 - ADV - DEBUG - episode: 60199/10000000000, score: 0, e: 0.01, reward: 48.199, seudo-rew:0.0 max: 48.740760803222656 init-act: [32.843 49.572 32.32  49.458]
DEBUG:ADV:episode: 60199/10000000000, score: 0, e: 0.01, reward: 48.199, seudo-rew:0.0 max: 48.740760803222656 init-act: [32.843 49.572 32.32  49.458]
2025-07-16 01:49:48,084 - ADV - DEBUG - episode: 60299/10000000000, score: 0, e: 0.01, reward: 48.363, seudo-rew:0.0 max: 48.92652130126953 init-act: [32.983 49.942 32.54  49.793]
DEBUG:ADV:episode: 60299/10000000000, score: 0, e: 0.01, reward: 48.363, seudo-rew:0.0 max: 48.92652130126953 init-act: [32.983 49.942 32.54  49.793]
2025-07-

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-62000.keras


2025-07-16 01:52:51,511 - ADV - DEBUG - episode: 62099/10000000000, score: 0, e: 0.01, reward: 47.75, seudo-rew:0.0 max: 49.04730224609375 init-act: [32.718 49.875 32.383 49.656]
DEBUG:ADV:episode: 62099/10000000000, score: 0, e: 0.01, reward: 47.75, seudo-rew:0.0 max: 49.04730224609375 init-act: [32.718 49.875 32.383 49.656]
2025-07-16 01:53:01,472 - ADV - DEBUG - episode: 62199/10000000000, score: 0, e: 0.01, reward: 48.228, seudo-rew:0.0 max: 49.106815338134766 init-act: [32.647 49.613 32.252 49.391]
DEBUG:ADV:episode: 62199/10000000000, score: 0, e: 0.01, reward: 48.228, seudo-rew:0.0 max: 49.106815338134766 init-act: [32.647 49.613 32.252 49.391]
2025-07-16 01:53:11,087 - ADV - DEBUG - episode: 62299/10000000000, score: 0, e: 0.01, reward: 48.131, seudo-rew:0.0 max: 48.93482208251953 init-act: [32.921 49.68  32.127 49.564]
DEBUG:ADV:episode: 62299/10000000000, score: 0, e: 0.01, reward: 48.131, seudo-rew:0.0 max: 48.93482208251953 init-act: [32.921 49.68  32.127 49.564]
2025-07-16

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-64000.keras


2025-07-16 01:56:07,849 - ADV - DEBUG - episode: 64099/10000000000, score: 0, e: 0.01, reward: 47.835, seudo-rew:0.0 max: 49.320953369140625 init-act: [33.02  50.194 32.776 49.95 ]
DEBUG:ADV:episode: 64099/10000000000, score: 0, e: 0.01, reward: 47.835, seudo-rew:0.0 max: 49.320953369140625 init-act: [33.02  50.194 32.776 49.95 ]
2025-07-16 01:56:17,514 - ADV - DEBUG - episode: 64199/10000000000, score: 0, e: 0.01, reward: 48.034, seudo-rew:0.0 max: 49.22854995727539 init-act: [32.996 49.855 32.707 49.652]
DEBUG:ADV:episode: 64199/10000000000, score: 0, e: 0.01, reward: 48.034, seudo-rew:0.0 max: 49.22854995727539 init-act: [32.996 49.855 32.707 49.652]
2025-07-16 01:56:27,242 - ADV - DEBUG - episode: 64299/10000000000, score: 0, e: 0.01, reward: 47.727, seudo-rew:0.0 max: 49.15998458862305 init-act: [33.053 49.761 32.842 49.694]
DEBUG:ADV:episode: 64299/10000000000, score: 0, e: 0.01, reward: 47.727, seudo-rew:0.0 max: 49.15998458862305 init-act: [33.053 49.761 32.842 49.694]
2025-07-

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-66000.keras


2025-07-16 01:59:21,669 - ADV - DEBUG - episode: 66099/10000000000, score: 0, e: 0.01, reward: 47.428, seudo-rew:0.0 max: 48.995445251464844 init-act: [32.514 49.824 31.978 49.544]
DEBUG:ADV:episode: 66099/10000000000, score: 0, e: 0.01, reward: 47.428, seudo-rew:0.0 max: 48.995445251464844 init-act: [32.514 49.824 31.978 49.544]
2025-07-16 01:59:31,373 - ADV - DEBUG - episode: 66199/10000000000, score: 0, e: 0.01, reward: 47.787, seudo-rew:0.0 max: 48.889034271240234 init-act: [32.275 49.633 31.921 49.356]
DEBUG:ADV:episode: 66199/10000000000, score: 0, e: 0.01, reward: 47.787, seudo-rew:0.0 max: 48.889034271240234 init-act: [32.275 49.633 31.921 49.356]
2025-07-16 01:59:40,993 - ADV - DEBUG - episode: 66299/10000000000, score: 0, e: 0.01, reward: 47.64, seudo-rew:0.0 max: 48.665287017822266 init-act: [32.078 48.765 31.73  48.564]
DEBUG:ADV:episode: 66299/10000000000, score: 0, e: 0.01, reward: 47.64, seudo-rew:0.0 max: 48.665287017822266 init-act: [32.078 48.765 31.73  48.564]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-68000.keras


2025-07-16 02:02:37,284 - ADV - DEBUG - episode: 68099/10000000000, score: 0, e: 0.01, reward: 48.466, seudo-rew:0.0 max: 47.80942916870117 init-act: [31.873 48.649 31.59  48.808]
DEBUG:ADV:episode: 68099/10000000000, score: 0, e: 0.01, reward: 48.466, seudo-rew:0.0 max: 47.80942916870117 init-act: [31.873 48.649 31.59  48.808]
2025-07-16 02:02:47,022 - ADV - DEBUG - episode: 68199/10000000000, score: 0, e: 0.01, reward: 47.655, seudo-rew:0.0 max: 47.87472915649414 init-act: [32.109 48.777 31.658 48.372]
DEBUG:ADV:episode: 68199/10000000000, score: 0, e: 0.01, reward: 47.655, seudo-rew:0.0 max: 47.87472915649414 init-act: [32.109 48.777 31.658 48.372]
2025-07-16 02:02:56,781 - ADV - DEBUG - episode: 68299/10000000000, score: 0, e: 0.01, reward: 47.397, seudo-rew:0.0 max: 47.85898208618164 init-act: [32.914 48.905 32.098 48.612]
DEBUG:ADV:episode: 68299/10000000000, score: 0, e: 0.01, reward: 47.397, seudo-rew:0.0 max: 47.85898208618164 init-act: [32.914 48.905 32.098 48.612]
2025-07-16

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-70000.keras


2025-07-16 02:05:52,379 - ADV - DEBUG - episode: 70099/10000000000, score: 0, e: 0.01, reward: 48.067, seudo-rew:0.0 max: 48.88288879394531 init-act: [33.797 49.366 33.23  49.25 ]
DEBUG:ADV:episode: 70099/10000000000, score: 0, e: 0.01, reward: 48.067, seudo-rew:0.0 max: 48.88288879394531 init-act: [33.797 49.366 33.23  49.25 ]
2025-07-16 02:06:02,271 - ADV - DEBUG - episode: 70199/10000000000, score: 0, e: 0.01, reward: 48.146, seudo-rew:0.0 max: 48.95415115356445 init-act: [33.67  49.923 33.289 49.738]
DEBUG:ADV:episode: 70199/10000000000, score: 0, e: 0.01, reward: 48.146, seudo-rew:0.0 max: 48.95415115356445 init-act: [33.67  49.923 33.289 49.738]
2025-07-16 02:06:11,992 - ADV - DEBUG - episode: 70299/10000000000, score: 0, e: 0.01, reward: 47.935, seudo-rew:0.0 max: 48.71661376953125 init-act: [33.44  49.368 32.877 49.269]
DEBUG:ADV:episode: 70299/10000000000, score: 0, e: 0.01, reward: 47.935, seudo-rew:0.0 max: 48.71661376953125 init-act: [33.44  49.368 32.877 49.269]
2025-07-16

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-72000.keras


2025-07-16 02:09:07,894 - ADV - DEBUG - episode: 72099/10000000000, score: 0, e: 0.01, reward: 48.377, seudo-rew:0.0 max: 49.428951263427734 init-act: [33.492 50.078 33.482 49.977]
DEBUG:ADV:episode: 72099/10000000000, score: 0, e: 0.01, reward: 48.377, seudo-rew:0.0 max: 49.428951263427734 init-act: [33.492 50.078 33.482 49.977]
2025-07-16 02:09:17,638 - ADV - DEBUG - episode: 72199/10000000000, score: 0, e: 0.01, reward: 48.375, seudo-rew:0.0 max: 49.426273345947266 init-act: [33.469 50.028 33.428 49.837]
DEBUG:ADV:episode: 72199/10000000000, score: 0, e: 0.01, reward: 48.375, seudo-rew:0.0 max: 49.426273345947266 init-act: [33.469 50.028 33.428 49.837]
2025-07-16 02:09:27,451 - ADV - DEBUG - episode: 72299/10000000000, score: 0, e: 0.01, reward: 48.378, seudo-rew:0.0 max: 49.48011779785156 init-act: [33.139 50.243 33.119 50.16 ]
DEBUG:ADV:episode: 72299/10000000000, score: 0, e: 0.01, reward: 48.378, seudo-rew:0.0 max: 49.48011779785156 init-act: [33.139 50.243 33.119 50.16 ]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-74000.keras


2025-07-16 02:12:23,837 - ADV - DEBUG - episode: 74099/10000000000, score: 0, e: 0.01, reward: 48.064, seudo-rew:0.0 max: 49.4330940246582 init-act: [33.59  50.398 33.166 50.216]
DEBUG:ADV:episode: 74099/10000000000, score: 0, e: 0.01, reward: 48.064, seudo-rew:0.0 max: 49.4330940246582 init-act: [33.59  50.398 33.166 50.216]
2025-07-16 02:12:33,714 - ADV - DEBUG - episode: 74199/10000000000, score: 0, e: 0.01, reward: 48.104, seudo-rew:0.0 max: 49.49900817871094 init-act: [33.277 50.593 32.803 50.677]
DEBUG:ADV:episode: 74199/10000000000, score: 0, e: 0.01, reward: 48.104, seudo-rew:0.0 max: 49.49900817871094 init-act: [33.277 50.593 32.803 50.677]
2025-07-16 02:12:43,489 - ADV - DEBUG - episode: 74299/10000000000, score: 0, e: 0.01, reward: 47.525, seudo-rew:0.0 max: 49.51625061035156 init-act: [33.373 50.575 32.871 50.226]
DEBUG:ADV:episode: 74299/10000000000, score: 0, e: 0.01, reward: 47.525, seudo-rew:0.0 max: 49.51625061035156 init-act: [33.373 50.575 32.871 50.226]
2025-07-16 0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-76000.keras


2025-07-16 02:15:39,150 - ADV - DEBUG - episode: 76099/10000000000, score: 0, e: 0.01, reward: 48.122, seudo-rew:0.0 max: 49.35187530517578 init-act: [33.555 50.293 32.914 50.122]
DEBUG:ADV:episode: 76099/10000000000, score: 0, e: 0.01, reward: 48.122, seudo-rew:0.0 max: 49.35187530517578 init-act: [33.555 50.293 32.914 50.122]
2025-07-16 02:15:49,103 - ADV - DEBUG - episode: 76199/10000000000, score: 0, e: 0.01, reward: 48.119, seudo-rew:0.0 max: 49.32316970825195 init-act: [33.372 49.949 32.887 49.546]
DEBUG:ADV:episode: 76199/10000000000, score: 0, e: 0.01, reward: 48.119, seudo-rew:0.0 max: 49.32316970825195 init-act: [33.372 49.949 32.887 49.546]
2025-07-16 02:15:58,932 - ADV - DEBUG - episode: 76299/10000000000, score: 0, e: 0.01, reward: 48.65, seudo-rew:0.0 max: 49.19819259643555 init-act: [33.447 49.829 33.885 49.576]
DEBUG:ADV:episode: 76299/10000000000, score: 0, e: 0.01, reward: 48.65, seudo-rew:0.0 max: 49.19819259643555 init-act: [33.447 49.829 33.885 49.576]
2025-07-16 0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-78000.keras


2025-07-16 02:18:54,282 - ADV - DEBUG - episode: 78099/10000000000, score: 0, e: 0.01, reward: 47.831, seudo-rew:0.0 max: 49.18911361694336 init-act: [33.328 50.074 33.111 49.738]
DEBUG:ADV:episode: 78099/10000000000, score: 0, e: 0.01, reward: 47.831, seudo-rew:0.0 max: 49.18911361694336 init-act: [33.328 50.074 33.111 49.738]
2025-07-16 02:19:03,991 - ADV - DEBUG - episode: 78199/10000000000, score: 0, e: 0.01, reward: 48.316, seudo-rew:0.0 max: 49.28773880004883 init-act: [33.064 49.746 32.914 49.453]
DEBUG:ADV:episode: 78199/10000000000, score: 0, e: 0.01, reward: 48.316, seudo-rew:0.0 max: 49.28773880004883 init-act: [33.064 49.746 32.914 49.453]
2025-07-16 02:19:13,770 - ADV - DEBUG - episode: 78299/10000000000, score: 0, e: 0.01, reward: 48.204, seudo-rew:0.0 max: 49.25010299682617 init-act: [32.78  49.992 32.533 49.571]
DEBUG:ADV:episode: 78299/10000000000, score: 0, e: 0.01, reward: 48.204, seudo-rew:0.0 max: 49.25010299682617 init-act: [32.78  49.992 32.533 49.571]
2025-07-16

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-80000.keras


2025-07-16 02:22:10,465 - ADV - DEBUG - episode: 80099/10000000000, score: 0, e: 0.01, reward: 48.304, seudo-rew:0.0 max: 48.81133270263672 init-act: [32.976 49.403 32.943 49.231]
DEBUG:ADV:episode: 80099/10000000000, score: 0, e: 0.01, reward: 48.304, seudo-rew:0.0 max: 48.81133270263672 init-act: [32.976 49.403 32.943 49.231]
2025-07-16 02:22:20,327 - ADV - DEBUG - episode: 80199/10000000000, score: 0, e: 0.01, reward: 47.993, seudo-rew:0.0 max: 48.778934478759766 init-act: [33.07  49.82  33.102 49.739]
DEBUG:ADV:episode: 80199/10000000000, score: 0, e: 0.01, reward: 47.993, seudo-rew:0.0 max: 48.778934478759766 init-act: [33.07  49.82  33.102 49.739]
2025-07-16 02:22:30,150 - ADV - DEBUG - episode: 80299/10000000000, score: 0, e: 0.01, reward: 48.073, seudo-rew:0.0 max: 49.04021072387695 init-act: [33.176 49.793 33.16  49.771]
DEBUG:ADV:episode: 80299/10000000000, score: 0, e: 0.01, reward: 48.073, seudo-rew:0.0 max: 49.04021072387695 init-act: [33.176 49.793 33.16  49.771]
2025-07-

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-82000.keras


2025-07-16 02:25:25,901 - ADV - DEBUG - episode: 82099/10000000000, score: 0, e: 0.01, reward: 48.09, seudo-rew:0.0 max: 49.39778137207031 init-act: [33.477 50.265 33.356 50.238]
DEBUG:ADV:episode: 82099/10000000000, score: 0, e: 0.01, reward: 48.09, seudo-rew:0.0 max: 49.39778137207031 init-act: [33.477 50.265 33.356 50.238]
2025-07-16 02:25:35,723 - ADV - DEBUG - episode: 82199/10000000000, score: 0, e: 0.01, reward: 47.829, seudo-rew:0.0 max: 49.50796890258789 init-act: [33.375 49.698 33.291 49.637]
DEBUG:ADV:episode: 82199/10000000000, score: 0, e: 0.01, reward: 47.829, seudo-rew:0.0 max: 49.50796890258789 init-act: [33.375 49.698 33.291 49.637]
2025-07-16 02:25:45,615 - ADV - DEBUG - episode: 82299/10000000000, score: 0, e: 0.01, reward: 47.621, seudo-rew:0.0 max: 49.590641021728516 init-act: [33.354 50.361 33.277 50.228]
DEBUG:ADV:episode: 82299/10000000000, score: 0, e: 0.01, reward: 47.621, seudo-rew:0.0 max: 49.590641021728516 init-act: [33.354 50.361 33.277 50.228]
2025-07-16

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-84000.keras


2025-07-16 02:28:41,588 - ADV - DEBUG - episode: 84099/10000000000, score: 0, e: 0.01, reward: 48.344, seudo-rew:0.0 max: 49.478248596191406 init-act: [33.151 50.14  32.818 50.015]
DEBUG:ADV:episode: 84099/10000000000, score: 0, e: 0.01, reward: 48.344, seudo-rew:0.0 max: 49.478248596191406 init-act: [33.151 50.14  32.818 50.015]
2025-07-16 02:28:51,330 - ADV - DEBUG - episode: 84199/10000000000, score: 0, e: 0.01, reward: 47.886, seudo-rew:0.0 max: 49.73676300048828 init-act: [32.858 50.8   32.452 50.59 ]
DEBUG:ADV:episode: 84199/10000000000, score: 0, e: 0.01, reward: 47.886, seudo-rew:0.0 max: 49.73676300048828 init-act: [32.858 50.8   32.452 50.59 ]
2025-07-16 02:29:01,143 - ADV - DEBUG - episode: 84299/10000000000, score: 0, e: 0.01, reward: 47.869, seudo-rew:0.0 max: 49.61088943481445 init-act: [32.547 50.148 32.211 50.024]
DEBUG:ADV:episode: 84299/10000000000, score: 0, e: 0.01, reward: 47.869, seudo-rew:0.0 max: 49.61088943481445 init-act: [32.547 50.148 32.211 50.024]
2025-07-

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-86000.keras


2025-07-16 02:31:57,136 - ADV - DEBUG - episode: 86099/10000000000, score: 0, e: 0.01, reward: 47.97, seudo-rew:0.0 max: 49.008888244628906 init-act: [32.262 49.878 32.303 49.664]
DEBUG:ADV:episode: 86099/10000000000, score: 0, e: 0.01, reward: 47.97, seudo-rew:0.0 max: 49.008888244628906 init-act: [32.262 49.878 32.303 49.664]
2025-07-16 02:32:06,901 - ADV - DEBUG - episode: 86199/10000000000, score: 0, e: 0.01, reward: 48.23, seudo-rew:0.0 max: 49.03618240356445 init-act: [32.336 49.269 32.217 49.246]
DEBUG:ADV:episode: 86199/10000000000, score: 0, e: 0.01, reward: 48.23, seudo-rew:0.0 max: 49.03618240356445 init-act: [32.336 49.269 32.217 49.246]
2025-07-16 02:32:16,739 - ADV - DEBUG - episode: 86299/10000000000, score: 0, e: 0.01, reward: 48.405, seudo-rew:0.0 max: 48.96482849121094 init-act: [32.836 49.925 32.611 49.689]
DEBUG:ADV:episode: 86299/10000000000, score: 0, e: 0.01, reward: 48.405, seudo-rew:0.0 max: 48.96482849121094 init-act: [32.836 49.925 32.611 49.689]
2025-07-16 0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-88000.keras


2025-07-16 02:35:11,995 - ADV - DEBUG - episode: 88099/10000000000, score: 0, e: 0.01, reward: 48.126, seudo-rew:0.0 max: 48.492454528808594 init-act: [33.07  49.258 33.532 49.046]
DEBUG:ADV:episode: 88099/10000000000, score: 0, e: 0.01, reward: 48.126, seudo-rew:0.0 max: 48.492454528808594 init-act: [33.07  49.258 33.532 49.046]
2025-07-16 02:35:21,862 - ADV - DEBUG - episode: 88199/10000000000, score: 0, e: 0.01, reward: 47.576, seudo-rew:0.0 max: 48.58135986328125 init-act: [33.653 49.504 33.733 49.242]
DEBUG:ADV:episode: 88199/10000000000, score: 0, e: 0.01, reward: 47.576, seudo-rew:0.0 max: 48.58135986328125 init-act: [33.653 49.504 33.733 49.242]
2025-07-16 02:35:31,605 - ADV - DEBUG - episode: 88299/10000000000, score: 0, e: 0.01, reward: 47.901, seudo-rew:0.0 max: 48.5139274597168 init-act: [33.174 49.421 33.251 49.353]
DEBUG:ADV:episode: 88299/10000000000, score: 0, e: 0.01, reward: 47.901, seudo-rew:0.0 max: 48.5139274597168 init-act: [33.174 49.421 33.251 49.353]
2025-07-16

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-90000.keras


2025-07-16 02:38:27,727 - ADV - DEBUG - episode: 90099/10000000000, score: 0, e: 0.01, reward: 47.945, seudo-rew:0.0 max: 49.024803161621094 init-act: [33.586 49.871 32.809 49.496]
DEBUG:ADV:episode: 90099/10000000000, score: 0, e: 0.01, reward: 47.945, seudo-rew:0.0 max: 49.024803161621094 init-act: [33.586 49.871 32.809 49.496]
2025-07-16 02:38:37,542 - ADV - DEBUG - episode: 90199/10000000000, score: 0, e: 0.01, reward: 48.244, seudo-rew:0.0 max: 48.98751449584961 init-act: [35.43  49.839 35.43  49.55 ]
DEBUG:ADV:episode: 90199/10000000000, score: 0, e: 0.01, reward: 48.244, seudo-rew:0.0 max: 48.98751449584961 init-act: [35.43  49.839 35.43  49.55 ]
2025-07-16 02:38:47,461 - ADV - DEBUG - episode: 90299/10000000000, score: 0, e: 0.01, reward: 47.907, seudo-rew:0.0 max: 49.00382995605469 init-act: [34.799 49.622 34.408 49.405]
DEBUG:ADV:episode: 90299/10000000000, score: 0, e: 0.01, reward: 47.907, seudo-rew:0.0 max: 49.00382995605469 init-act: [34.799 49.622 34.408 49.405]
2025-07-

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-92000.keras


2025-07-16 02:41:44,638 - ADV - DEBUG - episode: 92099/10000000000, score: 0, e: 0.01, reward: 48.331, seudo-rew:0.0 max: 49.02077865600586 init-act: [32.534 49.693 32.443 49.47 ]
DEBUG:ADV:episode: 92099/10000000000, score: 0, e: 0.01, reward: 48.331, seudo-rew:0.0 max: 49.02077865600586 init-act: [32.534 49.693 32.443 49.47 ]
2025-07-16 02:41:54,359 - ADV - DEBUG - episode: 92199/10000000000, score: 0, e: 0.01, reward: 48.208, seudo-rew:0.0 max: 49.124324798583984 init-act: [32.816 50.066 32.487 49.84 ]
DEBUG:ADV:episode: 92199/10000000000, score: 0, e: 0.01, reward: 48.208, seudo-rew:0.0 max: 49.124324798583984 init-act: [32.816 50.066 32.487 49.84 ]
2025-07-16 02:42:04,157 - ADV - DEBUG - episode: 92299/10000000000, score: 0, e: 0.01, reward: 48.301, seudo-rew:0.0 max: 49.18423843383789 init-act: [32.807 49.821 32.466 49.867]
DEBUG:ADV:episode: 92299/10000000000, score: 0, e: 0.01, reward: 48.301, seudo-rew:0.0 max: 49.18423843383789 init-act: [32.807 49.821 32.466 49.867]
2025-07-

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-94000.keras


2025-07-16 02:45:00,700 - ADV - DEBUG - episode: 94099/10000000000, score: 0, e: 0.01, reward: 48.537, seudo-rew:0.0 max: 48.89229202270508 init-act: [32.764 49.304 32.265 49.203]
DEBUG:ADV:episode: 94099/10000000000, score: 0, e: 0.01, reward: 48.537, seudo-rew:0.0 max: 48.89229202270508 init-act: [32.764 49.304 32.265 49.203]
2025-07-16 02:45:10,294 - ADV - DEBUG - episode: 94199/10000000000, score: 0, e: 0.01, reward: 48.526, seudo-rew:0.0 max: 48.77684783935547 init-act: [32.731 49.509 32.336 49.367]
DEBUG:ADV:episode: 94199/10000000000, score: 0, e: 0.01, reward: 48.526, seudo-rew:0.0 max: 48.77684783935547 init-act: [32.731 49.509 32.336 49.367]
2025-07-16 02:45:19,888 - ADV - DEBUG - episode: 94299/10000000000, score: 0, e: 0.01, reward: 48.59, seudo-rew:0.0 max: 48.9708251953125 init-act: [32.735 49.552 32.24  49.324]
DEBUG:ADV:episode: 94299/10000000000, score: 0, e: 0.01, reward: 48.59, seudo-rew:0.0 max: 48.9708251953125 init-act: [32.735 49.552 32.24  49.324]
2025-07-16 02:

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-96000.keras


2025-07-16 02:48:15,243 - ADV - DEBUG - episode: 96099/10000000000, score: 0, e: 0.01, reward: 48.453, seudo-rew:0.0 max: 49.47319412231445 init-act: [32.512 50.367 32.328 50.3  ]
DEBUG:ADV:episode: 96099/10000000000, score: 0, e: 0.01, reward: 48.453, seudo-rew:0.0 max: 49.47319412231445 init-act: [32.512 50.367 32.328 50.3  ]
2025-07-16 02:48:24,995 - ADV - DEBUG - episode: 96199/10000000000, score: 0, e: 0.01, reward: 48.596, seudo-rew:0.0 max: 49.387935638427734 init-act: [32.576 50.18  32.311 50.116]
DEBUG:ADV:episode: 96199/10000000000, score: 0, e: 0.01, reward: 48.596, seudo-rew:0.0 max: 49.387935638427734 init-act: [32.576 50.18  32.311 50.116]
2025-07-16 02:48:34,630 - ADV - DEBUG - episode: 96299/10000000000, score: 0, e: 0.01, reward: 48.684, seudo-rew:0.0 max: 49.33882522583008 init-act: [32.883 49.914 32.51  49.53 ]
DEBUG:ADV:episode: 96299/10000000000, score: 0, e: 0.01, reward: 48.684, seudo-rew:0.0 max: 49.33882522583008 init-act: [32.883 49.914 32.51  49.53 ]
2025-07-

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-98000.keras


2025-07-16 02:51:28,905 - ADV - DEBUG - episode: 98099/10000000000, score: 0, e: 0.01, reward: 48.446, seudo-rew:0.0 max: 48.615177154541016 init-act: [33.301 49.443 32.645 49.421]
DEBUG:ADV:episode: 98099/10000000000, score: 0, e: 0.01, reward: 48.446, seudo-rew:0.0 max: 48.615177154541016 init-act: [33.301 49.443 32.645 49.421]
2025-07-16 02:51:38,685 - ADV - DEBUG - episode: 98199/10000000000, score: 0, e: 0.01, reward: 48.154, seudo-rew:0.0 max: 48.906925201416016 init-act: [33.127 49.855 32.562 49.665]
DEBUG:ADV:episode: 98199/10000000000, score: 0, e: 0.01, reward: 48.154, seudo-rew:0.0 max: 48.906925201416016 init-act: [33.127 49.855 32.562 49.665]
2025-07-16 02:51:48,281 - ADV - DEBUG - episode: 98299/10000000000, score: 0, e: 0.01, reward: 48.637, seudo-rew:0.0 max: 48.67462158203125 init-act: [32.899 49.171 32.365 49.21 ]
DEBUG:ADV:episode: 98299/10000000000, score: 0, e: 0.01, reward: 48.637, seudo-rew:0.0 max: 48.67462158203125 init-act: [32.899 49.171 32.365 49.21 ]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-100000.keras


2025-07-16 02:54:43,123 - ADV - DEBUG - episode: 100099/10000000000, score: 0, e: 0.01, reward: 48.491, seudo-rew:0.0 max: 48.70559310913086 init-act: [34.281 49.618 35.44  49.441]
DEBUG:ADV:episode: 100099/10000000000, score: 0, e: 0.01, reward: 48.491, seudo-rew:0.0 max: 48.70559310913086 init-act: [34.281 49.618 35.44  49.441]
2025-07-16 02:54:52,789 - ADV - DEBUG - episode: 100199/10000000000, score: 0, e: 0.01, reward: 48.398, seudo-rew:0.0 max: 48.86217498779297 init-act: [33.592 49.152 35.307 49.009]
DEBUG:ADV:episode: 100199/10000000000, score: 0, e: 0.01, reward: 48.398, seudo-rew:0.0 max: 48.86217498779297 init-act: [33.592 49.152 35.307 49.009]
2025-07-16 02:55:02,705 - ADV - DEBUG - episode: 100299/10000000000, score: 0, e: 0.01, reward: 48.503, seudo-rew:0.0 max: 48.48874282836914 init-act: [32.676 49.393 34.453 49.524]
DEBUG:ADV:episode: 100299/10000000000, score: 0, e: 0.01, reward: 48.503, seudo-rew:0.0 max: 48.48874282836914 init-act: [32.676 49.393 34.453 49.524]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-102000.keras


2025-07-16 02:57:59,213 - ADV - DEBUG - episode: 102099/10000000000, score: 0, e: 0.01, reward: 47.687, seudo-rew:0.0 max: 47.8752326965332 init-act: [31.916 48.867 31.746 48.747]
DEBUG:ADV:episode: 102099/10000000000, score: 0, e: 0.01, reward: 47.687, seudo-rew:0.0 max: 47.8752326965332 init-act: [31.916 48.867 31.746 48.747]
2025-07-16 02:58:08,963 - ADV - DEBUG - episode: 102199/10000000000, score: 0, e: 0.01, reward: 48.566, seudo-rew:0.0 max: 47.94534683227539 init-act: [31.885 48.621 31.922 48.648]
DEBUG:ADV:episode: 102199/10000000000, score: 0, e: 0.01, reward: 48.566, seudo-rew:0.0 max: 47.94534683227539 init-act: [31.885 48.621 31.922 48.648]
2025-07-16 02:58:18,728 - ADV - DEBUG - episode: 102299/10000000000, score: 0, e: 0.01, reward: 48.119, seudo-rew:0.0 max: 47.99479675292969 init-act: [31.762 48.784 31.487 48.702]
DEBUG:ADV:episode: 102299/10000000000, score: 0, e: 0.01, reward: 48.119, seudo-rew:0.0 max: 47.99479675292969 init-act: [31.762 48.784 31.487 48.702]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-104000.keras


2025-07-16 03:01:14,508 - ADV - DEBUG - episode: 104099/10000000000, score: 0, e: 0.01, reward: 48.327, seudo-rew:0.0 max: 48.779541015625 init-act: [32.007 49.616 31.553 49.457]
DEBUG:ADV:episode: 104099/10000000000, score: 0, e: 0.01, reward: 48.327, seudo-rew:0.0 max: 48.779541015625 init-act: [32.007 49.616 31.553 49.457]
2025-07-16 03:01:24,253 - ADV - DEBUG - episode: 104199/10000000000, score: 0, e: 0.01, reward: 47.558, seudo-rew:0.0 max: 48.85712814331055 init-act: [32.043 49.675 31.345 49.297]
DEBUG:ADV:episode: 104199/10000000000, score: 0, e: 0.01, reward: 47.558, seudo-rew:0.0 max: 48.85712814331055 init-act: [32.043 49.675 31.345 49.297]
2025-07-16 03:01:33,946 - ADV - DEBUG - episode: 104299/10000000000, score: 0, e: 0.01, reward: 48.885, seudo-rew:0.0 max: 48.750247955322266 init-act: [31.83  49.586 31.459 49.32 ]
DEBUG:ADV:episode: 104299/10000000000, score: 0, e: 0.01, reward: 48.885, seudo-rew:0.0 max: 48.750247955322266 init-act: [31.83  49.586 31.459 49.32 ]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-106000.keras


2025-07-16 03:04:42,392 - ADV - DEBUG - episode: 106099/10000000000, score: 0, e: 0.01, reward: 48.089, seudo-rew:0.0 max: 49.10185241699219 init-act: [31.871 49.843 31.361 49.583]
DEBUG:ADV:episode: 106099/10000000000, score: 0, e: 0.01, reward: 48.089, seudo-rew:0.0 max: 49.10185241699219 init-act: [31.871 49.843 31.361 49.583]
2025-07-16 03:04:54,827 - ADV - DEBUG - episode: 106199/10000000000, score: 0, e: 0.01, reward: 48.79, seudo-rew:0.0 max: 49.063560485839844 init-act: [32.017 50.071 31.498 49.848]
DEBUG:ADV:episode: 106199/10000000000, score: 0, e: 0.01, reward: 48.79, seudo-rew:0.0 max: 49.063560485839844 init-act: [32.017 50.071 31.498 49.848]
2025-07-16 03:05:06,336 - ADV - DEBUG - episode: 106299/10000000000, score: 0, e: 0.01, reward: 48.382, seudo-rew:0.0 max: 49.1025276184082 init-act: [31.837 50.151 31.332 49.934]
DEBUG:ADV:episode: 106299/10000000000, score: 0, e: 0.01, reward: 48.382, seudo-rew:0.0 max: 49.1025276184082 init-act: [31.837 50.151 31.332 49.934]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-108000.keras


2025-07-16 03:08:44,554 - ADV - DEBUG - episode: 108099/10000000000, score: 0, e: 0.01, reward: 48.338, seudo-rew:0.0 max: 48.03205490112305 init-act: [32.777 48.521 33.358 48.434]
DEBUG:ADV:episode: 108099/10000000000, score: 0, e: 0.01, reward: 48.338, seudo-rew:0.0 max: 48.03205490112305 init-act: [32.777 48.521 33.358 48.434]
2025-07-16 03:08:56,692 - ADV - DEBUG - episode: 108199/10000000000, score: 0, e: 0.01, reward: 48.36, seudo-rew:0.0 max: 47.884883880615234 init-act: [32.242 48.89  32.95  48.738]
DEBUG:ADV:episode: 108199/10000000000, score: 0, e: 0.01, reward: 48.36, seudo-rew:0.0 max: 47.884883880615234 init-act: [32.242 48.89  32.95  48.738]
2025-07-16 03:09:08,945 - ADV - DEBUG - episode: 108299/10000000000, score: 0, e: 0.01, reward: 48.183, seudo-rew:0.0 max: 47.94053649902344 init-act: [34.002 48.91  34.668 48.868]
DEBUG:ADV:episode: 108299/10000000000, score: 0, e: 0.01, reward: 48.183, seudo-rew:0.0 max: 47.94053649902344 init-act: [34.002 48.91  34.668 48.868]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-110000.keras


2025-07-16 03:12:32,635 - ADV - DEBUG - episode: 110099/10000000000, score: 0, e: 0.01, reward: 48.826, seudo-rew:0.0 max: 49.0790901184082 init-act: [32.991 49.407 32.271 49.827]
DEBUG:ADV:episode: 110099/10000000000, score: 0, e: 0.01, reward: 48.826, seudo-rew:0.0 max: 49.0790901184082 init-act: [32.991 49.407 32.271 49.827]
2025-07-16 03:12:42,320 - ADV - DEBUG - episode: 110199/10000000000, score: 0, e: 0.01, reward: 47.9, seudo-rew:0.0 max: 49.16202163696289 init-act: [32.785 50.097 32.165 49.897]
DEBUG:ADV:episode: 110199/10000000000, score: 0, e: 0.01, reward: 47.9, seudo-rew:0.0 max: 49.16202163696289 init-act: [32.785 50.097 32.165 49.897]
2025-07-16 03:12:52,226 - ADV - DEBUG - episode: 110299/10000000000, score: 0, e: 0.01, reward: 47.676, seudo-rew:0.0 max: 49.140846252441406 init-act: [32.809 49.979 32.037 49.75 ]
DEBUG:ADV:episode: 110299/10000000000, score: 0, e: 0.01, reward: 47.676, seudo-rew:0.0 max: 49.140846252441406 init-act: [32.809 49.979 32.037 49.75 ]
2025-07-

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-112000.keras


2025-07-16 03:15:48,427 - ADV - DEBUG - episode: 112099/10000000000, score: 0, e: 0.01, reward: 48.551, seudo-rew:0.0 max: 48.75804138183594 init-act: [32.791 49.645 32.459 49.311]
DEBUG:ADV:episode: 112099/10000000000, score: 0, e: 0.01, reward: 48.551, seudo-rew:0.0 max: 48.75804138183594 init-act: [32.791 49.645 32.459 49.311]
2025-07-16 03:15:58,432 - ADV - DEBUG - episode: 112199/10000000000, score: 0, e: 0.01, reward: 48.327, seudo-rew:0.0 max: 48.83778762817383 init-act: [32.609 49.684 32.135 49.517]
DEBUG:ADV:episode: 112199/10000000000, score: 0, e: 0.01, reward: 48.327, seudo-rew:0.0 max: 48.83778762817383 init-act: [32.609 49.684 32.135 49.517]
2025-07-16 03:16:08,274 - ADV - DEBUG - episode: 112299/10000000000, score: 0, e: 0.01, reward: 48.469, seudo-rew:0.0 max: 49.141334533691406 init-act: [32.764 49.983 32.501 49.7  ]
DEBUG:ADV:episode: 112299/10000000000, score: 0, e: 0.01, reward: 48.469, seudo-rew:0.0 max: 49.141334533691406 init-act: [32.764 49.983 32.501 49.7  ]
20

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-114000.keras


2025-07-16 03:19:03,851 - ADV - DEBUG - episode: 114099/10000000000, score: 0, e: 0.01, reward: 48.272, seudo-rew:0.0 max: 48.969364166259766 init-act: [33.205 49.688 33.065 49.454]
DEBUG:ADV:episode: 114099/10000000000, score: 0, e: 0.01, reward: 48.272, seudo-rew:0.0 max: 48.969364166259766 init-act: [33.205 49.688 33.065 49.454]
2025-07-16 03:19:13,691 - ADV - DEBUG - episode: 114199/10000000000, score: 0, e: 0.01, reward: 48.414, seudo-rew:0.0 max: 48.97199249267578 init-act: [33.303 49.972 32.506 49.753]
DEBUG:ADV:episode: 114199/10000000000, score: 0, e: 0.01, reward: 48.414, seudo-rew:0.0 max: 48.97199249267578 init-act: [33.303 49.972 32.506 49.753]
2025-07-16 03:19:23,363 - ADV - DEBUG - episode: 114299/10000000000, score: 0, e: 0.01, reward: 48.465, seudo-rew:0.0 max: 49.04718780517578 init-act: [33.309 50.016 33.287 49.939]
DEBUG:ADV:episode: 114299/10000000000, score: 0, e: 0.01, reward: 48.465, seudo-rew:0.0 max: 49.04718780517578 init-act: [33.309 50.016 33.287 49.939]
20

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-116000.keras


2025-07-16 03:22:18,793 - ADV - DEBUG - episode: 116099/10000000000, score: 0, e: 0.01, reward: 48.164, seudo-rew:0.0 max: 49.44557189941406 init-act: [32.723 50.515 32.523 50.255]
DEBUG:ADV:episode: 116099/10000000000, score: 0, e: 0.01, reward: 48.164, seudo-rew:0.0 max: 49.44557189941406 init-act: [32.723 50.515 32.523 50.255]
2025-07-16 03:22:28,445 - ADV - DEBUG - episode: 116199/10000000000, score: 0, e: 0.01, reward: 48.508, seudo-rew:0.0 max: 49.40546798706055 init-act: [33.017 50.085 32.504 49.887]
DEBUG:ADV:episode: 116199/10000000000, score: 0, e: 0.01, reward: 48.508, seudo-rew:0.0 max: 49.40546798706055 init-act: [33.017 50.085 32.504 49.887]
2025-07-16 03:22:38,339 - ADV - DEBUG - episode: 116299/10000000000, score: 0, e: 0.01, reward: 48.755, seudo-rew:0.0 max: 49.47673034667969 init-act: [32.828 50.319 32.447 50.016]
DEBUG:ADV:episode: 116299/10000000000, score: 0, e: 0.01, reward: 48.755, seudo-rew:0.0 max: 49.47673034667969 init-act: [32.828 50.319 32.447 50.016]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-118000.keras


2025-07-16 03:25:34,166 - ADV - DEBUG - episode: 118099/10000000000, score: 0, e: 0.01, reward: 48.376, seudo-rew:0.0 max: 49.2996826171875 init-act: [34.084 50.318 33.642 49.977]
DEBUG:ADV:episode: 118099/10000000000, score: 0, e: 0.01, reward: 48.376, seudo-rew:0.0 max: 49.2996826171875 init-act: [34.084 50.318 33.642 49.977]
2025-07-16 03:25:44,051 - ADV - DEBUG - episode: 118199/10000000000, score: 0, e: 0.01, reward: 48.274, seudo-rew:0.0 max: 49.41050338745117 init-act: [33.736 49.736 33.584 49.583]
DEBUG:ADV:episode: 118199/10000000000, score: 0, e: 0.01, reward: 48.274, seudo-rew:0.0 max: 49.41050338745117 init-act: [33.736 49.736 33.584 49.583]
2025-07-16 03:25:54,101 - ADV - DEBUG - episode: 118299/10000000000, score: 0, e: 0.01, reward: 48.672, seudo-rew:0.0 max: 49.350528717041016 init-act: [33.638 49.989 33.316 49.871]
DEBUG:ADV:episode: 118299/10000000000, score: 0, e: 0.01, reward: 48.672, seudo-rew:0.0 max: 49.350528717041016 init-act: [33.638 49.989 33.316 49.871]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-120000.keras


2025-07-16 03:28:50,011 - ADV - DEBUG - episode: 120099/10000000000, score: 0, e: 0.01, reward: 48.563, seudo-rew:0.0 max: 49.34074401855469 init-act: [33.201 50.197 32.937 50.255]
DEBUG:ADV:episode: 120099/10000000000, score: 0, e: 0.01, reward: 48.563, seudo-rew:0.0 max: 49.34074401855469 init-act: [33.201 50.197 32.937 50.255]
2025-07-16 03:28:59,784 - ADV - DEBUG - episode: 120199/10000000000, score: 0, e: 0.01, reward: 48.107, seudo-rew:0.0 max: 49.27752685546875 init-act: [33.385 50.229 32.85  49.731]
DEBUG:ADV:episode: 120199/10000000000, score: 0, e: 0.01, reward: 48.107, seudo-rew:0.0 max: 49.27752685546875 init-act: [33.385 50.229 32.85  49.731]
2025-07-16 03:29:09,409 - ADV - DEBUG - episode: 120299/10000000000, score: 0, e: 0.01, reward: 48.292, seudo-rew:0.0 max: 49.54279327392578 init-act: [32.572 50.54  32.131 50.22 ]
DEBUG:ADV:episode: 120299/10000000000, score: 0, e: 0.01, reward: 48.292, seudo-rew:0.0 max: 49.54279327392578 init-act: [32.572 50.54  32.131 50.22 ]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-122000.keras


2025-07-16 03:32:06,136 - ADV - DEBUG - episode: 122099/10000000000, score: 0, e: 0.01, reward: 48.319, seudo-rew:0.0 max: 48.8192253112793 init-act: [33.738 49.746 34.662 49.348]
DEBUG:ADV:episode: 122099/10000000000, score: 0, e: 0.01, reward: 48.319, seudo-rew:0.0 max: 48.8192253112793 init-act: [33.738 49.746 34.662 49.348]
2025-07-16 03:32:16,068 - ADV - DEBUG - episode: 122199/10000000000, score: 0, e: 0.01, reward: 47.782, seudo-rew:0.0 max: 49.73197937011719 init-act: [35.014 50.715 35.185 50.643]
DEBUG:ADV:episode: 122199/10000000000, score: 0, e: 0.01, reward: 47.782, seudo-rew:0.0 max: 49.73197937011719 init-act: [35.014 50.715 35.185 50.643]
2025-07-16 03:32:25,862 - ADV - DEBUG - episode: 122299/10000000000, score: 0, e: 0.01, reward: 47.711, seudo-rew:0.0 max: 49.296043395996094 init-act: [33.094 49.902 33.403 49.734]
DEBUG:ADV:episode: 122299/10000000000, score: 0, e: 0.01, reward: 47.711, seudo-rew:0.0 max: 49.296043395996094 init-act: [33.094 49.902 33.403 49.734]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-124000.keras


2025-07-16 03:35:21,692 - ADV - DEBUG - episode: 124099/10000000000, score: 0, e: 0.01, reward: 48.289, seudo-rew:0.0 max: 49.09626770019531 init-act: [32.58  49.934 32.449 49.824]
DEBUG:ADV:episode: 124099/10000000000, score: 0, e: 0.01, reward: 48.289, seudo-rew:0.0 max: 49.09626770019531 init-act: [32.58  49.934 32.449 49.824]
2025-07-16 03:35:31,497 - ADV - DEBUG - episode: 124199/10000000000, score: 0, e: 0.01, reward: 48.58, seudo-rew:0.0 max: 49.14360046386719 init-act: [32.799 49.789 32.545 49.803]
DEBUG:ADV:episode: 124199/10000000000, score: 0, e: 0.01, reward: 48.58, seudo-rew:0.0 max: 49.14360046386719 init-act: [32.799 49.789 32.545 49.803]
2025-07-16 03:35:41,165 - ADV - DEBUG - episode: 124299/10000000000, score: 0, e: 0.01, reward: 48.738, seudo-rew:0.0 max: 49.16598129272461 init-act: [32.912 49.794 32.824 49.825]
DEBUG:ADV:episode: 124299/10000000000, score: 0, e: 0.01, reward: 48.738, seudo-rew:0.0 max: 49.16598129272461 init-act: [32.912 49.794 32.824 49.825]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-126000.keras


2025-07-16 03:38:37,440 - ADV - DEBUG - episode: 126099/10000000000, score: 0, e: 0.01, reward: 47.919, seudo-rew:0.0 max: 48.70037078857422 init-act: [33.519 49.465 33.229 49.236]
DEBUG:ADV:episode: 126099/10000000000, score: 0, e: 0.01, reward: 47.919, seudo-rew:0.0 max: 48.70037078857422 init-act: [33.519 49.465 33.229 49.236]
2025-07-16 03:38:47,097 - ADV - DEBUG - episode: 126199/10000000000, score: 0, e: 0.01, reward: 47.941, seudo-rew:0.0 max: 48.78319549560547 init-act: [33.476 49.466 33.129 49.344]
DEBUG:ADV:episode: 126199/10000000000, score: 0, e: 0.01, reward: 47.941, seudo-rew:0.0 max: 48.78319549560547 init-act: [33.476 49.466 33.129 49.344]
2025-07-16 03:38:56,809 - ADV - DEBUG - episode: 126299/10000000000, score: 0, e: 0.01, reward: 48.342, seudo-rew:0.0 max: 48.586639404296875 init-act: [33.646 49.646 32.863 49.371]
DEBUG:ADV:episode: 126299/10000000000, score: 0, e: 0.01, reward: 48.342, seudo-rew:0.0 max: 48.586639404296875 init-act: [33.646 49.646 32.863 49.371]
20

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-128000.keras


2025-07-16 03:41:53,237 - ADV - DEBUG - episode: 128099/10000000000, score: 0, e: 0.01, reward: 47.859, seudo-rew:0.0 max: 48.41216278076172 init-act: [32.984 49.375 32.445 49.068]
DEBUG:ADV:episode: 128099/10000000000, score: 0, e: 0.01, reward: 47.859, seudo-rew:0.0 max: 48.41216278076172 init-act: [32.984 49.375 32.445 49.068]
2025-07-16 03:42:02,996 - ADV - DEBUG - episode: 128199/10000000000, score: 0, e: 0.01, reward: 48.588, seudo-rew:0.0 max: 48.59376907348633 init-act: [33.385 49.492 33.332 49.323]
DEBUG:ADV:episode: 128199/10000000000, score: 0, e: 0.01, reward: 48.588, seudo-rew:0.0 max: 48.59376907348633 init-act: [33.385 49.492 33.332 49.323]
2025-07-16 03:42:12,717 - ADV - DEBUG - episode: 128299/10000000000, score: 0, e: 0.01, reward: 48.344, seudo-rew:0.0 max: 48.9072151184082 init-act: [33.514 49.586 33.222 49.535]
DEBUG:ADV:episode: 128299/10000000000, score: 0, e: 0.01, reward: 48.344, seudo-rew:0.0 max: 48.9072151184082 init-act: [33.514 49.586 33.222 49.535]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-130000.keras


2025-07-16 03:45:08,853 - ADV - DEBUG - episode: 130099/10000000000, score: 0, e: 0.01, reward: 48.559, seudo-rew:0.0 max: 48.93949508666992 init-act: [33.663 49.789 33.193 49.485]
DEBUG:ADV:episode: 130099/10000000000, score: 0, e: 0.01, reward: 48.559, seudo-rew:0.0 max: 48.93949508666992 init-act: [33.663 49.789 33.193 49.485]
2025-07-16 03:45:18,671 - ADV - DEBUG - episode: 130199/10000000000, score: 0, e: 0.01, reward: 48.591, seudo-rew:0.0 max: 49.1937370300293 init-act: [33.764 50.015 33.306 49.624]
DEBUG:ADV:episode: 130199/10000000000, score: 0, e: 0.01, reward: 48.591, seudo-rew:0.0 max: 49.1937370300293 init-act: [33.764 50.015 33.306 49.624]
2025-07-16 03:45:28,453 - ADV - DEBUG - episode: 130299/10000000000, score: 0, e: 0.01, reward: 48.563, seudo-rew:0.0 max: 49.25912857055664 init-act: [36.927 50.289 38.418 50.325]
DEBUG:ADV:episode: 130299/10000000000, score: 0, e: 0.01, reward: 48.563, seudo-rew:0.0 max: 49.25912857055664 init-act: [36.927 50.289 38.418 50.325]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-132000.keras


2025-07-16 03:48:24,737 - ADV - DEBUG - episode: 132099/10000000000, score: 0, e: 0.01, reward: 48.238, seudo-rew:0.0 max: 49.42720413208008 init-act: [34.508 50.214 34.223 49.837]
DEBUG:ADV:episode: 132099/10000000000, score: 0, e: 0.01, reward: 48.238, seudo-rew:0.0 max: 49.42720413208008 init-act: [34.508 50.214 34.223 49.837]
2025-07-16 03:48:34,458 - ADV - DEBUG - episode: 132199/10000000000, score: 0, e: 0.01, reward: 48.236, seudo-rew:0.0 max: 49.308616638183594 init-act: [34.881 50.21  34.298 49.98 ]
DEBUG:ADV:episode: 132199/10000000000, score: 0, e: 0.01, reward: 48.236, seudo-rew:0.0 max: 49.308616638183594 init-act: [34.881 50.21  34.298 49.98 ]
2025-07-16 03:48:44,175 - ADV - DEBUG - episode: 132299/10000000000, score: 0, e: 0.01, reward: 48.145, seudo-rew:0.0 max: 49.46342849731445 init-act: [34.928 50.379 34.928 50.199]
DEBUG:ADV:episode: 132299/10000000000, score: 0, e: 0.01, reward: 48.145, seudo-rew:0.0 max: 49.46342849731445 init-act: [34.928 50.379 34.928 50.199]
20

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-134000.keras


2025-07-16 03:51:39,566 - ADV - DEBUG - episode: 134099/10000000000, score: 0, e: 0.01, reward: 48.23, seudo-rew:0.0 max: 48.97711944580078 init-act: [33.878 49.864 33.45  49.742]
DEBUG:ADV:episode: 134099/10000000000, score: 0, e: 0.01, reward: 48.23, seudo-rew:0.0 max: 48.97711944580078 init-act: [33.878 49.864 33.45  49.742]
2025-07-16 03:51:49,259 - ADV - DEBUG - episode: 134199/10000000000, score: 0, e: 0.01, reward: 48.807, seudo-rew:0.0 max: 48.98347473144531 init-act: [33.618 50.059 33.535 49.765]
DEBUG:ADV:episode: 134199/10000000000, score: 0, e: 0.01, reward: 48.807, seudo-rew:0.0 max: 48.98347473144531 init-act: [33.618 50.059 33.535 49.765]
2025-07-16 03:51:59,043 - ADV - DEBUG - episode: 134299/10000000000, score: 0, e: 0.01, reward: 47.912, seudo-rew:0.0 max: 49.026641845703125 init-act: [33.611 49.957 33.414 49.719]
DEBUG:ADV:episode: 134299/10000000000, score: 0, e: 0.01, reward: 47.912, seudo-rew:0.0 max: 49.026641845703125 init-act: [33.611 49.957 33.414 49.719]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-136000.keras


2025-07-16 03:54:54,324 - ADV - DEBUG - episode: 136099/10000000000, score: 0, e: 0.01, reward: 48.011, seudo-rew:0.0 max: 49.05509948730469 init-act: [34.817 50.052 35.47  49.839]
DEBUG:ADV:episode: 136099/10000000000, score: 0, e: 0.01, reward: 48.011, seudo-rew:0.0 max: 49.05509948730469 init-act: [34.817 50.052 35.47  49.839]
2025-07-16 03:55:04,021 - ADV - DEBUG - episode: 136199/10000000000, score: 0, e: 0.01, reward: 47.981, seudo-rew:0.0 max: 49.05432891845703 init-act: [33.6   49.793 34.099 49.819]
DEBUG:ADV:episode: 136199/10000000000, score: 0, e: 0.01, reward: 47.981, seudo-rew:0.0 max: 49.05432891845703 init-act: [33.6   49.793 34.099 49.819]
2025-07-16 03:55:13,640 - ADV - DEBUG - episode: 136299/10000000000, score: 0, e: 0.01, reward: 48.243, seudo-rew:0.0 max: 49.05897521972656 init-act: [33.636 49.57  34.037 49.851]
DEBUG:ADV:episode: 136299/10000000000, score: 0, e: 0.01, reward: 48.243, seudo-rew:0.0 max: 49.05897521972656 init-act: [33.636 49.57  34.037 49.851]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-138000.keras


2025-07-16 03:58:09,295 - ADV - DEBUG - episode: 138099/10000000000, score: 0, e: 0.01, reward: 48.869, seudo-rew:0.0 max: 48.69450759887695 init-act: [34.    49.445 33.51  49.395]
DEBUG:ADV:episode: 138099/10000000000, score: 0, e: 0.01, reward: 48.869, seudo-rew:0.0 max: 48.69450759887695 init-act: [34.    49.445 33.51  49.395]
2025-07-16 03:58:18,947 - ADV - DEBUG - episode: 138199/10000000000, score: 0, e: 0.01, reward: 48.479, seudo-rew:0.0 max: 48.616397857666016 init-act: [33.929 49.66  33.529 49.407]
DEBUG:ADV:episode: 138199/10000000000, score: 0, e: 0.01, reward: 48.479, seudo-rew:0.0 max: 48.616397857666016 init-act: [33.929 49.66  33.529 49.407]
2025-07-16 03:58:28,696 - ADV - DEBUG - episode: 138299/10000000000, score: 0, e: 0.01, reward: 48.285, seudo-rew:0.0 max: 48.56922149658203 init-act: [33.81  49.399 33.31  49.324]
DEBUG:ADV:episode: 138299/10000000000, score: 0, e: 0.01, reward: 48.285, seudo-rew:0.0 max: 48.56922149658203 init-act: [33.81  49.399 33.31  49.324]
20

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-140000.keras


2025-07-16 04:01:25,233 - ADV - DEBUG - episode: 140099/10000000000, score: 0, e: 0.01, reward: 48.561, seudo-rew:0.0 max: 48.7157096862793 init-act: [34.    49.493 33.575 49.206]
DEBUG:ADV:episode: 140099/10000000000, score: 0, e: 0.01, reward: 48.561, seudo-rew:0.0 max: 48.7157096862793 init-act: [34.    49.493 33.575 49.206]
2025-07-16 04:01:35,051 - ADV - DEBUG - episode: 140199/10000000000, score: 0, e: 0.01, reward: 48.229, seudo-rew:0.0 max: 48.4050407409668 init-act: [34.18  49.262 33.562 48.95 ]
DEBUG:ADV:episode: 140199/10000000000, score: 0, e: 0.01, reward: 48.229, seudo-rew:0.0 max: 48.4050407409668 init-act: [34.18  49.262 33.562 48.95 ]
2025-07-16 04:01:44,857 - ADV - DEBUG - episode: 140299/10000000000, score: 0, e: 0.01, reward: 48.407, seudo-rew:0.0 max: 48.32311248779297 init-act: [33.828 48.992 33.603 48.847]
DEBUG:ADV:episode: 140299/10000000000, score: 0, e: 0.01, reward: 48.407, seudo-rew:0.0 max: 48.32311248779297 init-act: [33.828 48.992 33.603 48.847]
2025-07-

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-142000.keras


2025-07-16 04:04:40,328 - ADV - DEBUG - episode: 142099/10000000000, score: 0, e: 0.01, reward: 48.78, seudo-rew:0.0 max: 48.65684127807617 init-act: [34.092 49.544 34.06  49.198]
DEBUG:ADV:episode: 142099/10000000000, score: 0, e: 0.01, reward: 48.78, seudo-rew:0.0 max: 48.65684127807617 init-act: [34.092 49.544 34.06  49.198]
2025-07-16 04:04:50,126 - ADV - DEBUG - episode: 142199/10000000000, score: 0, e: 0.01, reward: 47.911, seudo-rew:0.0 max: 48.68711471557617 init-act: [33.801 49.394 33.933 49.103]
DEBUG:ADV:episode: 142199/10000000000, score: 0, e: 0.01, reward: 47.911, seudo-rew:0.0 max: 48.68711471557617 init-act: [33.801 49.394 33.933 49.103]
2025-07-16 04:04:59,933 - ADV - DEBUG - episode: 142299/10000000000, score: 0, e: 0.01, reward: 48.074, seudo-rew:0.0 max: 48.83121109008789 init-act: [33.752 49.808 33.762 49.695]
DEBUG:ADV:episode: 142299/10000000000, score: 0, e: 0.01, reward: 48.074, seudo-rew:0.0 max: 48.83121109008789 init-act: [33.752 49.808 33.762 49.695]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-144000.keras


2025-07-16 04:07:56,747 - ADV - DEBUG - episode: 144099/10000000000, score: 0, e: 0.01, reward: 48.364, seudo-rew:0.0 max: 48.58268356323242 init-act: [33.81  49.322 33.37  49.415]
DEBUG:ADV:episode: 144099/10000000000, score: 0, e: 0.01, reward: 48.364, seudo-rew:0.0 max: 48.58268356323242 init-act: [33.81  49.322 33.37  49.415]
2025-07-16 04:08:06,525 - ADV - DEBUG - episode: 144199/10000000000, score: 0, e: 0.01, reward: 47.985, seudo-rew:0.0 max: 48.49139404296875 init-act: [33.945 49.098 33.996 49.025]
DEBUG:ADV:episode: 144199/10000000000, score: 0, e: 0.01, reward: 47.985, seudo-rew:0.0 max: 48.49139404296875 init-act: [33.945 49.098 33.996 49.025]
2025-07-16 04:08:16,334 - ADV - DEBUG - episode: 144299/10000000000, score: 0, e: 0.01, reward: 48.451, seudo-rew:0.0 max: 48.48183822631836 init-act: [33.75  49.147 33.393 48.864]
DEBUG:ADV:episode: 144299/10000000000, score: 0, e: 0.01, reward: 48.451, seudo-rew:0.0 max: 48.48183822631836 init-act: [33.75  49.147 33.393 48.864]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-146000.keras


2025-07-16 04:11:12,411 - ADV - DEBUG - episode: 146099/10000000000, score: 0, e: 0.01, reward: 48.293, seudo-rew:0.0 max: 48.6014518737793 init-act: [35.633 49.57  35.125 48.823]
DEBUG:ADV:episode: 146099/10000000000, score: 0, e: 0.01, reward: 48.293, seudo-rew:0.0 max: 48.6014518737793 init-act: [35.633 49.57  35.125 48.823]
2025-07-16 04:11:22,205 - ADV - DEBUG - episode: 146199/10000000000, score: 0, e: 0.01, reward: 48.261, seudo-rew:0.0 max: 48.60175704956055 init-act: [35.842 49.465 35.285 49.39 ]
DEBUG:ADV:episode: 146199/10000000000, score: 0, e: 0.01, reward: 48.261, seudo-rew:0.0 max: 48.60175704956055 init-act: [35.842 49.465 35.285 49.39 ]
2025-07-16 04:11:31,904 - ADV - DEBUG - episode: 146299/10000000000, score: 0, e: 0.01, reward: 48.347, seudo-rew:0.0 max: 48.84489822387695 init-act: [35.652 49.492 35.066 49.267]
DEBUG:ADV:episode: 146299/10000000000, score: 0, e: 0.01, reward: 48.347, seudo-rew:0.0 max: 48.84489822387695 init-act: [35.652 49.492 35.066 49.267]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-148000.keras


2025-07-16 04:14:28,145 - ADV - DEBUG - episode: 148099/10000000000, score: 0, e: 0.01, reward: 48.299, seudo-rew:0.0 max: 48.919761657714844 init-act: [35.074 49.797 34.53  49.421]
DEBUG:ADV:episode: 148099/10000000000, score: 0, e: 0.01, reward: 48.299, seudo-rew:0.0 max: 48.919761657714844 init-act: [35.074 49.797 34.53  49.421]
2025-07-16 04:14:37,853 - ADV - DEBUG - episode: 148199/10000000000, score: 0, e: 0.01, reward: 48.441, seudo-rew:0.0 max: 48.91048812866211 init-act: [34.472 49.907 33.939 49.544]
DEBUG:ADV:episode: 148199/10000000000, score: 0, e: 0.01, reward: 48.441, seudo-rew:0.0 max: 48.91048812866211 init-act: [34.472 49.907 33.939 49.544]
2025-07-16 04:14:47,660 - ADV - DEBUG - episode: 148299/10000000000, score: 0, e: 0.01, reward: 48.568, seudo-rew:0.0 max: 48.90656661987305 init-act: [34.7   49.797 34.441 49.266]
DEBUG:ADV:episode: 148299/10000000000, score: 0, e: 0.01, reward: 48.568, seudo-rew:0.0 max: 48.90656661987305 init-act: [34.7   49.797 34.441 49.266]
20

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-150000.keras


2025-07-16 04:17:44,381 - ADV - DEBUG - episode: 150099/10000000000, score: 0, e: 0.01, reward: 48.605, seudo-rew:0.0 max: 48.245277404785156 init-act: [32.947 48.895 32.436 48.738]
DEBUG:ADV:episode: 150099/10000000000, score: 0, e: 0.01, reward: 48.605, seudo-rew:0.0 max: 48.245277404785156 init-act: [32.947 48.895 32.436 48.738]
2025-07-16 04:17:54,183 - ADV - DEBUG - episode: 150199/10000000000, score: 0, e: 0.01, reward: 48.347, seudo-rew:0.0 max: 48.11350631713867 init-act: [33.82  48.859 33.34  48.665]
DEBUG:ADV:episode: 150199/10000000000, score: 0, e: 0.01, reward: 48.347, seudo-rew:0.0 max: 48.11350631713867 init-act: [33.82  48.859 33.34  48.665]
2025-07-16 04:18:03,902 - ADV - DEBUG - episode: 150299/10000000000, score: 0, e: 0.01, reward: 48.261, seudo-rew:0.0 max: 48.17082595825195 init-act: [33.851 49.054 33.469 49.066]
DEBUG:ADV:episode: 150299/10000000000, score: 0, e: 0.01, reward: 48.261, seudo-rew:0.0 max: 48.17082595825195 init-act: [33.851 49.054 33.469 49.066]
20

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-152000.keras


2025-07-16 04:20:59,353 - ADV - DEBUG - episode: 152099/10000000000, score: 0, e: 0.01, reward: 48.513, seudo-rew:0.0 max: 48.63715362548828 init-act: [34.451 49.465 34.841 48.796]
DEBUG:ADV:episode: 152099/10000000000, score: 0, e: 0.01, reward: 48.513, seudo-rew:0.0 max: 48.63715362548828 init-act: [34.451 49.465 34.841 48.796]
2025-07-16 04:21:09,254 - ADV - DEBUG - episode: 152199/10000000000, score: 0, e: 0.01, reward: 48.195, seudo-rew:0.0 max: 48.696495056152344 init-act: [32.858 49.611 33.294 49.176]
DEBUG:ADV:episode: 152199/10000000000, score: 0, e: 0.01, reward: 48.195, seudo-rew:0.0 max: 48.696495056152344 init-act: [32.858 49.611 33.294 49.176]
2025-07-16 04:21:19,101 - ADV - DEBUG - episode: 152299/10000000000, score: 0, e: 0.01, reward: 48.9, seudo-rew:0.0 max: 48.888671875 init-act: [33.63  49.468 34.329 48.863]
DEBUG:ADV:episode: 152299/10000000000, score: 0, e: 0.01, reward: 48.9, seudo-rew:0.0 max: 48.888671875 init-act: [33.63  49.468 34.329 48.863]
2025-07-16 04:21

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-154000.keras


2025-07-16 04:24:15,067 - ADV - DEBUG - episode: 154099/10000000000, score: 0, e: 0.01, reward: 48.45, seudo-rew:0.0 max: 48.89518737792969 init-act: [33.123 49.891 32.926 49.863]
DEBUG:ADV:episode: 154099/10000000000, score: 0, e: 0.01, reward: 48.45, seudo-rew:0.0 max: 48.89518737792969 init-act: [33.123 49.891 32.926 49.863]
2025-07-16 04:24:24,774 - ADV - DEBUG - episode: 154199/10000000000, score: 0, e: 0.01, reward: 48.712, seudo-rew:0.0 max: 48.86296463012695 init-act: [33.42  49.684 33.147 49.395]
DEBUG:ADV:episode: 154199/10000000000, score: 0, e: 0.01, reward: 48.712, seudo-rew:0.0 max: 48.86296463012695 init-act: [33.42  49.684 33.147 49.395]
2025-07-16 04:24:34,674 - ADV - DEBUG - episode: 154299/10000000000, score: 0, e: 0.01, reward: 48.537, seudo-rew:0.0 max: 48.870357513427734 init-act: [33.38  49.606 33.091 49.427]
DEBUG:ADV:episode: 154299/10000000000, score: 0, e: 0.01, reward: 48.537, seudo-rew:0.0 max: 48.870357513427734 init-act: [33.38  49.606 33.091 49.427]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-156000.keras


2025-07-16 04:27:30,786 - ADV - DEBUG - episode: 156099/10000000000, score: 0, e: 0.01, reward: 48.481, seudo-rew:0.0 max: 49.38098907470703 init-act: [32.91  49.932 32.436 49.472]
DEBUG:ADV:episode: 156099/10000000000, score: 0, e: 0.01, reward: 48.481, seudo-rew:0.0 max: 49.38098907470703 init-act: [32.91  49.932 32.436 49.472]
2025-07-16 04:27:40,485 - ADV - DEBUG - episode: 156199/10000000000, score: 0, e: 0.01, reward: 48.82, seudo-rew:0.0 max: 49.47993469238281 init-act: [33.072 50.343 32.57  50.15 ]
DEBUG:ADV:episode: 156199/10000000000, score: 0, e: 0.01, reward: 48.82, seudo-rew:0.0 max: 49.47993469238281 init-act: [33.072 50.343 32.57  50.15 ]
2025-07-16 04:27:50,293 - ADV - DEBUG - episode: 156299/10000000000, score: 0, e: 0.01, reward: 48.754, seudo-rew:0.0 max: 49.53672790527344 init-act: [32.814 50.353 32.457 50.09 ]
DEBUG:ADV:episode: 156299/10000000000, score: 0, e: 0.01, reward: 48.754, seudo-rew:0.0 max: 49.53672790527344 init-act: [32.814 50.353 32.457 50.09 ]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-158000.keras


2025-07-16 04:30:46,926 - ADV - DEBUG - episode: 158099/10000000000, score: 0, e: 0.01, reward: 48.368, seudo-rew:0.0 max: 49.2961540222168 init-act: [34.715 50.062 34.979 49.73 ]
DEBUG:ADV:episode: 158099/10000000000, score: 0, e: 0.01, reward: 48.368, seudo-rew:0.0 max: 49.2961540222168 init-act: [34.715 50.062 34.979 49.73 ]
2025-07-16 04:30:57,092 - ADV - DEBUG - episode: 158199/10000000000, score: 0, e: 0.01, reward: 48.357, seudo-rew:0.0 max: 49.26200485229492 init-act: [34.155 50.152 34.338 49.996]
DEBUG:ADV:episode: 158199/10000000000, score: 0, e: 0.01, reward: 48.357, seudo-rew:0.0 max: 49.26200485229492 init-act: [34.155 50.152 34.338 49.996]
2025-07-16 04:31:06,943 - ADV - DEBUG - episode: 158299/10000000000, score: 0, e: 0.01, reward: 48.446, seudo-rew:0.0 max: 49.45235061645508 init-act: [34.283 50.008 34.562 49.981]
DEBUG:ADV:episode: 158299/10000000000, score: 0, e: 0.01, reward: 48.446, seudo-rew:0.0 max: 49.45235061645508 init-act: [34.283 50.008 34.562 49.981]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-160000.keras


2025-07-16 04:34:02,976 - ADV - DEBUG - episode: 160099/10000000000, score: 0, e: 0.01, reward: 48.476, seudo-rew:0.0 max: 49.28129196166992 init-act: [33.911 50.05  34.901 50.019]
DEBUG:ADV:episode: 160099/10000000000, score: 0, e: 0.01, reward: 48.476, seudo-rew:0.0 max: 49.28129196166992 init-act: [33.911 50.05  34.901 50.019]
2025-07-16 04:34:12,948 - ADV - DEBUG - episode: 160199/10000000000, score: 0, e: 0.01, reward: 48.465, seudo-rew:0.0 max: 49.35699462890625 init-act: [33.998 50.161 34.752 50.047]
DEBUG:ADV:episode: 160199/10000000000, score: 0, e: 0.01, reward: 48.465, seudo-rew:0.0 max: 49.35699462890625 init-act: [33.998 50.161 34.752 50.047]
2025-07-16 04:34:22,776 - ADV - DEBUG - episode: 160299/10000000000, score: 0, e: 0.01, reward: 48.425, seudo-rew:0.0 max: 49.33734893798828 init-act: [33.789 50.27  34.457 49.974]
DEBUG:ADV:episode: 160299/10000000000, score: 0, e: 0.01, reward: 48.425, seudo-rew:0.0 max: 49.33734893798828 init-act: [33.789 50.27  34.457 49.974]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-162000.keras


2025-07-16 04:37:18,763 - ADV - DEBUG - episode: 162099/10000000000, score: 0, e: 0.01, reward: 48.418, seudo-rew:0.0 max: 49.44882583618164 init-act: [34.471 50.137 34.006 49.672]
DEBUG:ADV:episode: 162099/10000000000, score: 0, e: 0.01, reward: 48.418, seudo-rew:0.0 max: 49.44882583618164 init-act: [34.471 50.137 34.006 49.672]
2025-07-16 04:37:28,492 - ADV - DEBUG - episode: 162199/10000000000, score: 0, e: 0.01, reward: 48.457, seudo-rew:0.0 max: 49.37778854370117 init-act: [34.52  50.289 33.893 49.847]
DEBUG:ADV:episode: 162199/10000000000, score: 0, e: 0.01, reward: 48.457, seudo-rew:0.0 max: 49.37778854370117 init-act: [34.52  50.289 33.893 49.847]
2025-07-16 04:37:38,173 - ADV - DEBUG - episode: 162299/10000000000, score: 0, e: 0.01, reward: 48.56, seudo-rew:0.0 max: 49.48311996459961 init-act: [34.1   50.469 33.856 50.067]
DEBUG:ADV:episode: 162299/10000000000, score: 0, e: 0.01, reward: 48.56, seudo-rew:0.0 max: 49.48311996459961 init-act: [34.1   50.469 33.856 50.067]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-164000.keras


2025-07-16 04:40:35,908 - ADV - DEBUG - episode: 164099/10000000000, score: 0, e: 0.01, reward: 48.606, seudo-rew:0.0 max: 48.5722770690918 init-act: [33.344 49.269 32.731 49.114]
DEBUG:ADV:episode: 164099/10000000000, score: 0, e: 0.01, reward: 48.606, seudo-rew:0.0 max: 48.5722770690918 init-act: [33.344 49.269 32.731 49.114]
2025-07-16 04:40:45,668 - ADV - DEBUG - episode: 164199/10000000000, score: 0, e: 0.01, reward: 48.795, seudo-rew:0.0 max: 48.410194396972656 init-act: [33.442 49.371 32.682 49.156]
DEBUG:ADV:episode: 164199/10000000000, score: 0, e: 0.01, reward: 48.795, seudo-rew:0.0 max: 48.410194396972656 init-act: [33.442 49.371 32.682 49.156]
2025-07-16 04:40:55,679 - ADV - DEBUG - episode: 164299/10000000000, score: 0, e: 0.01, reward: 48.64, seudo-rew:0.0 max: 48.31358337402344 init-act: [33.289 49.169 32.502 48.824]
DEBUG:ADV:episode: 164299/10000000000, score: 0, e: 0.01, reward: 48.64, seudo-rew:0.0 max: 48.31358337402344 init-act: [33.289 49.169 32.502 48.824]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-166000.keras


2025-07-16 04:43:51,837 - ADV - DEBUG - episode: 166099/10000000000, score: 0, e: 0.01, reward: 48.528, seudo-rew:0.0 max: 48.22645950317383 init-act: [32.332 49.004 31.758 48.613]
DEBUG:ADV:episode: 166099/10000000000, score: 0, e: 0.01, reward: 48.528, seudo-rew:0.0 max: 48.22645950317383 init-act: [32.332 49.004 31.758 48.613]
2025-07-16 04:44:01,593 - ADV - DEBUG - episode: 166199/10000000000, score: 0, e: 0.01, reward: 48.203, seudo-rew:0.0 max: 48.125938415527344 init-act: [32.791 49.113 32.24  48.77 ]
DEBUG:ADV:episode: 166199/10000000000, score: 0, e: 0.01, reward: 48.203, seudo-rew:0.0 max: 48.125938415527344 init-act: [32.791 49.113 32.24  48.77 ]
2025-07-16 04:44:11,313 - ADV - DEBUG - episode: 166299/10000000000, score: 0, e: 0.01, reward: 48.586, seudo-rew:0.0 max: 48.04130935668945 init-act: [33.032 48.695 32.654 48.407]
DEBUG:ADV:episode: 166299/10000000000, score: 0, e: 0.01, reward: 48.586, seudo-rew:0.0 max: 48.04130935668945 init-act: [33.032 48.695 32.654 48.407]
20

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-168000.keras


2025-07-16 04:47:08,509 - ADV - DEBUG - episode: 168099/10000000000, score: 0, e: 0.01, reward: 48.366, seudo-rew:0.0 max: 48.060245513916016 init-act: [33.447 49.066 33.017 48.855]
DEBUG:ADV:episode: 168099/10000000000, score: 0, e: 0.01, reward: 48.366, seudo-rew:0.0 max: 48.060245513916016 init-act: [33.447 49.066 33.017 48.855]
2025-07-16 04:47:18,328 - ADV - DEBUG - episode: 168199/10000000000, score: 0, e: 0.01, reward: 48.439, seudo-rew:0.0 max: 48.171592712402344 init-act: [32.904 49.071 32.603 48.915]
DEBUG:ADV:episode: 168199/10000000000, score: 0, e: 0.01, reward: 48.439, seudo-rew:0.0 max: 48.171592712402344 init-act: [32.904 49.071 32.603 48.915]
2025-07-16 04:47:28,243 - ADV - DEBUG - episode: 168299/10000000000, score: 0, e: 0.01, reward: 48.526, seudo-rew:0.0 max: 48.22213363647461 init-act: [33.047 49.286 32.538 48.993]
DEBUG:ADV:episode: 168299/10000000000, score: 0, e: 0.01, reward: 48.526, seudo-rew:0.0 max: 48.22213363647461 init-act: [33.047 49.286 32.538 48.993]


dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-170000.keras


2025-07-16 04:50:25,220 - ADV - DEBUG - episode: 170099/10000000000, score: 0, e: 0.01, reward: 48.824, seudo-rew:0.0 max: 49.4061164855957 init-act: [33.695 50.347 33.544 50.289]
DEBUG:ADV:episode: 170099/10000000000, score: 0, e: 0.01, reward: 48.824, seudo-rew:0.0 max: 49.4061164855957 init-act: [33.695 50.347 33.544 50.289]
2025-07-16 04:50:35,176 - ADV - DEBUG - episode: 170199/10000000000, score: 0, e: 0.01, reward: 48.344, seudo-rew:0.0 max: 49.33408737182617 init-act: [34.137 50.195 33.736 50.175]
DEBUG:ADV:episode: 170199/10000000000, score: 0, e: 0.01, reward: 48.344, seudo-rew:0.0 max: 49.33408737182617 init-act: [34.137 50.195 33.736 50.175]
2025-07-16 04:50:45,071 - ADV - DEBUG - episode: 170299/10000000000, score: 0, e: 0.01, reward: 48.417, seudo-rew:0.0 max: 49.47597122192383 init-act: [34.584 49.976 33.879 50.061]
DEBUG:ADV:episode: 170299/10000000000, score: 0, e: 0.01, reward: 48.417, seudo-rew:0.0 max: 49.47597122192383 init-act: [34.584 49.976 33.879 50.061]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-172000.keras


2025-07-16 04:53:47,097 - ADV - DEBUG - episode: 172099/10000000000, score: 0, e: 0.01, reward: 48.515, seudo-rew:0.0 max: 49.39553451538086 init-act: [33.932 50.456 33.578 49.968]
DEBUG:ADV:episode: 172099/10000000000, score: 0, e: 0.01, reward: 48.515, seudo-rew:0.0 max: 49.39553451538086 init-act: [33.932 50.456 33.578 49.968]
2025-07-16 04:53:56,864 - ADV - DEBUG - episode: 172199/10000000000, score: 0, e: 0.01, reward: 48.673, seudo-rew:0.0 max: 49.307655334472656 init-act: [33.369 50.218 32.926 49.995]
DEBUG:ADV:episode: 172199/10000000000, score: 0, e: 0.01, reward: 48.673, seudo-rew:0.0 max: 49.307655334472656 init-act: [33.369 50.218 32.926 49.995]
2025-07-16 04:54:06,595 - ADV - DEBUG - episode: 172299/10000000000, score: 0, e: 0.01, reward: 48.579, seudo-rew:0.0 max: 49.4024543762207 init-act: [33.198 50.289 32.633 49.856]
DEBUG:ADV:episode: 172299/10000000000, score: 0, e: 0.01, reward: 48.579, seudo-rew:0.0 max: 49.4024543762207 init-act: [33.198 50.289 32.633 49.856]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-174000.keras


2025-07-16 04:57:04,021 - ADV - DEBUG - episode: 174099/10000000000, score: 0, e: 0.01, reward: 48.432, seudo-rew:0.0 max: 49.318450927734375 init-act: [32.957 50.082 32.416 49.773]
DEBUG:ADV:episode: 174099/10000000000, score: 0, e: 0.01, reward: 48.432, seudo-rew:0.0 max: 49.318450927734375 init-act: [32.957 50.082 32.416 49.773]
2025-07-16 04:57:13,730 - ADV - DEBUG - episode: 174199/10000000000, score: 0, e: 0.01, reward: 48.616, seudo-rew:0.0 max: 49.373504638671875 init-act: [32.626 50.007 32.086 49.783]
DEBUG:ADV:episode: 174199/10000000000, score: 0, e: 0.01, reward: 48.616, seudo-rew:0.0 max: 49.373504638671875 init-act: [32.626 50.007 32.086 49.783]
2025-07-16 04:57:23,549 - ADV - DEBUG - episode: 174299/10000000000, score: 0, e: 0.01, reward: 48.365, seudo-rew:0.0 max: 49.43484115600586 init-act: [32.877 50.234 32.477 49.914]
DEBUG:ADV:episode: 174299/10000000000, score: 0, e: 0.01, reward: 48.365, seudo-rew:0.0 max: 49.43484115600586 init-act: [32.877 50.234 32.477 49.914]


dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-176000.keras


2025-07-16 05:00:20,384 - ADV - DEBUG - episode: 176099/10000000000, score: 0, e: 0.01, reward: 48.591, seudo-rew:0.0 max: 49.40605926513672 init-act: [34.772 50.401 34.047 49.855]
DEBUG:ADV:episode: 176099/10000000000, score: 0, e: 0.01, reward: 48.591, seudo-rew:0.0 max: 49.40605926513672 init-act: [34.772 50.401 34.047 49.855]
2025-07-16 05:00:30,300 - ADV - DEBUG - episode: 176199/10000000000, score: 0, e: 0.01, reward: 48.185, seudo-rew:0.0 max: 49.429969787597656 init-act: [35.178 49.905 34.806 49.788]
DEBUG:ADV:episode: 176199/10000000000, score: 0, e: 0.01, reward: 48.185, seudo-rew:0.0 max: 49.429969787597656 init-act: [35.178 49.905 34.806 49.788]
2025-07-16 05:00:40,551 - ADV - DEBUG - episode: 176299/10000000000, score: 0, e: 0.01, reward: 48.405, seudo-rew:0.0 max: 49.48127746582031 init-act: [34.465 50.48  34.127 50.042]
DEBUG:ADV:episode: 176299/10000000000, score: 0, e: 0.01, reward: 48.405, seudo-rew:0.0 max: 49.48127746582031 init-act: [34.465 50.48  34.127 50.042]
20

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-178000.keras


2025-07-16 05:03:20,334 - ADV - DEBUG - episode: 178099/10000000000, score: 0, e: 0.01, reward: 48.778, seudo-rew:0.0 max: 49.50834274291992 init-act: [33.16  50.523 32.869 50.126]
DEBUG:ADV:episode: 178099/10000000000, score: 0, e: 0.01, reward: 48.778, seudo-rew:0.0 max: 49.50834274291992 init-act: [33.16  50.523 32.869 50.126]
2025-07-16 05:03:28,533 - ADV - DEBUG - episode: 178199/10000000000, score: 0, e: 0.01, reward: 48.453, seudo-rew:0.0 max: 49.39459991455078 init-act: [32.91  50.431 33.122 50.188]
DEBUG:ADV:episode: 178199/10000000000, score: 0, e: 0.01, reward: 48.453, seudo-rew:0.0 max: 49.39459991455078 init-act: [32.91  50.431 33.122 50.188]
2025-07-16 05:03:36,751 - ADV - DEBUG - episode: 178299/10000000000, score: 0, e: 0.01, reward: 48.704, seudo-rew:0.0 max: 49.482669830322266 init-act: [33.    50.397 33.464 50.3  ]
DEBUG:ADV:episode: 178299/10000000000, score: 0, e: 0.01, reward: 48.704, seudo-rew:0.0 max: 49.482669830322266 init-act: [33.    50.397 33.464 50.3  ]
20

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-180000.keras


2025-07-16 05:06:04,476 - ADV - DEBUG - episode: 180099/10000000000, score: 0, e: 0.01, reward: 48.551, seudo-rew:0.0 max: 48.334991455078125 init-act: [32.912 49.034 32.838 48.797]
DEBUG:ADV:episode: 180099/10000000000, score: 0, e: 0.01, reward: 48.551, seudo-rew:0.0 max: 48.334991455078125 init-act: [32.912 49.034 32.838 48.797]
2025-07-16 05:06:12,636 - ADV - DEBUG - episode: 180199/10000000000, score: 0, e: 0.01, reward: 48.129, seudo-rew:0.0 max: 48.53146743774414 init-act: [32.857 49.427 32.843 49.2  ]
DEBUG:ADV:episode: 180199/10000000000, score: 0, e: 0.01, reward: 48.129, seudo-rew:0.0 max: 48.53146743774414 init-act: [32.857 49.427 32.843 49.2  ]
2025-07-16 05:06:20,828 - ADV - DEBUG - episode: 180299/10000000000, score: 0, e: 0.01, reward: 48.515, seudo-rew:0.0 max: 48.351051330566406 init-act: [32.828 48.868 32.637 48.55 ]
DEBUG:ADV:episode: 180299/10000000000, score: 0, e: 0.01, reward: 48.515, seudo-rew:0.0 max: 48.351051330566406 init-act: [32.828 48.868 32.637 48.55 ]


dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-182000.keras


2025-07-16 05:08:55,730 - ADV - DEBUG - episode: 182099/10000000000, score: 0, e: 0.01, reward: 48.562, seudo-rew:0.0 max: 49.18011474609375 init-act: [32.529 49.823 32.304 49.841]
DEBUG:ADV:episode: 182099/10000000000, score: 0, e: 0.01, reward: 48.562, seudo-rew:0.0 max: 49.18011474609375 init-act: [32.529 49.823 32.304 49.841]
2025-07-16 05:09:03,846 - ADV - DEBUG - episode: 182199/10000000000, score: 0, e: 0.01, reward: 48.245, seudo-rew:0.0 max: 49.0943603515625 init-act: [32.404 49.859 32.227 49.648]
DEBUG:ADV:episode: 182199/10000000000, score: 0, e: 0.01, reward: 48.245, seudo-rew:0.0 max: 49.0943603515625 init-act: [32.404 49.859 32.227 49.648]
2025-07-16 05:09:12,050 - ADV - DEBUG - episode: 182299/10000000000, score: 0, e: 0.01, reward: 48.306, seudo-rew:0.0 max: 49.00736999511719 init-act: [32.483 49.966 32.12  49.704]
DEBUG:ADV:episode: 182299/10000000000, score: 0, e: 0.01, reward: 48.306, seudo-rew:0.0 max: 49.00736999511719 init-act: [32.483 49.966 32.12  49.704]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-184000.keras


2025-07-16 05:11:40,169 - ADV - DEBUG - episode: 184099/10000000000, score: 0, e: 0.01, reward: 48.534, seudo-rew:0.0 max: 49.06644821166992 init-act: [33.381 49.961 32.827 49.837]
DEBUG:ADV:episode: 184099/10000000000, score: 0, e: 0.01, reward: 48.534, seudo-rew:0.0 max: 49.06644821166992 init-act: [33.381 49.961 32.827 49.837]
2025-07-16 05:11:48,419 - ADV - DEBUG - episode: 184199/10000000000, score: 0, e: 0.01, reward: 48.393, seudo-rew:0.0 max: 48.99953842163086 init-act: [33.047 49.852 32.984 49.64 ]
DEBUG:ADV:episode: 184199/10000000000, score: 0, e: 0.01, reward: 48.393, seudo-rew:0.0 max: 48.99953842163086 init-act: [33.047 49.852 32.984 49.64 ]
2025-07-16 05:11:56,605 - ADV - DEBUG - episode: 184299/10000000000, score: 0, e: 0.01, reward: 48.454, seudo-rew:0.0 max: 49.0969123840332 init-act: [33.276 50.235 32.785 49.776]
DEBUG:ADV:episode: 184299/10000000000, score: 0, e: 0.01, reward: 48.454, seudo-rew:0.0 max: 49.0969123840332 init-act: [33.276 50.235 32.785 49.776]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-186000.keras


2025-07-16 05:14:24,418 - ADV - DEBUG - episode: 186099/10000000000, score: 0, e: 0.01, reward: 48.335, seudo-rew:0.0 max: 48.87942886352539 init-act: [32.953 49.825 32.852 49.693]
DEBUG:ADV:episode: 186099/10000000000, score: 0, e: 0.01, reward: 48.335, seudo-rew:0.0 max: 48.87942886352539 init-act: [32.953 49.825 32.852 49.693]
2025-07-16 05:14:32,640 - ADV - DEBUG - episode: 186199/10000000000, score: 0, e: 0.01, reward: 48.427, seudo-rew:0.0 max: 48.97315979003906 init-act: [33.073 49.368 32.868 49.469]
DEBUG:ADV:episode: 186199/10000000000, score: 0, e: 0.01, reward: 48.427, seudo-rew:0.0 max: 48.97315979003906 init-act: [33.073 49.368 32.868 49.469]
2025-07-16 05:14:40,867 - ADV - DEBUG - episode: 186299/10000000000, score: 0, e: 0.01, reward: 48.461, seudo-rew:0.0 max: 49.28984832763672 init-act: [33.182 50.282 33.541 49.927]
DEBUG:ADV:episode: 186299/10000000000, score: 0, e: 0.01, reward: 48.461, seudo-rew:0.0 max: 49.28984832763672 init-act: [33.182 50.282 33.541 49.927]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-188000.keras


2025-07-16 05:17:09,198 - ADV - DEBUG - episode: 188099/10000000000, score: 0, e: 0.01, reward: 48.541, seudo-rew:0.0 max: 49.36758804321289 init-act: [33.486 50.371 33.366 50.09 ]
DEBUG:ADV:episode: 188099/10000000000, score: 0, e: 0.01, reward: 48.541, seudo-rew:0.0 max: 49.36758804321289 init-act: [33.486 50.371 33.366 50.09 ]
2025-07-16 05:17:17,393 - ADV - DEBUG - episode: 188199/10000000000, score: 0, e: 0.01, reward: 48.0, seudo-rew:0.0 max: 49.38633728027344 init-act: [33.713 50.133 33.326 50.04 ]
DEBUG:ADV:episode: 188199/10000000000, score: 0, e: 0.01, reward: 48.0, seudo-rew:0.0 max: 49.38633728027344 init-act: [33.713 50.133 33.326 50.04 ]
2025-07-16 05:17:25,546 - ADV - DEBUG - episode: 188299/10000000000, score: 0, e: 0.01, reward: 48.338, seudo-rew:0.0 max: 49.31721496582031 init-act: [34.085 50.285 33.783 50.147]
DEBUG:ADV:episode: 188299/10000000000, score: 0, e: 0.01, reward: 48.338, seudo-rew:0.0 max: 49.31721496582031 init-act: [34.085 50.285 33.783 50.147]
2025-07-

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-190000.keras


2025-07-16 05:19:53,172 - ADV - DEBUG - episode: 190099/10000000000, score: 0, e: 0.01, reward: 48.431, seudo-rew:0.0 max: 48.71418380737305 init-act: [32.887 49.66  32.171 49.437]
DEBUG:ADV:episode: 190099/10000000000, score: 0, e: 0.01, reward: 48.431, seudo-rew:0.0 max: 48.71418380737305 init-act: [32.887 49.66  32.171 49.437]
2025-07-16 05:20:01,416 - ADV - DEBUG - episode: 190199/10000000000, score: 0, e: 0.01, reward: 48.612, seudo-rew:0.0 max: 48.99188232421875 init-act: [33.715 49.969 32.986 49.875]
DEBUG:ADV:episode: 190199/10000000000, score: 0, e: 0.01, reward: 48.612, seudo-rew:0.0 max: 48.99188232421875 init-act: [33.715 49.969 32.986 49.875]
2025-07-16 05:20:09,639 - ADV - DEBUG - episode: 190299/10000000000, score: 0, e: 0.01, reward: 48.71, seudo-rew:0.0 max: 48.94699478149414 init-act: [33.467 49.665 32.74  49.522]
DEBUG:ADV:episode: 190299/10000000000, score: 0, e: 0.01, reward: 48.71, seudo-rew:0.0 max: 48.94699478149414 init-act: [33.467 49.665 32.74  49.522]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-192000.keras


2025-07-16 05:22:38,026 - ADV - DEBUG - episode: 192099/10000000000, score: 0, e: 0.01, reward: 48.29, seudo-rew:0.0 max: 49.39510726928711 init-act: [33.287 50.027 33.585 49.965]
DEBUG:ADV:episode: 192099/10000000000, score: 0, e: 0.01, reward: 48.29, seudo-rew:0.0 max: 49.39510726928711 init-act: [33.287 50.027 33.585 49.965]
2025-07-16 05:22:46,186 - ADV - DEBUG - episode: 192199/10000000000, score: 0, e: 0.01, reward: 48.004, seudo-rew:0.0 max: 49.43426513671875 init-act: [33.096 50.197 33.031 50.008]
DEBUG:ADV:episode: 192199/10000000000, score: 0, e: 0.01, reward: 48.004, seudo-rew:0.0 max: 49.43426513671875 init-act: [33.096 50.197 33.031 50.008]
2025-07-16 05:22:54,426 - ADV - DEBUG - episode: 192299/10000000000, score: 0, e: 0.01, reward: 48.389, seudo-rew:0.0 max: 49.682926177978516 init-act: [33.428 50.68  33.26  50.379]
DEBUG:ADV:episode: 192299/10000000000, score: 0, e: 0.01, reward: 48.389, seudo-rew:0.0 max: 49.682926177978516 init-act: [33.428 50.68  33.26  50.379]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-194000.keras


2025-07-16 05:25:21,947 - ADV - DEBUG - episode: 194099/10000000000, score: 0, e: 0.01, reward: 48.593, seudo-rew:0.0 max: 49.78020095825195 init-act: [32.631 50.512 32.285 50.235]
DEBUG:ADV:episode: 194099/10000000000, score: 0, e: 0.01, reward: 48.593, seudo-rew:0.0 max: 49.78020095825195 init-act: [32.631 50.512 32.285 50.235]
2025-07-16 05:25:30,144 - ADV - DEBUG - episode: 194199/10000000000, score: 0, e: 0.01, reward: 48.609, seudo-rew:0.0 max: 49.671390533447266 init-act: [32.424 50.694 32.299 50.106]
DEBUG:ADV:episode: 194199/10000000000, score: 0, e: 0.01, reward: 48.609, seudo-rew:0.0 max: 49.671390533447266 init-act: [32.424 50.694 32.299 50.106]
2025-07-16 05:25:38,316 - ADV - DEBUG - episode: 194299/10000000000, score: 0, e: 0.01, reward: 48.421, seudo-rew:0.0 max: 49.57649612426758 init-act: [32.709 50.501 32.644 50.374]
DEBUG:ADV:episode: 194299/10000000000, score: 0, e: 0.01, reward: 48.421, seudo-rew:0.0 max: 49.57649612426758 init-act: [32.709 50.501 32.644 50.374]
20

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-196000.keras


2025-07-16 05:28:06,826 - ADV - DEBUG - episode: 196099/10000000000, score: 0, e: 0.01, reward: 48.699, seudo-rew:0.0 max: 49.32615280151367 init-act: [33.326 50.05  33.076 49.805]
DEBUG:ADV:episode: 196099/10000000000, score: 0, e: 0.01, reward: 48.699, seudo-rew:0.0 max: 49.32615280151367 init-act: [33.326 50.05  33.076 49.805]
2025-07-16 05:28:15,052 - ADV - DEBUG - episode: 196199/10000000000, score: 0, e: 0.01, reward: 48.473, seudo-rew:0.0 max: 49.2574462890625 init-act: [33.092 49.742 33.16  49.648]
DEBUG:ADV:episode: 196199/10000000000, score: 0, e: 0.01, reward: 48.473, seudo-rew:0.0 max: 49.2574462890625 init-act: [33.092 49.742 33.16  49.648]
2025-07-16 05:28:23,285 - ADV - DEBUG - episode: 196299/10000000000, score: 0, e: 0.01, reward: 48.656, seudo-rew:0.0 max: 49.36082458496094 init-act: [32.523 50.322 32.199 50.226]
DEBUG:ADV:episode: 196299/10000000000, score: 0, e: 0.01, reward: 48.656, seudo-rew:0.0 max: 49.36082458496094 init-act: [32.523 50.322 32.199 50.226]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-198000.keras


2025-07-16 05:30:51,342 - ADV - DEBUG - episode: 198099/10000000000, score: 0, e: 0.01, reward: 48.619, seudo-rew:0.0 max: 48.95562744140625 init-act: [33.791 49.807 33.179 49.423]
DEBUG:ADV:episode: 198099/10000000000, score: 0, e: 0.01, reward: 48.619, seudo-rew:0.0 max: 48.95562744140625 init-act: [33.791 49.807 33.179 49.423]
2025-07-16 05:30:59,617 - ADV - DEBUG - episode: 198199/10000000000, score: 0, e: 0.01, reward: 48.636, seudo-rew:0.0 max: 49.03593063354492 init-act: [33.734 50.004 33.389 49.779]
DEBUG:ADV:episode: 198199/10000000000, score: 0, e: 0.01, reward: 48.636, seudo-rew:0.0 max: 49.03593063354492 init-act: [33.734 50.004 33.389 49.779]
2025-07-16 05:31:07,861 - ADV - DEBUG - episode: 198299/10000000000, score: 0, e: 0.01, reward: 48.714, seudo-rew:0.0 max: 49.061119079589844 init-act: [33.411 49.843 33.18  49.685]
DEBUG:ADV:episode: 198299/10000000000, score: 0, e: 0.01, reward: 48.714, seudo-rew:0.0 max: 49.061119079589844 init-act: [33.411 49.843 33.18  49.685]
20

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-200000.keras


2025-07-16 05:33:36,379 - ADV - DEBUG - episode: 200099/10000000000, score: 0, e: 0.01, reward: 47.811, seudo-rew:0.0 max: 49.08259963989258 init-act: [33.863 49.836 35.215 49.692]
DEBUG:ADV:episode: 200099/10000000000, score: 0, e: 0.01, reward: 47.811, seudo-rew:0.0 max: 49.08259963989258 init-act: [33.863 49.836 35.215 49.692]
2025-07-16 05:33:44,618 - ADV - DEBUG - episode: 200199/10000000000, score: 0, e: 0.01, reward: 48.114, seudo-rew:0.0 max: 48.942039489746094 init-act: [33.    49.798 33.984 49.453]
DEBUG:ADV:episode: 200199/10000000000, score: 0, e: 0.01, reward: 48.114, seudo-rew:0.0 max: 48.942039489746094 init-act: [33.    49.798 33.984 49.453]
2025-07-16 05:33:52,854 - ADV - DEBUG - episode: 200299/10000000000, score: 0, e: 0.01, reward: 48.716, seudo-rew:0.0 max: 49.010623931884766 init-act: [32.719 49.836 33.471 49.586]
DEBUG:ADV:episode: 200299/10000000000, score: 0, e: 0.01, reward: 48.716, seudo-rew:0.0 max: 49.010623931884766 init-act: [32.719 49.836 33.471 49.586]


dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-202000.keras


2025-07-16 05:36:20,936 - ADV - DEBUG - episode: 202099/10000000000, score: 0, e: 0.01, reward: 48.468, seudo-rew:0.0 max: 49.06190490722656 init-act: [33.805 50.211 33.662 49.665]
DEBUG:ADV:episode: 202099/10000000000, score: 0, e: 0.01, reward: 48.468, seudo-rew:0.0 max: 49.06190490722656 init-act: [33.805 50.211 33.662 49.665]
2025-07-16 05:36:29,120 - ADV - DEBUG - episode: 202199/10000000000, score: 0, e: 0.01, reward: 48.398, seudo-rew:0.0 max: 49.10044860839844 init-act: [33.878 50.047 33.339 49.915]
DEBUG:ADV:episode: 202199/10000000000, score: 0, e: 0.01, reward: 48.398, seudo-rew:0.0 max: 49.10044860839844 init-act: [33.878 50.047 33.339 49.915]
2025-07-16 05:36:37,592 - ADV - DEBUG - episode: 202299/10000000000, score: 0, e: 0.01, reward: 48.454, seudo-rew:0.0 max: 49.190521240234375 init-act: [33.687 50.091 33.42  49.943]
DEBUG:ADV:episode: 202299/10000000000, score: 0, e: 0.01, reward: 48.454, seudo-rew:0.0 max: 49.190521240234375 init-act: [33.687 50.091 33.42  49.943]
20

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-204000.keras


2025-07-16 05:39:05,375 - ADV - DEBUG - episode: 204099/10000000000, score: 0, e: 0.01, reward: 48.281, seudo-rew:0.0 max: 49.14876174926758 init-act: [32.574 50.138 32.016 49.75 ]
DEBUG:ADV:episode: 204099/10000000000, score: 0, e: 0.01, reward: 48.281, seudo-rew:0.0 max: 49.14876174926758 init-act: [32.574 50.138 32.016 49.75 ]
2025-07-16 05:39:13,575 - ADV - DEBUG - episode: 204199/10000000000, score: 0, e: 0.01, reward: 48.365, seudo-rew:0.0 max: 49.0295524597168 init-act: [32.632 49.906 31.912 49.521]
DEBUG:ADV:episode: 204199/10000000000, score: 0, e: 0.01, reward: 48.365, seudo-rew:0.0 max: 49.0295524597168 init-act: [32.632 49.906 31.912 49.521]
2025-07-16 05:39:21,757 - ADV - DEBUG - episode: 204299/10000000000, score: 0, e: 0.01, reward: 48.325, seudo-rew:0.0 max: 49.20808792114258 init-act: [32.432 50.18  32.014 49.829]
DEBUG:ADV:episode: 204299/10000000000, score: 0, e: 0.01, reward: 48.325, seudo-rew:0.0 max: 49.20808792114258 init-act: [32.432 50.18  32.014 49.829]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-206000.keras


2025-07-16 05:41:50,098 - ADV - DEBUG - episode: 206099/10000000000, score: 0, e: 0.01, reward: 47.272, seudo-rew:0.0 max: 49.012535095214844 init-act: [34.023 49.625 33.677 49.223]
DEBUG:ADV:episode: 206099/10000000000, score: 0, e: 0.01, reward: 47.272, seudo-rew:0.0 max: 49.012535095214844 init-act: [34.023 49.625 33.677 49.223]
2025-07-16 05:41:58,303 - ADV - DEBUG - episode: 206199/10000000000, score: 0, e: 0.01, reward: 48.877, seudo-rew:0.0 max: 49.01758575439453 init-act: [34.247 49.766 33.793 49.686]
DEBUG:ADV:episode: 206199/10000000000, score: 0, e: 0.01, reward: 48.877, seudo-rew:0.0 max: 49.01758575439453 init-act: [34.247 49.766 33.793 49.686]
2025-07-16 05:42:06,511 - ADV - DEBUG - episode: 206299/10000000000, score: 0, e: 0.01, reward: 48.645, seudo-rew:0.0 max: 48.982276916503906 init-act: [34.218 49.811 34.131 49.66 ]
DEBUG:ADV:episode: 206299/10000000000, score: 0, e: 0.01, reward: 48.645, seudo-rew:0.0 max: 48.982276916503906 init-act: [34.218 49.811 34.131 49.66 ]


dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-208000.keras


2025-07-16 05:44:34,430 - ADV - DEBUG - episode: 208099/10000000000, score: 0, e: 0.01, reward: 48.711, seudo-rew:0.0 max: 49.21669006347656 init-act: [33.007 50.031 32.51  49.689]
DEBUG:ADV:episode: 208099/10000000000, score: 0, e: 0.01, reward: 48.711, seudo-rew:0.0 max: 49.21669006347656 init-act: [33.007 50.031 32.51  49.689]
2025-07-16 05:44:42,675 - ADV - DEBUG - episode: 208199/10000000000, score: 0, e: 0.01, reward: 48.174, seudo-rew:0.0 max: 49.26333999633789 init-act: [34.451 50.22  33.744 49.835]
DEBUG:ADV:episode: 208199/10000000000, score: 0, e: 0.01, reward: 48.174, seudo-rew:0.0 max: 49.26333999633789 init-act: [34.451 50.22  33.744 49.835]
2025-07-16 05:44:50,860 - ADV - DEBUG - episode: 208299/10000000000, score: 0, e: 0.01, reward: 48.735, seudo-rew:0.0 max: 49.15749740600586 init-act: [33.346 50.097 32.824 49.789]
DEBUG:ADV:episode: 208299/10000000000, score: 0, e: 0.01, reward: 48.735, seudo-rew:0.0 max: 49.15749740600586 init-act: [33.346 50.097 32.824 49.789]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-210000.keras


2025-07-16 05:47:19,560 - ADV - DEBUG - episode: 210099/10000000000, score: 0, e: 0.01, reward: 48.709, seudo-rew:0.0 max: 49.14305877685547 init-act: [33.829 50.134 33.099 49.918]
DEBUG:ADV:episode: 210099/10000000000, score: 0, e: 0.01, reward: 48.709, seudo-rew:0.0 max: 49.14305877685547 init-act: [33.829 50.134 33.099 49.918]
2025-07-16 05:47:27,794 - ADV - DEBUG - episode: 210199/10000000000, score: 0, e: 0.01, reward: 48.6, seudo-rew:0.0 max: 49.22924041748047 init-act: [34.094 49.965 33.54  49.952]
DEBUG:ADV:episode: 210199/10000000000, score: 0, e: 0.01, reward: 48.6, seudo-rew:0.0 max: 49.22924041748047 init-act: [34.094 49.965 33.54  49.952]
2025-07-16 05:47:35,980 - ADV - DEBUG - episode: 210299/10000000000, score: 0, e: 0.01, reward: 48.205, seudo-rew:0.0 max: 49.239593505859375 init-act: [34.051 50.191 33.363 49.835]
DEBUG:ADV:episode: 210299/10000000000, score: 0, e: 0.01, reward: 48.205, seudo-rew:0.0 max: 49.239593505859375 init-act: [34.051 50.191 33.363 49.835]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-212000.keras


2025-07-16 05:50:04,742 - ADV - DEBUG - episode: 212099/10000000000, score: 0, e: 0.01, reward: 48.565, seudo-rew:0.0 max: 49.28959274291992 init-act: [33.972 50.155 33.745 50.189]
DEBUG:ADV:episode: 212099/10000000000, score: 0, e: 0.01, reward: 48.565, seudo-rew:0.0 max: 49.28959274291992 init-act: [33.972 50.155 33.745 50.189]
2025-07-16 05:50:12,910 - ADV - DEBUG - episode: 212199/10000000000, score: 0, e: 0.01, reward: 48.34, seudo-rew:0.0 max: 49.3825569152832 init-act: [32.988 50.323 32.789 50.05 ]
DEBUG:ADV:episode: 212199/10000000000, score: 0, e: 0.01, reward: 48.34, seudo-rew:0.0 max: 49.3825569152832 init-act: [32.988 50.323 32.789 50.05 ]
2025-07-16 05:50:21,172 - ADV - DEBUG - episode: 212299/10000000000, score: 0, e: 0.01, reward: 48.423, seudo-rew:0.0 max: 49.317543029785156 init-act: [32.933 50.322 32.596 50.005]
DEBUG:ADV:episode: 212299/10000000000, score: 0, e: 0.01, reward: 48.423, seudo-rew:0.0 max: 49.317543029785156 init-act: [32.933 50.322 32.596 50.005]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-214000.keras


2025-07-16 05:52:49,450 - ADV - DEBUG - episode: 214099/10000000000, score: 0, e: 0.01, reward: 48.919, seudo-rew:0.0 max: 49.56193923950195 init-act: [32.232 50.105 31.877 49.917]
DEBUG:ADV:episode: 214099/10000000000, score: 0, e: 0.01, reward: 48.919, seudo-rew:0.0 max: 49.56193923950195 init-act: [32.232 50.105 31.877 49.917]
2025-07-16 05:52:57,637 - ADV - DEBUG - episode: 214199/10000000000, score: 0, e: 0.01, reward: 48.601, seudo-rew:0.0 max: 49.66022872924805 init-act: [32.715 50.548 32.406 50.397]
DEBUG:ADV:episode: 214199/10000000000, score: 0, e: 0.01, reward: 48.601, seudo-rew:0.0 max: 49.66022872924805 init-act: [32.715 50.548 32.406 50.397]
2025-07-16 05:53:05,903 - ADV - DEBUG - episode: 214299/10000000000, score: 0, e: 0.01, reward: 48.761, seudo-rew:0.0 max: 49.68338394165039 init-act: [33.207 50.652 32.689 50.465]
DEBUG:ADV:episode: 214299/10000000000, score: 0, e: 0.01, reward: 48.761, seudo-rew:0.0 max: 49.68338394165039 init-act: [33.207 50.652 32.689 50.465]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-216000.keras


2025-07-16 05:55:33,919 - ADV - DEBUG - episode: 216099/10000000000, score: 0, e: 0.01, reward: 48.565, seudo-rew:0.0 max: 49.72124481201172 init-act: [33.229 50.691 32.897 50.2  ]
DEBUG:ADV:episode: 216099/10000000000, score: 0, e: 0.01, reward: 48.565, seudo-rew:0.0 max: 49.72124481201172 init-act: [33.229 50.691 32.897 50.2  ]
2025-07-16 05:55:42,183 - ADV - DEBUG - episode: 216199/10000000000, score: 0, e: 0.01, reward: 48.151, seudo-rew:0.0 max: 49.62729263305664 init-act: [33.613 50.457 33.442 50.432]
DEBUG:ADV:episode: 216199/10000000000, score: 0, e: 0.01, reward: 48.151, seudo-rew:0.0 max: 49.62729263305664 init-act: [33.613 50.457 33.442 50.432]
2025-07-16 05:55:50,533 - ADV - DEBUG - episode: 216299/10000000000, score: 0, e: 0.01, reward: 48.665, seudo-rew:0.0 max: 49.78261184692383 init-act: [33.524 50.54  32.875 50.595]
DEBUG:ADV:episode: 216299/10000000000, score: 0, e: 0.01, reward: 48.665, seudo-rew:0.0 max: 49.78261184692383 init-act: [33.524 50.54  32.875 50.595]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-218000.keras


2025-07-16 05:58:18,579 - ADV - DEBUG - episode: 218099/10000000000, score: 0, e: 0.01, reward: 48.694, seudo-rew:0.0 max: 49.66677474975586 init-act: [34.492 50.405 35.072 50.066]
DEBUG:ADV:episode: 218099/10000000000, score: 0, e: 0.01, reward: 48.694, seudo-rew:0.0 max: 49.66677474975586 init-act: [34.492 50.405 35.072 50.066]
2025-07-16 05:58:26,835 - ADV - DEBUG - episode: 218199/10000000000, score: 0, e: 0.01, reward: 48.336, seudo-rew:0.0 max: 49.7943000793457 init-act: [34.188 50.613 35.221 50.176]
DEBUG:ADV:episode: 218199/10000000000, score: 0, e: 0.01, reward: 48.336, seudo-rew:0.0 max: 49.7943000793457 init-act: [34.188 50.613 35.221 50.176]
2025-07-16 05:58:35,042 - ADV - DEBUG - episode: 218299/10000000000, score: 0, e: 0.01, reward: 48.749, seudo-rew:0.0 max: 49.562042236328125 init-act: [33.717 50.316 34.746 50.292]
DEBUG:ADV:episode: 218299/10000000000, score: 0, e: 0.01, reward: 48.749, seudo-rew:0.0 max: 49.562042236328125 init-act: [33.717 50.316 34.746 50.292]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-220000.keras


2025-07-16 06:01:03,845 - ADV - DEBUG - episode: 220099/10000000000, score: 0, e: 0.01, reward: 48.863, seudo-rew:0.0 max: 49.129859924316406 init-act: [32.328 49.915 32.246 49.617]
DEBUG:ADV:episode: 220099/10000000000, score: 0, e: 0.01, reward: 48.863, seudo-rew:0.0 max: 49.129859924316406 init-act: [32.328 49.915 32.246 49.617]
2025-07-16 06:01:12,205 - ADV - DEBUG - episode: 220199/10000000000, score: 0, e: 0.01, reward: 48.483, seudo-rew:0.0 max: 49.04850769042969 init-act: [32.916 49.975 32.742 49.71 ]
DEBUG:ADV:episode: 220199/10000000000, score: 0, e: 0.01, reward: 48.483, seudo-rew:0.0 max: 49.04850769042969 init-act: [32.916 49.975 32.742 49.71 ]
2025-07-16 06:01:20,496 - ADV - DEBUG - episode: 220299/10000000000, score: 0, e: 0.01, reward: 48.611, seudo-rew:0.0 max: 49.00780487060547 init-act: [32.875 49.718 32.553 49.468]
DEBUG:ADV:episode: 220299/10000000000, score: 0, e: 0.01, reward: 48.611, seudo-rew:0.0 max: 49.00780487060547 init-act: [32.875 49.718 32.553 49.468]
20

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-222000.keras


2025-07-16 06:03:48,021 - ADV - DEBUG - episode: 222099/10000000000, score: 0, e: 0.01, reward: 48.399, seudo-rew:0.0 max: 48.59602355957031 init-act: [33.656 49.249 33.555 49.044]
DEBUG:ADV:episode: 222099/10000000000, score: 0, e: 0.01, reward: 48.399, seudo-rew:0.0 max: 48.59602355957031 init-act: [33.656 49.249 33.555 49.044]
2025-07-16 06:03:56,232 - ADV - DEBUG - episode: 222199/10000000000, score: 0, e: 0.01, reward: 47.831, seudo-rew:0.0 max: 48.53212356567383 init-act: [33.868 49.404 33.713 49.035]
DEBUG:ADV:episode: 222199/10000000000, score: 0, e: 0.01, reward: 47.831, seudo-rew:0.0 max: 48.53212356567383 init-act: [33.868 49.404 33.713 49.035]
2025-07-16 06:04:04,497 - ADV - DEBUG - episode: 222299/10000000000, score: 0, e: 0.01, reward: 48.486, seudo-rew:0.0 max: 48.515541076660156 init-act: [33.909 49.46  33.625 49.026]
DEBUG:ADV:episode: 222299/10000000000, score: 0, e: 0.01, reward: 48.486, seudo-rew:0.0 max: 48.515541076660156 init-act: [33.909 49.46  33.625 49.026]
20

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-224000.keras


2025-07-16 06:06:32,560 - ADV - DEBUG - episode: 224099/10000000000, score: 0, e: 0.01, reward: 48.217, seudo-rew:0.0 max: 49.20705795288086 init-act: [32.92  49.901 32.825 49.597]
DEBUG:ADV:episode: 224099/10000000000, score: 0, e: 0.01, reward: 48.217, seudo-rew:0.0 max: 49.20705795288086 init-act: [32.92  49.901 32.825 49.597]
2025-07-16 06:06:40,872 - ADV - DEBUG - episode: 224199/10000000000, score: 0, e: 0.01, reward: 48.591, seudo-rew:0.0 max: 49.10881423950195 init-act: [33.514 50.046 33.271 49.823]
DEBUG:ADV:episode: 224199/10000000000, score: 0, e: 0.01, reward: 48.591, seudo-rew:0.0 max: 49.10881423950195 init-act: [33.514 50.046 33.271 49.823]
2025-07-16 06:06:49,002 - ADV - DEBUG - episode: 224299/10000000000, score: 0, e: 0.01, reward: 48.618, seudo-rew:0.0 max: 49.53319549560547 init-act: [33.098 50.109 32.998 49.887]
DEBUG:ADV:episode: 224299/10000000000, score: 0, e: 0.01, reward: 48.618, seudo-rew:0.0 max: 49.53319549560547 init-act: [33.098 50.109 32.998 49.887]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-226000.keras


2025-07-16 06:09:16,554 - ADV - DEBUG - episode: 226099/10000000000, score: 0, e: 0.01, reward: 48.644, seudo-rew:0.0 max: 49.25614547729492 init-act: [32.561 49.938 32.279 49.614]
DEBUG:ADV:episode: 226099/10000000000, score: 0, e: 0.01, reward: 48.644, seudo-rew:0.0 max: 49.25614547729492 init-act: [32.561 49.938 32.279 49.614]
2025-07-16 06:09:24,750 - ADV - DEBUG - episode: 226199/10000000000, score: 0, e: 0.01, reward: 48.476, seudo-rew:0.0 max: 49.16114044189453 init-act: [32.653 49.986 32.19  49.672]
DEBUG:ADV:episode: 226199/10000000000, score: 0, e: 0.01, reward: 48.476, seudo-rew:0.0 max: 49.16114044189453 init-act: [32.653 49.986 32.19  49.672]
2025-07-16 06:09:32,928 - ADV - DEBUG - episode: 226299/10000000000, score: 0, e: 0.01, reward: 48.825, seudo-rew:0.0 max: 49.22942352294922 init-act: [32.838 50.134 32.2   49.831]
DEBUG:ADV:episode: 226299/10000000000, score: 0, e: 0.01, reward: 48.825, seudo-rew:0.0 max: 49.22942352294922 init-act: [32.838 50.134 32.2   49.831]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-228000.keras


2025-07-16 06:12:01,440 - ADV - DEBUG - episode: 228099/10000000000, score: 0, e: 0.01, reward: 48.293, seudo-rew:0.0 max: 49.76042175292969 init-act: [33.92  50.53  33.238 49.993]
DEBUG:ADV:episode: 228099/10000000000, score: 0, e: 0.01, reward: 48.293, seudo-rew:0.0 max: 49.76042175292969 init-act: [33.92  50.53  33.238 49.993]
2025-07-16 06:12:09,617 - ADV - DEBUG - episode: 228199/10000000000, score: 0, e: 0.01, reward: 48.27, seudo-rew:0.0 max: 49.75524139404297 init-act: [34.82  50.356 33.824 50.078]
DEBUG:ADV:episode: 228199/10000000000, score: 0, e: 0.01, reward: 48.27, seudo-rew:0.0 max: 49.75524139404297 init-act: [34.82  50.356 33.824 50.078]
2025-07-16 06:12:17,742 - ADV - DEBUG - episode: 228299/10000000000, score: 0, e: 0.01, reward: 48.266, seudo-rew:0.0 max: 49.724754333496094 init-act: [34.285 50.685 33.323 50.511]
DEBUG:ADV:episode: 228299/10000000000, score: 0, e: 0.01, reward: 48.266, seudo-rew:0.0 max: 49.724754333496094 init-act: [34.285 50.685 33.323 50.511]
2025

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-230000.keras


2025-07-16 06:14:44,877 - ADV - DEBUG - episode: 230099/10000000000, score: 0, e: 0.01, reward: 48.463, seudo-rew:0.0 max: 49.792179107666016 init-act: [33.217 50.503 32.724 50.457]
DEBUG:ADV:episode: 230099/10000000000, score: 0, e: 0.01, reward: 48.463, seudo-rew:0.0 max: 49.792179107666016 init-act: [33.217 50.503 32.724 50.457]
2025-07-16 06:14:52,987 - ADV - DEBUG - episode: 230199/10000000000, score: 0, e: 0.01, reward: 48.598, seudo-rew:0.0 max: 49.56609344482422 init-act: [33.154 50.559 32.992 50.246]
DEBUG:ADV:episode: 230199/10000000000, score: 0, e: 0.01, reward: 48.598, seudo-rew:0.0 max: 49.56609344482422 init-act: [33.154 50.559 32.992 50.246]
2025-07-16 06:15:01,177 - ADV - DEBUG - episode: 230299/10000000000, score: 0, e: 0.01, reward: 48.553, seudo-rew:0.0 max: 49.53578186035156 init-act: [33.078 50.179 32.715 49.961]
DEBUG:ADV:episode: 230299/10000000000, score: 0, e: 0.01, reward: 48.553, seudo-rew:0.0 max: 49.53578186035156 init-act: [33.078 50.179 32.715 49.961]
20

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-232000.keras


2025-07-16 06:17:30,016 - ADV - DEBUG - episode: 232099/10000000000, score: 0, e: 0.01, reward: 48.42, seudo-rew:0.0 max: 49.485694885253906 init-act: [34.324 50.151 34.213 49.964]
DEBUG:ADV:episode: 232099/10000000000, score: 0, e: 0.01, reward: 48.42, seudo-rew:0.0 max: 49.485694885253906 init-act: [34.324 50.151 34.213 49.964]
2025-07-16 06:17:38,183 - ADV - DEBUG - episode: 232199/10000000000, score: 0, e: 0.01, reward: 48.687, seudo-rew:0.0 max: 49.50177001953125 init-act: [34.01  50.402 33.683 50.163]
DEBUG:ADV:episode: 232199/10000000000, score: 0, e: 0.01, reward: 48.687, seudo-rew:0.0 max: 49.50177001953125 init-act: [34.01  50.402 33.683 50.163]
2025-07-16 06:17:46,378 - ADV - DEBUG - episode: 232299/10000000000, score: 0, e: 0.01, reward: 48.448, seudo-rew:0.0 max: 49.348533630371094 init-act: [33.937 50.227 33.625 50.047]
DEBUG:ADV:episode: 232299/10000000000, score: 0, e: 0.01, reward: 48.448, seudo-rew:0.0 max: 49.348533630371094 init-act: [33.937 50.227 33.625 50.047]
20

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-234000.keras


2025-07-16 06:20:13,712 - ADV - DEBUG - episode: 234099/10000000000, score: 0, e: 0.01, reward: 48.757, seudo-rew:0.0 max: 49.68583297729492 init-act: [31.99  50.352 32.086 49.663]
DEBUG:ADV:episode: 234099/10000000000, score: 0, e: 0.01, reward: 48.757, seudo-rew:0.0 max: 49.68583297729492 init-act: [31.99  50.352 32.086 49.663]
2025-07-16 06:20:21,909 - ADV - DEBUG - episode: 234199/10000000000, score: 0, e: 0.01, reward: 48.454, seudo-rew:0.0 max: 49.68217468261719 init-act: [31.816 50.708 31.561 50.388]
DEBUG:ADV:episode: 234199/10000000000, score: 0, e: 0.01, reward: 48.454, seudo-rew:0.0 max: 49.68217468261719 init-act: [31.816 50.708 31.561 50.388]
2025-07-16 06:20:30,091 - ADV - DEBUG - episode: 234299/10000000000, score: 0, e: 0.01, reward: 48.625, seudo-rew:0.0 max: 49.6071662902832 init-act: [32.32  50.382 32.532 50.133]
DEBUG:ADV:episode: 234299/10000000000, score: 0, e: 0.01, reward: 48.625, seudo-rew:0.0 max: 49.6071662902832 init-act: [32.32  50.382 32.532 50.133]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-236000.keras


2025-07-16 06:23:02,745 - ADV - DEBUG - episode: 236099/10000000000, score: 0, e: 0.01, reward: 48.455, seudo-rew:0.0 max: 49.454566955566406 init-act: [35.597 50.261 36.213 50.22 ]
DEBUG:ADV:episode: 236099/10000000000, score: 0, e: 0.01, reward: 48.455, seudo-rew:0.0 max: 49.454566955566406 init-act: [35.597 50.261 36.213 50.22 ]
2025-07-16 06:23:10,947 - ADV - DEBUG - episode: 236199/10000000000, score: 0, e: 0.01, reward: 48.007, seudo-rew:0.0 max: 49.6795768737793 init-act: [33.546 50.489 34.025 50.361]
DEBUG:ADV:episode: 236199/10000000000, score: 0, e: 0.01, reward: 48.007, seudo-rew:0.0 max: 49.6795768737793 init-act: [33.546 50.489 34.025 50.361]
2025-07-16 06:23:19,183 - ADV - DEBUG - episode: 236299/10000000000, score: 0, e: 0.01, reward: 48.237, seudo-rew:0.0 max: 49.329952239990234 init-act: [33.916 49.621 34.553 49.621]
DEBUG:ADV:episode: 236299/10000000000, score: 0, e: 0.01, reward: 48.237, seudo-rew:0.0 max: 49.329952239990234 init-act: [33.916 49.621 34.553 49.621]
20

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-238000.keras


2025-07-16 06:25:46,885 - ADV - DEBUG - episode: 238099/10000000000, score: 0, e: 0.01, reward: 48.732, seudo-rew:0.0 max: 48.78145217895508 init-act: [32.098 49.61  31.922 49.513]
DEBUG:ADV:episode: 238099/10000000000, score: 0, e: 0.01, reward: 48.732, seudo-rew:0.0 max: 48.78145217895508 init-act: [32.098 49.61  31.922 49.513]
2025-07-16 06:25:55,126 - ADV - DEBUG - episode: 238199/10000000000, score: 0, e: 0.01, reward: 48.27, seudo-rew:0.0 max: 48.86508560180664 init-act: [32.118 49.618 31.949 49.324]
DEBUG:ADV:episode: 238199/10000000000, score: 0, e: 0.01, reward: 48.27, seudo-rew:0.0 max: 48.86508560180664 init-act: [32.118 49.618 31.949 49.324]
2025-07-16 06:26:03,387 - ADV - DEBUG - episode: 238299/10000000000, score: 0, e: 0.01, reward: 48.529, seudo-rew:0.0 max: 48.87235641479492 init-act: [31.651 49.606 31.811 49.504]
DEBUG:ADV:episode: 238299/10000000000, score: 0, e: 0.01, reward: 48.529, seudo-rew:0.0 max: 48.87235641479492 init-act: [31.651 49.606 31.811 49.504]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-240000.keras


2025-07-16 06:28:31,272 - ADV - DEBUG - episode: 240099/10000000000, score: 0, e: 0.01, reward: 48.662, seudo-rew:0.0 max: 48.417884826660156 init-act: [32.541 49.361 31.613 48.851]
DEBUG:ADV:episode: 240099/10000000000, score: 0, e: 0.01, reward: 48.662, seudo-rew:0.0 max: 48.417884826660156 init-act: [32.541 49.361 31.613 48.851]
2025-07-16 06:28:39,509 - ADV - DEBUG - episode: 240199/10000000000, score: 0, e: 0.01, reward: 48.5, seudo-rew:0.0 max: 48.31633758544922 init-act: [31.953 49.095 31.518 48.988]
DEBUG:ADV:episode: 240199/10000000000, score: 0, e: 0.01, reward: 48.5, seudo-rew:0.0 max: 48.31633758544922 init-act: [31.953 49.095 31.518 48.988]
2025-07-16 06:28:47,674 - ADV - DEBUG - episode: 240299/10000000000, score: 0, e: 0.01, reward: 48.48, seudo-rew:0.0 max: 48.35555648803711 init-act: [32.045 49.36  32.064 48.937]
DEBUG:ADV:episode: 240299/10000000000, score: 0, e: 0.01, reward: 48.48, seudo-rew:0.0 max: 48.35555648803711 init-act: [32.045 49.36  32.064 48.937]
2025-07-

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-242000.keras


2025-07-16 06:31:15,747 - ADV - DEBUG - episode: 242099/10000000000, score: 0, e: 0.01, reward: 48.328, seudo-rew:0.0 max: 48.86609649658203 init-act: [32.731 49.389 32.734 49.382]
DEBUG:ADV:episode: 242099/10000000000, score: 0, e: 0.01, reward: 48.328, seudo-rew:0.0 max: 48.86609649658203 init-act: [32.731 49.389 32.734 49.382]
2025-07-16 06:31:24,047 - ADV - DEBUG - episode: 242199/10000000000, score: 0, e: 0.01, reward: 48.616, seudo-rew:0.0 max: 48.8846321105957 init-act: [33.314 49.882 33.293 49.466]
DEBUG:ADV:episode: 242199/10000000000, score: 0, e: 0.01, reward: 48.616, seudo-rew:0.0 max: 48.8846321105957 init-act: [33.314 49.882 33.293 49.466]
2025-07-16 06:31:32,231 - ADV - DEBUG - episode: 242299/10000000000, score: 0, e: 0.01, reward: 48.51, seudo-rew:0.0 max: 48.869136810302734 init-act: [33.537 49.688 33.549 49.399]
DEBUG:ADV:episode: 242299/10000000000, score: 0, e: 0.01, reward: 48.51, seudo-rew:0.0 max: 48.869136810302734 init-act: [33.537 49.688 33.549 49.399]
2025-0

dir crearted here -- C:\Practicum\Practicum_ Adversarial Decision-making of LLMs-20250513T162528Z-1-001\Final_HN\Final_HN\code_H\trained_model\test_adv_RL_400000_eps_0.01_lr_0.001\gpt\model-244000.keras


2025-07-16 06:34:00,718 - ADV - DEBUG - episode: 244099/10000000000, score: 0, e: 0.01, reward: 48.065, seudo-rew:0.0 max: 49.439048767089844 init-act: [33.394 50.37  33.605 50.465]
DEBUG:ADV:episode: 244099/10000000000, score: 0, e: 0.01, reward: 48.065, seudo-rew:0.0 max: 49.439048767089844 init-act: [33.394 50.37  33.605 50.465]
2025-07-16 06:34:08,916 - ADV - DEBUG - episode: 244199/10000000000, score: 0, e: 0.01, reward: 48.312, seudo-rew:0.0 max: 49.052635192871094 init-act: [31.906 49.776 31.897 49.585]
DEBUG:ADV:episode: 244199/10000000000, score: 0, e: 0.01, reward: 48.312, seudo-rew:0.0 max: 49.052635192871094 init-act: [31.906 49.776 31.897 49.585]
2025-07-16 06:34:17,157 - ADV - DEBUG - episode: 244299/10000000000, score: 0, e: 0.01, reward: 48.586, seudo-rew:0.0 max: 49.23897933959961 init-act: [32.1   49.775 32.029 49.703]
DEBUG:ADV:episode: 244299/10000000000, score: 0, e: 0.01, reward: 48.586, seudo-rew:0.0 max: 49.23897933959961 init-act: [32.1   49.775 32.029 49.703]


KeyboardInterrupt: 

In [1]:
import os
import json
import h5py
import numpy as np
from tensorflow.keras.models import load_model, Model
from learner_env import LearnverEnv
from ddqn import DQNAgent
from util.logger import LogFile, DLogger

# Utility: fix Keras HDF5 compatibility
def load_h5_model_patch_input_layer(path):
    """Load a Keras HDF5 model after renaming ``batch_shape`` in its stored config.

    The JSON stored in the file's ``model_config`` attribute is parsed, and
    every layer config that contains a ``batch_shape`` key has that key renamed
    to ``batch_input_shape``. The patched JSON is written back to the file in
    place, but only when at least one layer was actually renamed, so a file
    that is already patched is not rewritten on later calls.

    NOTE(review): the rename presumably works around a Keras version mismatch
    between the library that saved the file and the one loading it -- confirm.

    Args:
        path: Path to the ``.h5`` model file. The file is opened in ``'r+'``
            (read/write) mode.

    Returns:
        The object returned by ``load_model(path, compile=False, ...)``.

    Raises:
        ValueError: If the file has no ``model_config`` attribute.
    """
    with h5py.File(path, 'r+') as f:
        model_config = f.attrs.get('model_config')
        if model_config is None:
            raise ValueError("No model config found in file.")
        # The attribute may be returned as bytes; json.loads below gets text.
        if isinstance(model_config, bytes):
            model_config = model_config.decode('utf-8')
        model_config_json = json.loads(model_config)

        patched = False
        for layer in model_config_json['config']['layers']:
            config = layer['config']
            if 'batch_shape' in config:
                config['batch_input_shape'] = config.pop('batch_shape')
                patched = True

        # Only touch the file when something changed; rewriting an unchanged
        # config on every call would needlessly mutate the saved model.
        if patched:
            f.attrs.modify('model_config', json.dumps(model_config_json).encode('utf-8'))
    return load_model(path, compile=False, custom_objects={'Functional': Model})

# Training function for one config
def train_adv_model(buf, lr, eps, learner_name="gpt", total_episodes=50000):
    """Train one DQN adversary against a saved learner model for a single config.

    Args:
        buf: Forwarded to ``DQNAgent`` as ``buffer_size``; also embedded in the
            output folder name.
        lr: Forwarded to ``DQNAgent`` as ``lr``; also embedded in the output
            folder name.
        eps: Forwarded to ``DQNAgent`` as ``epsilon``; also embedded in the
            output folder name.
        learner_name: Selects the learner model folder and the final output
            sub-folder.
        total_episodes: Forwarded to ``agent.train``.

    Returns:
        None. When the learner model file does not exist, a warning is printed
        and the function returns without training.
    """
    np.set_printoptions(precision=3)

    # Saved learner model (RNN policy) the adversary is trained against.
    learner_model_path = f'trained_model/RNN_learner_single/cells_5{learner_name}/model-49900.weights_final.h5'
    learner_available = os.path.exists(learner_model_path)
    if not learner_available:
        print(f"⚠ Learner model not found: {learner_model_path}")
        return

    # One folder per hyperparameter combination, one sub-folder per learner.
    run_folder = f'test_adv_RL_{buf}eps{eps}lr{lr}'
    output_path = os.path.join('trained_model', run_folder, learner_name)
    os.makedirs(output_path, exist_ok=True)
    print(f'📦 Training: buf={buf}, lr={lr}, eps={eps} → {output_path}')

    with LogFile(output_path, 'run.log'):
        log = DLogger.logger()
        log.debug("Loading learner model from {}".format(learner_model_path))
        model = load_h5_model_patch_input_layer(learner_model_path)

        env = LearnverEnv(model, 2, 1000)
        env.reset()

        # The observation size is read from the environment after the reset.
        state_dim = env.observation_space.shape[0]
        agent_settings = dict(
            state_size=state_dim,
            action_size=4,
            buffer_size=buf,
            epsilon=eps,
            lr=lr,
        )
        agent = DQNAgent(**agent_settings)

        agent.train(
            env=env,
            output_path=output_path,
            batch_size=1000,
            total_episodes=total_episodes,
        )

# ------------------------------
# Hyperparameter grid
# ------------------------------
buffers = [400000]
learning_rates = [0.001, 0.0001, 1e-5]
epsilons = [0.01, 0.1, 0.2]

# Learner whose saved model the adversary trains against: 'gpt', 'gemini', etc.
learner_name = "gpt"
episodes_per_run = 50000  # reduced for tuning

# Grid search: one training run per (buffer, learning rate, epsilon) combination,
# visited in the same order as nested buffer -> learning-rate -> epsilon loops.
for buf, lr, eps in (
    (size, rate, explore)
    for size in buffers
    for rate in learning_rates
    for explore in epsilons
):
    print(buf, lr, eps)
    train_adv_model(buf, lr, eps, learner_name, episodes_per_run)

400000 0.001 0.01
📦 Training: buf=400000, lr=0.001, eps=0.01 → trained_model\test_adv_RL_400000eps0.01lr0.001\gpt


2025-07-17 23:15:15,819 - ADV - DEBUG - version control: (None, None)
2025-07-17 23:15:15,822 - ADV - DEBUG - Loading learner model from trained_model/RNN_learner_single/cells_5gpt/model-49900.weights_final.h5
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-07-17 23:15:16,447 - ADV - DEBUG - Trained model saved to: trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-0.keras


dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-0.keras


2025-07-17 23:15:23,694 - ADV - DEBUG - episode: 99/50000, score: 0, e: 0.01, reward: 19.884, seudo-rew:0.0 max: 0.9324346780776978 init-act: [-0.281  0.32   1.233  0.153]
2025-07-17 23:15:32,250 - ADV - DEBUG - episode: 199/50000, score: 0, e: 0.01, reward: 20.012, seudo-rew:0.0 max: 1.0522441864013672 init-act: [0.379 0.828 1.361 0.768]
2025-07-17 23:15:39,891 - ADV - DEBUG - episode: 299/50000, score: 0, e: 0.01, reward: 21.851, seudo-rew:0.0 max: 1.3724719285964966 init-act: [0.97  1.402 1.531 1.542]
2025-07-17 23:15:47,143 - ADV - DEBUG - episode: 399/50000, score: 0, e: 0.01, reward: 28.907, seudo-rew:0.0 max: 2.0750691890716553 init-act: [1.658 1.973 1.76  2.936]
2025-07-17 23:15:58,134 - ADV - DEBUG - episode: 499/50000, score: 0, e: 0.01, reward: 35.852, seudo-rew:0.0 max: 3.334939479827881 init-act: [2.69  3.061 2.007 3.932]
2025-07-17 23:16:08,564 - ADV - DEBUG - episode: 599/50000, score: 0, e: 0.01, reward: 35.249, seudo-rew:0.0 max: 4.302211284637451 init-act: [3.508 3.98

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-2000.keras


2025-07-17 23:18:46,256 - ADV - DEBUG - episode: 2099/50000, score: 0, e: 0.01, reward: 39.99, seudo-rew:0.0 max: 20.044692993164062 init-act: [12.395 17.506 13.154 21.022]
2025-07-17 23:18:55,466 - ADV - DEBUG - episode: 2199/50000, score: 0, e: 0.01, reward: 39.34, seudo-rew:0.0 max: 20.967355728149414 init-act: [12.983 18.25  14.231 21.924]
2025-07-17 23:19:03,733 - ADV - DEBUG - episode: 2299/50000, score: 0, e: 0.01, reward: 41.468, seudo-rew:0.0 max: 21.92399024963379 init-act: [13.859 19.438 15.38  22.93 ]
2025-07-17 23:19:12,000 - ADV - DEBUG - episode: 2399/50000, score: 0, e: 0.01, reward: 41.97, seudo-rew:0.0 max: 22.826431274414062 init-act: [14.369 20.101 16.529 23.778]
2025-07-17 23:19:20,169 - ADV - DEBUG - episode: 2499/50000, score: 0, e: 0.01, reward: 40.939, seudo-rew:0.0 max: 23.6070499420166 init-act: [14.876 21.096 16.625 24.568]
2025-07-17 23:19:28,282 - ADV - DEBUG - episode: 2599/50000, score: 0, e: 0.01, reward: 40.517, seudo-rew:0.0 max: 24.421985626220703 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-4000.keras


2025-07-17 23:21:31,960 - ADV - DEBUG - episode: 4099/50000, score: 0, e: 0.01, reward: 41.644, seudo-rew:0.0 max: 34.242156982421875 init-act: [22.305 34.32  23.186 35.25 ]
2025-07-17 23:21:40,196 - ADV - DEBUG - episode: 4199/50000, score: 0, e: 0.01, reward: 42.252, seudo-rew:0.0 max: 34.49198532104492 init-act: [22.358 34.867 23.418 35.43 ]
2025-07-17 23:21:48,412 - ADV - DEBUG - episode: 4299/50000, score: 0, e: 0.01, reward: 41.043, seudo-rew:0.0 max: 34.760223388671875 init-act: [23.148 35.131 23.289 35.654]
2025-07-17 23:21:56,536 - ADV - DEBUG - episode: 4399/50000, score: 0, e: 0.01, reward: 42.366, seudo-rew:0.0 max: 35.021934509277344 init-act: [23.691 35.559 23.713 36.043]
2025-07-17 23:22:05,086 - ADV - DEBUG - episode: 4499/50000, score: 0, e: 0.01, reward: 42.762, seudo-rew:0.0 max: 35.306732177734375 init-act: [23.547 36.109 23.699 36.401]
2025-07-17 23:22:13,348 - ADV - DEBUG - episode: 4599/50000, score: 0, e: 0.01, reward: 42.543, seudo-rew:0.0 max: 35.6059379577636

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-6000.keras


2025-07-17 23:24:25,432 - ADV - DEBUG - episode: 6099/50000, score: 0, e: 0.01, reward: 43.09, seudo-rew:0.0 max: 38.41801834106445 init-act: [29.279 39.069 28.603 39.166]
2025-07-17 23:24:33,697 - ADV - DEBUG - episode: 6199/50000, score: 0, e: 0.01, reward: 43.028, seudo-rew:0.0 max: 38.4499626159668 init-act: [28.92  38.76  28.991 39.16 ]
2025-07-17 23:24:41,944 - ADV - DEBUG - episode: 6299/50000, score: 0, e: 0.01, reward: 43.625, seudo-rew:0.0 max: 38.6476936340332 init-act: [29.09  39.209 29.305 39.301]
2025-07-17 23:24:50,115 - ADV - DEBUG - episode: 6399/50000, score: 0, e: 0.01, reward: 43.533, seudo-rew:0.0 max: 38.38881301879883 init-act: [29.028 39.372 29.297 39.269]
2025-07-17 23:24:58,246 - ADV - DEBUG - episode: 6499/50000, score: 0, e: 0.01, reward: 43.065, seudo-rew:0.0 max: 38.76958084106445 init-act: [29.024 39.904 29.09  39.586]
2025-07-17 23:25:06,477 - ADV - DEBUG - episode: 6599/50000, score: 0, e: 0.01, reward: 43.455, seudo-rew:0.0 max: 38.9602165222168 init-a

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-8000.keras


2025-07-17 23:27:17,248 - ADV - DEBUG - episode: 8099/50000, score: 0, e: 0.01, reward: 44.228, seudo-rew:0.0 max: 41.088741302490234 init-act: [29.48  41.961 29.529 41.82 ]
2025-07-17 23:27:25,443 - ADV - DEBUG - episode: 8199/50000, score: 0, e: 0.01, reward: 44.217, seudo-rew:0.0 max: 41.473838806152344 init-act: [29.831 42.004 29.619 41.891]
2025-07-17 23:27:33,781 - ADV - DEBUG - episode: 8299/50000, score: 0, e: 0.01, reward: 44.24, seudo-rew:0.0 max: 41.65438461303711 init-act: [30.403 42.482 30.167 42.229]
2025-07-17 23:27:42,061 - ADV - DEBUG - episode: 8399/50000, score: 0, e: 0.01, reward: 43.44, seudo-rew:0.0 max: 41.93798065185547 init-act: [30.612 42.75  30.387 42.557]
2025-07-17 23:27:50,353 - ADV - DEBUG - episode: 8499/50000, score: 0, e: 0.01, reward: 43.934, seudo-rew:0.0 max: 42.17569351196289 init-act: [30.49  43.109 30.143 42.925]
2025-07-17 23:27:58,661 - ADV - DEBUG - episode: 8599/50000, score: 0, e: 0.01, reward: 43.866, seudo-rew:0.0 max: 42.34109878540039 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-10000.keras


2025-07-17 23:30:02,095 - ADV - DEBUG - episode: 10099/50000, score: 0, e: 0.01, reward: 44.047, seudo-rew:0.0 max: 44.14265441894531 init-act: [29.605 44.171 30.959 44.603]
2025-07-17 23:30:10,270 - ADV - DEBUG - episode: 10199/50000, score: 0, e: 0.01, reward: 44.216, seudo-rew:0.0 max: 44.25813674926758 init-act: [29.988 45.279 31.441 45.01 ]
2025-07-17 23:30:18,576 - ADV - DEBUG - episode: 10299/50000, score: 0, e: 0.01, reward: 44.204, seudo-rew:0.0 max: 44.310176849365234 init-act: [30.059 45.086 31.244 45.016]
2025-07-17 23:30:26,877 - ADV - DEBUG - episode: 10399/50000, score: 0, e: 0.01, reward: 44.237, seudo-rew:0.0 max: 44.33881759643555 init-act: [30.033 45.429 31.285 45.164]
2025-07-17 23:30:35,083 - ADV - DEBUG - episode: 10499/50000, score: 0, e: 0.01, reward: 44.017, seudo-rew:0.0 max: 44.3734016418457 init-act: [29.701 45.161 30.67  45.422]
2025-07-17 23:30:43,404 - ADV - DEBUG - episode: 10599/50000, score: 0, e: 0.01, reward: 44.397, seudo-rew:0.0 max: 44.67582702636

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-12000.keras


2025-07-17 23:32:47,103 - ADV - DEBUG - episode: 12099/50000, score: 0, e: 0.01, reward: 44.405, seudo-rew:0.0 max: 45.759979248046875 init-act: [31.066 46.647 30.82  46.606]
2025-07-17 23:32:55,291 - ADV - DEBUG - episode: 12199/50000, score: 0, e: 0.01, reward: 44.299, seudo-rew:0.0 max: 45.67156219482422 init-act: [30.566 46.148 30.553 46.149]
2025-07-17 23:33:03,438 - ADV - DEBUG - episode: 12299/50000, score: 0, e: 0.01, reward: 44.158, seudo-rew:0.0 max: 46.03373336791992 init-act: [30.512 46.947 30.591 46.871]
2025-07-17 23:33:11,756 - ADV - DEBUG - episode: 12399/50000, score: 0, e: 0.01, reward: 44.57, seudo-rew:0.0 max: 45.892059326171875 init-act: [30.666 46.787 30.58  46.808]
2025-07-17 23:33:19,911 - ADV - DEBUG - episode: 12499/50000, score: 0, e: 0.01, reward: 44.49, seudo-rew:0.0 max: 45.886051177978516 init-act: [30.626 46.74  30.397 46.694]
2025-07-17 23:33:28,059 - ADV - DEBUG - episode: 12599/50000, score: 0, e: 0.01, reward: 44.658, seudo-rew:0.0 max: 45.8734664916

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-14000.keras


2025-07-17 23:35:31,174 - ADV - DEBUG - episode: 14099/50000, score: 0, e: 0.01, reward: 44.762, seudo-rew:0.0 max: 45.6954460144043 init-act: [29.968 46.477 29.666 46.202]
2025-07-17 23:35:39,439 - ADV - DEBUG - episode: 14199/50000, score: 0, e: 0.01, reward: 45.019, seudo-rew:0.0 max: 45.631019592285156 init-act: [29.942 46.347 30.242 46.077]
2025-07-17 23:35:47,861 - ADV - DEBUG - episode: 14299/50000, score: 0, e: 0.01, reward: 45.206, seudo-rew:0.0 max: 45.52671432495117 init-act: [29.888 46.076 29.672 45.887]
2025-07-17 23:35:56,131 - ADV - DEBUG - episode: 14399/50000, score: 0, e: 0.01, reward: 44.969, seudo-rew:0.0 max: 45.490516662597656 init-act: [29.876 46.318 29.715 46.148]
2025-07-17 23:36:04,344 - ADV - DEBUG - episode: 14499/50000, score: 0, e: 0.01, reward: 44.991, seudo-rew:0.0 max: 45.7000732421875 init-act: [29.82  46.56  29.975 46.484]
2025-07-17 23:36:12,547 - ADV - DEBUG - episode: 14599/50000, score: 0, e: 0.01, reward: 45.003, seudo-rew:0.0 max: 45.67469024658

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-16000.keras


2025-07-17 23:38:16,052 - ADV - DEBUG - episode: 16099/50000, score: 0, e: 0.01, reward: 44.87, seudo-rew:0.0 max: 45.729087829589844 init-act: [30.615 46.393 31.258 46.86 ]
2025-07-17 23:38:24,210 - ADV - DEBUG - episode: 16199/50000, score: 0, e: 0.01, reward: 45.104, seudo-rew:0.0 max: 46.00761032104492 init-act: [30.583 46.915 31.346 46.687]
2025-07-17 23:38:32,478 - ADV - DEBUG - episode: 16299/50000, score: 0, e: 0.01, reward: 45.294, seudo-rew:0.0 max: 46.190181732177734 init-act: [30.675 46.473 31.201 46.883]
2025-07-17 23:38:40,707 - ADV - DEBUG - episode: 16399/50000, score: 0, e: 0.01, reward: 44.853, seudo-rew:0.0 max: 46.26774597167969 init-act: [31.264 46.961 31.311 46.921]
2025-07-17 23:38:48,930 - ADV - DEBUG - episode: 16499/50000, score: 0, e: 0.01, reward: 45.035, seudo-rew:0.0 max: 46.326255798339844 init-act: [30.993 47.06  31.155 46.918]
2025-07-17 23:38:57,118 - ADV - DEBUG - episode: 16599/50000, score: 0, e: 0.01, reward: 45.193, seudo-rew:0.0 max: 46.183727264

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-18000.keras


2025-07-17 23:41:00,358 - ADV - DEBUG - episode: 18099/50000, score: 0, e: 0.01, reward: 44.85, seudo-rew:0.0 max: 46.50429916381836 init-act: [30.428 47.504 30.645 47.491]
2025-07-17 23:41:08,614 - ADV - DEBUG - episode: 18199/50000, score: 0, e: 0.01, reward: 45.344, seudo-rew:0.0 max: 46.51142120361328 init-act: [30.75  47.265 30.533 47.147]
2025-07-17 23:41:16,823 - ADV - DEBUG - episode: 18299/50000, score: 0, e: 0.01, reward: 45.514, seudo-rew:0.0 max: 46.3773307800293 init-act: [30.314 46.824 30.145 46.917]
2025-07-17 23:41:25,024 - ADV - DEBUG - episode: 18399/50000, score: 0, e: 0.01, reward: 45.698, seudo-rew:0.0 max: 46.43687438964844 init-act: [30.752 47.039 30.445 47.083]
2025-07-17 23:41:33,186 - ADV - DEBUG - episode: 18499/50000, score: 0, e: 0.01, reward: 45.566, seudo-rew:0.0 max: 46.43220520019531 init-act: [30.874 47.051 30.473 47.063]
2025-07-17 23:41:41,375 - ADV - DEBUG - episode: 18599/50000, score: 0, e: 0.01, reward: 45.203, seudo-rew:0.0 max: 46.4015274047851

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-20000.keras


2025-07-17 23:43:44,363 - ADV - DEBUG - episode: 20099/50000, score: 0, e: 0.01, reward: 45.395, seudo-rew:0.0 max: 46.8270149230957 init-act: [30.782 47.867 31.322 47.56 ]
2025-07-17 23:43:52,558 - ADV - DEBUG - episode: 20199/50000, score: 0, e: 0.01, reward: 45.274, seudo-rew:0.0 max: 46.73775100708008 init-act: [30.812 47.363 31.274 47.386]
2025-07-17 23:44:00,727 - ADV - DEBUG - episode: 20299/50000, score: 0, e: 0.01, reward: 45.527, seudo-rew:0.0 max: 46.83397674560547 init-act: [31.541 47.173 32.846 47.374]
2025-07-17 23:44:08,932 - ADV - DEBUG - episode: 20399/50000, score: 0, e: 0.01, reward: 44.935, seudo-rew:0.0 max: 47.35879898071289 init-act: [31.774 48.301 32.914 48.153]
2025-07-17 23:44:17,103 - ADV - DEBUG - episode: 20499/50000, score: 0, e: 0.01, reward: 44.404, seudo-rew:0.0 max: 46.97361373901367 init-act: [31.744 47.703 32.461 47.804]
2025-07-17 23:44:25,337 - ADV - DEBUG - episode: 20599/50000, score: 0, e: 0.01, reward: 45.76, seudo-rew:0.0 max: 47.2010726928710

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-22000.keras


2025-07-17 23:46:29,044 - ADV - DEBUG - episode: 22099/50000, score: 0, e: 0.01, reward: 45.141, seudo-rew:0.0 max: 47.22231674194336 init-act: [32.178 48.183 33.268 47.938]
2025-07-17 23:46:37,255 - ADV - DEBUG - episode: 22199/50000, score: 0, e: 0.01, reward: 45.749, seudo-rew:0.0 max: 47.233978271484375 init-act: [32.275 48.234 33.387 47.999]
2025-07-17 23:46:45,458 - ADV - DEBUG - episode: 22299/50000, score: 0, e: 0.01, reward: 45.677, seudo-rew:0.0 max: 47.34918212890625 init-act: [31.933 48.344 33.136 48.214]
2025-07-17 23:46:53,652 - ADV - DEBUG - episode: 22399/50000, score: 0, e: 0.01, reward: 45.24, seudo-rew:0.0 max: 47.42889404296875 init-act: [31.709 48.22  32.502 48.155]
2025-07-17 23:47:01,810 - ADV - DEBUG - episode: 22499/50000, score: 0, e: 0.01, reward: 45.268, seudo-rew:0.0 max: 47.54298782348633 init-act: [31.57  48.361 32.228 48.41 ]
2025-07-17 23:47:10,035 - ADV - DEBUG - episode: 22599/50000, score: 0, e: 0.01, reward: 45.639, seudo-rew:0.0 max: 47.79908752441

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-24000.keras


2025-07-17 23:49:14,650 - ADV - DEBUG - episode: 24099/50000, score: 0, e: 0.01, reward: 45.508, seudo-rew:0.0 max: 47.814308166503906 init-act: [30.474 48.448 30.081 48.441]
2025-07-17 23:49:22,852 - ADV - DEBUG - episode: 24199/50000, score: 0, e: 0.01, reward: 45.519, seudo-rew:0.0 max: 47.69748306274414 init-act: [31.908 48.137 30.949 48.411]
2025-07-17 23:49:31,018 - ADV - DEBUG - episode: 24299/50000, score: 0, e: 0.01, reward: 45.502, seudo-rew:0.0 max: 47.97880554199219 init-act: [31.247 48.176 30.51  48.567]
2025-07-17 23:49:39,313 - ADV - DEBUG - episode: 24399/50000, score: 0, e: 0.01, reward: 45.318, seudo-rew:0.0 max: 47.350162506103516 init-act: [31.617 47.926 30.573 48.207]
2025-07-17 23:49:47,527 - ADV - DEBUG - episode: 24499/50000, score: 0, e: 0.01, reward: 45.775, seudo-rew:0.0 max: 47.26530075073242 init-act: [31.434 48.003 30.168 48.193]
2025-07-17 23:49:55,735 - ADV - DEBUG - episode: 24599/50000, score: 0, e: 0.01, reward: 45.737, seudo-rew:0.0 max: 47.182220458

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-26000.keras


2025-07-17 23:51:59,044 - ADV - DEBUG - episode: 26099/50000, score: 0, e: 0.01, reward: 45.969, seudo-rew:0.0 max: 46.99992370605469 init-act: [29.758 47.737 29.658 47.712]
2025-07-17 23:52:07,248 - ADV - DEBUG - episode: 26199/50000, score: 0, e: 0.01, reward: 45.645, seudo-rew:0.0 max: 46.981788635253906 init-act: [29.579 47.698 29.309 47.535]
2025-07-17 23:52:15,442 - ADV - DEBUG - episode: 26299/50000, score: 0, e: 0.01, reward: 45.681, seudo-rew:0.0 max: 47.05668640136719 init-act: [29.569 47.868 29.297 47.922]
2025-07-17 23:52:23,745 - ADV - DEBUG - episode: 26399/50000, score: 0, e: 0.01, reward: 44.796, seudo-rew:0.0 max: 47.040931701660156 init-act: [29.857 47.781 29.426 47.899]
2025-07-17 23:52:31,956 - ADV - DEBUG - episode: 26499/50000, score: 0, e: 0.01, reward: 45.917, seudo-rew:0.0 max: 47.19127655029297 init-act: [29.752 48.083 29.365 47.858]
2025-07-17 23:52:40,195 - ADV - DEBUG - episode: 26599/50000, score: 0, e: 0.01, reward: 45.766, seudo-rew:0.0 max: 46.730758666

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-28000.keras


2025-07-17 23:54:43,861 - ADV - DEBUG - episode: 28099/50000, score: 0, e: 0.01, reward: 45.822, seudo-rew:0.0 max: 47.090667724609375 init-act: [30.338 47.296 30.027 47.789]
2025-07-17 23:54:52,040 - ADV - DEBUG - episode: 28199/50000, score: 0, e: 0.01, reward: 45.847, seudo-rew:0.0 max: 46.87095642089844 init-act: [30.407 47.317 29.871 47.55 ]
2025-07-17 23:55:00,251 - ADV - DEBUG - episode: 28299/50000, score: 0, e: 0.01, reward: 46.028, seudo-rew:0.0 max: 46.76293182373047 init-act: [29.91  47.401 29.85  47.729]
2025-07-17 23:55:08,462 - ADV - DEBUG - episode: 28399/50000, score: 0, e: 0.01, reward: 45.702, seudo-rew:0.0 max: 46.82819747924805 init-act: [29.854 47.475 29.713 47.761]
2025-07-17 23:55:16,875 - ADV - DEBUG - episode: 28499/50000, score: 0, e: 0.01, reward: 45.754, seudo-rew:0.0 max: 47.09264373779297 init-act: [29.996 47.763 29.527 48.105]
2025-07-17 23:55:25,083 - ADV - DEBUG - episode: 28599/50000, score: 0, e: 0.01, reward: 46.313, seudo-rew:0.0 max: 46.8642425537

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-30000.keras


2025-07-17 23:57:28,602 - ADV - DEBUG - episode: 30099/50000, score: 0, e: 0.01, reward: 45.619, seudo-rew:0.0 max: 47.163047790527344 init-act: [30.957 47.599 29.855 47.677]
2025-07-17 23:57:36,833 - ADV - DEBUG - episode: 30199/50000, score: 0, e: 0.01, reward: 46.043, seudo-rew:0.0 max: 47.33472442626953 init-act: [30.969 47.679 30.004 47.911]
2025-07-17 23:57:45,021 - ADV - DEBUG - episode: 30299/50000, score: 0, e: 0.01, reward: 46.143, seudo-rew:0.0 max: 47.136775970458984 init-act: [31.051 48.032 30.459 48.146]
2025-07-17 23:57:53,211 - ADV - DEBUG - episode: 30399/50000, score: 0, e: 0.01, reward: 45.818, seudo-rew:0.0 max: 47.06222915649414 init-act: [30.614 47.953 29.928 47.766]
2025-07-17 23:58:01,433 - ADV - DEBUG - episode: 30499/50000, score: 0, e: 0.01, reward: 45.166, seudo-rew:0.0 max: 47.44135284423828 init-act: [30.445 48.275 29.83  48.222]
2025-07-17 23:58:09,600 - ADV - DEBUG - episode: 30599/50000, score: 0, e: 0.01, reward: 46.068, seudo-rew:0.0 max: 47.332538604

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-32000.keras


2025-07-18 00:00:14,357 - ADV - DEBUG - episode: 32099/50000, score: 0, e: 0.01, reward: 46.271, seudo-rew:0.0 max: 47.1133918762207 init-act: [31.088 48.036 31.596 47.913]
2025-07-18 00:00:22,589 - ADV - DEBUG - episode: 32199/50000, score: 0, e: 0.01, reward: 46.18, seudo-rew:0.0 max: 47.25011444091797 init-act: [30.916 48.163 31.239 47.996]
2025-07-18 00:00:30,780 - ADV - DEBUG - episode: 32299/50000, score: 0, e: 0.01, reward: 46.019, seudo-rew:0.0 max: 47.4001579284668 init-act: [30.312 48.5   30.662 48.335]
2025-07-18 00:00:38,957 - ADV - DEBUG - episode: 32399/50000, score: 0, e: 0.01, reward: 45.827, seudo-rew:0.0 max: 47.352291107177734 init-act: [30.285 48.043 30.498 48.114]
2025-07-18 00:00:47,296 - ADV - DEBUG - episode: 32499/50000, score: 0, e: 0.01, reward: 45.743, seudo-rew:0.0 max: 47.28093338012695 init-act: [29.785 47.91  29.693 47.732]
2025-07-18 00:00:56,835 - ADV - DEBUG - episode: 32599/50000, score: 0, e: 0.01, reward: 46.147, seudo-rew:0.0 max: 47.2285194396972

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-34000.keras


2025-07-18 00:03:09,856 - ADV - DEBUG - episode: 34099/50000, score: 0, e: 0.01, reward: 46.272, seudo-rew:0.0 max: 46.682716369628906 init-act: [32.018 47.39  32.043 47.34 ]
2025-07-18 00:03:19,817 - ADV - DEBUG - episode: 34199/50000, score: 0, e: 0.01, reward: 45.969, seudo-rew:0.0 max: 46.218624114990234 init-act: [30.854 46.654 30.492 46.837]
2025-07-18 00:03:29,304 - ADV - DEBUG - episode: 34299/50000, score: 0, e: 0.01, reward: 46.273, seudo-rew:0.0 max: 46.38633346557617 init-act: [30.377 47.225 30.051 47.234]
2025-07-18 00:03:38,949 - ADV - DEBUG - episode: 34399/50000, score: 0, e: 0.01, reward: 46.659, seudo-rew:0.0 max: 46.26519775390625 init-act: [29.737 46.718 29.401 46.717]
2025-07-18 00:03:48,440 - ADV - DEBUG - episode: 34499/50000, score: 0, e: 0.01, reward: 46.464, seudo-rew:0.0 max: 45.99215316772461 init-act: [29.703 46.41  29.479 46.711]
2025-07-18 00:03:57,953 - ADV - DEBUG - episode: 34599/50000, score: 0, e: 0.01, reward: 46.844, seudo-rew:0.0 max: 46.075389862

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-36000.keras


2025-07-18 00:06:20,304 - ADV - DEBUG - episode: 36099/50000, score: 0, e: 0.01, reward: 46.675, seudo-rew:0.0 max: 45.85948181152344 init-act: [29.639 46.967 29.545 46.821]
2025-07-18 00:06:29,888 - ADV - DEBUG - episode: 36199/50000, score: 0, e: 0.01, reward: 46.765, seudo-rew:0.0 max: 46.09593200683594 init-act: [30.065 47.114 29.733 46.848]
2025-07-18 00:06:39,633 - ADV - DEBUG - episode: 36299/50000, score: 0, e: 0.01, reward: 46.047, seudo-rew:0.0 max: 46.324485778808594 init-act: [29.947 46.999 29.801 46.893]
2025-07-18 00:06:49,662 - ADV - DEBUG - episode: 36399/50000, score: 0, e: 0.01, reward: 46.535, seudo-rew:0.0 max: 46.01899719238281 init-act: [29.946 46.799 29.612 46.751]
2025-07-18 00:06:58,189 - ADV - DEBUG - episode: 36499/50000, score: 0, e: 0.01, reward: 46.569, seudo-rew:0.0 max: 46.288143157958984 init-act: [29.656 46.823 29.721 46.793]
2025-07-18 00:07:06,410 - ADV - DEBUG - episode: 36599/50000, score: 0, e: 0.01, reward: 46.449, seudo-rew:0.0 max: 46.030456542

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-38000.keras


2025-07-18 00:09:11,558 - ADV - DEBUG - episode: 38099/50000, score: 0, e: 0.01, reward: 45.948, seudo-rew:0.0 max: 46.55143356323242 init-act: [29.121 46.921 29.047 47.617]
2025-07-18 00:09:19,727 - ADV - DEBUG - episode: 38199/50000, score: 0, e: 0.01, reward: 46.081, seudo-rew:0.0 max: 46.46979904174805 init-act: [28.887 46.819 29.18  47.199]
2025-07-18 00:09:27,972 - ADV - DEBUG - episode: 38299/50000, score: 0, e: 0.01, reward: 46.017, seudo-rew:0.0 max: 47.08452224731445 init-act: [29.182 47.278 29.141 47.96 ]
2025-07-18 00:09:36,200 - ADV - DEBUG - episode: 38399/50000, score: 0, e: 0.01, reward: 45.908, seudo-rew:0.0 max: 46.56046676635742 init-act: [29.081 47.082 28.838 47.414]
2025-07-18 00:09:44,380 - ADV - DEBUG - episode: 38499/50000, score: 0, e: 0.01, reward: 46.658, seudo-rew:0.0 max: 46.73162841796875 init-act: [29.291 47.224 28.966 47.767]
2025-07-18 00:09:52,635 - ADV - DEBUG - episode: 38599/50000, score: 0, e: 0.01, reward: 46.265, seudo-rew:0.0 max: 46.52989578247

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-40000.keras


2025-07-18 00:12:14,453 - ADV - DEBUG - episode: 40099/50000, score: 0, e: 0.01, reward: 46.566, seudo-rew:0.0 max: 47.716583251953125 init-act: [29.439 48.009 29.197 48.406]
2025-07-18 00:12:24,863 - ADV - DEBUG - episode: 40199/50000, score: 0, e: 0.01, reward: 46.801, seudo-rew:0.0 max: 47.591251373291016 init-act: [29.373 48.358 29.844 48.292]
2025-07-18 00:12:35,780 - ADV - DEBUG - episode: 40299/50000, score: 0, e: 0.01, reward: 45.59, seudo-rew:0.0 max: 48.15089797973633 init-act: [30.85  48.638 31.272 48.715]
2025-07-18 00:12:45,040 - ADV - DEBUG - episode: 40399/50000, score: 0, e: 0.01, reward: 46.645, seudo-rew:0.0 max: 47.78507614135742 init-act: [30.352 48.86  30.963 48.786]
2025-07-18 00:12:53,321 - ADV - DEBUG - episode: 40499/50000, score: 0, e: 0.01, reward: 46.689, seudo-rew:0.0 max: 48.052486419677734 init-act: [29.863 48.811 30.654 48.599]
2025-07-18 00:13:01,582 - ADV - DEBUG - episode: 40599/50000, score: 0, e: 0.01, reward: 46.606, seudo-rew:0.0 max: 48.109657287

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-42000.keras


2025-07-18 00:15:09,881 - ADV - DEBUG - episode: 42099/50000, score: 0, e: 0.01, reward: 46.558, seudo-rew:0.0 max: 48.06078338623047 init-act: [28.451 48.442 29.572 48.824]
2025-07-18 00:15:20,789 - ADV - DEBUG - episode: 42199/50000, score: 0, e: 0.01, reward: 47.045, seudo-rew:0.0 max: 48.01130294799805 init-act: [28.006 48.727 29.841 48.841]
2025-07-18 00:15:31,081 - ADV - DEBUG - episode: 42299/50000, score: 0, e: 0.01, reward: 46.267, seudo-rew:0.0 max: 48.25851058959961 init-act: [27.867 48.84  29.071 49.144]
2025-07-18 00:15:42,171 - ADV - DEBUG - episode: 42399/50000, score: 0, e: 0.01, reward: 46.485, seudo-rew:0.0 max: 47.69330596923828 init-act: [27.704 48.444 28.643 48.488]
2025-07-18 00:15:51,559 - ADV - DEBUG - episode: 42499/50000, score: 0, e: 0.01, reward: 47.069, seudo-rew:0.0 max: 47.580196380615234 init-act: [27.644 48.499 28.692 48.637]
2025-07-18 00:15:59,950 - ADV - DEBUG - episode: 42599/50000, score: 0, e: 0.01, reward: 46.81, seudo-rew:0.0 max: 47.45568466186

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-44000.keras


2025-07-18 00:18:05,079 - ADV - DEBUG - episode: 44099/50000, score: 0, e: 0.01, reward: 47.092, seudo-rew:0.0 max: 47.94308853149414 init-act: [28.547 48.267 28.656 48.218]
2025-07-18 00:18:13,329 - ADV - DEBUG - episode: 44199/50000, score: 0, e: 0.01, reward: 46.749, seudo-rew:0.0 max: 47.71205520629883 init-act: [28.49  48.461 28.465 48.301]
2025-07-18 00:18:21,653 - ADV - DEBUG - episode: 44299/50000, score: 0, e: 0.01, reward: 46.767, seudo-rew:0.0 max: 47.60029602050781 init-act: [28.505 48.254 28.447 48.063]
2025-07-18 00:18:29,929 - ADV - DEBUG - episode: 44399/50000, score: 0, e: 0.01, reward: 46.809, seudo-rew:0.0 max: 47.60163879394531 init-act: [28.432 48.435 28.229 48.148]
2025-07-18 00:18:38,208 - ADV - DEBUG - episode: 44499/50000, score: 0, e: 0.01, reward: 46.579, seudo-rew:0.0 max: 47.448585510253906 init-act: [29.353 48.324 28.723 48.294]
2025-07-18 00:18:46,439 - ADV - DEBUG - episode: 44599/50000, score: 0, e: 0.01, reward: 46.868, seudo-rew:0.0 max: 47.4535675048

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-46000.keras


2025-07-18 00:21:02,837 - ADV - DEBUG - episode: 46099/50000, score: 0, e: 0.01, reward: 46.744, seudo-rew:0.0 max: 47.7780647277832 init-act: [30.202 48.272 30.645 48.282]
2025-07-18 00:21:13,761 - ADV - DEBUG - episode: 46199/50000, score: 0, e: 0.01, reward: 46.993, seudo-rew:0.0 max: 47.2043571472168 init-act: [30.084 47.954 30.216 48.109]
2025-07-18 00:21:23,157 - ADV - DEBUG - episode: 46299/50000, score: 0, e: 0.01, reward: 47.026, seudo-rew:0.0 max: 47.08338928222656 init-act: [30.822 47.953 30.57  47.974]
2025-07-18 00:21:31,422 - ADV - DEBUG - episode: 46399/50000, score: 0, e: 0.01, reward: 47.199, seudo-rew:0.0 max: 46.98612976074219 init-act: [29.651 47.809 29.479 47.985]
2025-07-18 00:21:39,961 - ADV - DEBUG - episode: 46499/50000, score: 0, e: 0.01, reward: 46.95, seudo-rew:0.0 max: 46.76896286010742 init-act: [29.342 47.645 29.37  47.797]
2025-07-18 00:21:48,285 - ADV - DEBUG - episode: 46599/50000, score: 0, e: 0.01, reward: 46.757, seudo-rew:0.0 max: 46.88422393798828

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.001\gpt\model-48000.keras


2025-07-18 00:24:03,889 - ADV - DEBUG - episode: 48099/50000, score: 0, e: 0.01, reward: 46.851, seudo-rew:0.0 max: 47.443641662597656 init-act: [30.174 48.321 30.447 48.218]
2025-07-18 00:24:12,249 - ADV - DEBUG - episode: 48199/50000, score: 0, e: 0.01, reward: 47.342, seudo-rew:0.0 max: 47.01351547241211 init-act: [30.151 47.939 30.535 47.805]
2025-07-18 00:24:20,523 - ADV - DEBUG - episode: 48299/50000, score: 0, e: 0.01, reward: 47.477, seudo-rew:0.0 max: 47.15713882446289 init-act: [30.111 48.    30.491 47.921]
2025-07-18 00:24:28,904 - ADV - DEBUG - episode: 48399/50000, score: 0, e: 0.01, reward: 46.658, seudo-rew:0.0 max: 46.864288330078125 init-act: [30.691 47.797 31.479 47.75 ]
2025-07-18 00:24:37,576 - ADV - DEBUG - episode: 48499/50000, score: 0, e: 0.01, reward: 47.033, seudo-rew:0.0 max: 46.77005386352539 init-act: [30.284 47.23  30.912 47.207]
2025-07-18 00:24:46,379 - ADV - DEBUG - episode: 48599/50000, score: 0, e: 0.01, reward: 47.181, seudo-rew:0.0 max: 47.153232574

400000 0.001 0.1
📦 Training: buf=400000, lr=0.001, eps=0.1 → trained_model\test_adv_RL_400000eps0.1lr0.001\gpt


2025-07-18 00:26:44,605 - ADV - DEBUG - version control: (None, None)
2025-07-18 00:26:44,605 - ADV - DEBUG - Loading learner model from trained_model/RNN_learner_single/cells_5gpt/model-49900.weights_final.h5
2025-07-18 00:26:44,685 - ADV - DEBUG - Trained model saved to: trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-0.keras


dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-0.keras


2025-07-18 00:26:50,259 - ADV - DEBUG - episode: 99/50000, score: 0, e: 0.1, reward: 21.519, seudo-rew:0.0 max: 1.304434895515442 init-act: [1.856 1.208 1.316 1.284]
2025-07-18 00:26:55,788 - ADV - DEBUG - episode: 199/50000, score: 0, e: 0.1, reward: 23.816, seudo-rew:0.0 max: 2.1403913497924805 init-act: [2.157 2.448 2.108 2.567]
2025-07-18 00:27:01,316 - ADV - DEBUG - episode: 299/50000, score: 0, e: 0.1, reward: 31.282, seudo-rew:0.0 max: 2.7783641815185547 init-act: [2.332 3.424 2.682 3.711]
2025-07-18 00:27:06,834 - ADV - DEBUG - episode: 399/50000, score: 0, e: 0.1, reward: 38.616, seudo-rew:0.0 max: 3.682082414627075 init-act: [2.608 4.258 2.887 4.473]
2025-07-18 00:27:15,070 - ADV - DEBUG - episode: 499/50000, score: 0, e: 0.1, reward: 39.839, seudo-rew:0.0 max: 4.729257583618164 init-act: [2.865 4.883 3.232 5.53 ]
2025-07-18 00:27:23,327 - ADV - DEBUG - episode: 599/50000, score: 0, e: 0.1, reward: 38.996, seudo-rew:0.0 max: 5.884475231170654 init-act: [3.46  5.788 3.686 6.58

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-2000.keras


2025-07-18 00:29:35,374 - ADV - DEBUG - episode: 2099/50000, score: 0, e: 0.1, reward: 40.529, seudo-rew:0.0 max: 21.593494415283203 init-act: [14.623 22.139 14.948 22.516]
2025-07-18 00:29:43,652 - ADV - DEBUG - episode: 2199/50000, score: 0, e: 0.1, reward: 40.475, seudo-rew:0.0 max: 22.43187713623047 init-act: [15.14  23.375 15.161 23.342]
2025-07-18 00:29:52,173 - ADV - DEBUG - episode: 2299/50000, score: 0, e: 0.1, reward: 40.582, seudo-rew:0.0 max: 23.270282745361328 init-act: [15.681 23.864 15.966 24.094]
2025-07-18 00:30:02,088 - ADV - DEBUG - episode: 2399/50000, score: 0, e: 0.1, reward: 39.591, seudo-rew:0.0 max: 24.03376007080078 init-act: [16.39  24.736 16.28  24.861]
2025-07-18 00:30:12,186 - ADV - DEBUG - episode: 2499/50000, score: 0, e: 0.1, reward: 40.477, seudo-rew:0.0 max: 24.771041870117188 init-act: [16.965 25.705 17.394 25.584]
2025-07-18 00:30:22,128 - ADV - DEBUG - episode: 2599/50000, score: 0, e: 0.1, reward: 40.309, seudo-rew:0.0 max: 25.5784854888916 init-a

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-4000.keras


2025-07-18 00:32:43,067 - ADV - DEBUG - episode: 4099/50000, score: 0, e: 0.1, reward: 40.811, seudo-rew:0.0 max: 35.5938720703125 init-act: [24.088 36.414 24.094 36.611]
2025-07-18 00:32:51,361 - ADV - DEBUG - episode: 4199/50000, score: 0, e: 0.1, reward: 40.842, seudo-rew:0.0 max: 35.94416046142578 init-act: [24.665 36.561 24.998 36.656]
2025-07-18 00:32:59,676 - ADV - DEBUG - episode: 4299/50000, score: 0, e: 0.1, reward: 40.509, seudo-rew:0.0 max: 36.19599914550781 init-act: [25.138 36.571 25.006 37.22 ]
2025-07-18 00:33:08,757 - ADV - DEBUG - episode: 4399/50000, score: 0, e: 0.1, reward: 41.624, seudo-rew:0.0 max: 36.41082763671875 init-act: [25.881 37.211 26.642 37.457]
2025-07-18 00:33:18,270 - ADV - DEBUG - episode: 4499/50000, score: 0, e: 0.1, reward: 41.955, seudo-rew:0.0 max: 36.749000549316406 init-act: [25.162 37.574 25.229 37.848]
2025-07-18 00:33:27,835 - ADV - DEBUG - episode: 4599/50000, score: 0, e: 0.1, reward: 41.52, seudo-rew:0.0 max: 37.02814483642578 init-act:

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-6000.keras


2025-07-18 00:35:57,328 - ADV - DEBUG - episode: 6099/50000, score: 0, e: 0.1, reward: 41.552, seudo-rew:0.0 max: 39.26860427856445 init-act: [26.215 40.102 25.65  40.117]
2025-07-18 00:36:06,588 - ADV - DEBUG - episode: 6199/50000, score: 0, e: 0.1, reward: 42.291, seudo-rew:0.0 max: 39.41578674316406 init-act: [26.242 40.211 26.027 40.355]
2025-07-18 00:36:14,858 - ADV - DEBUG - episode: 6299/50000, score: 0, e: 0.1, reward: 41.732, seudo-rew:0.0 max: 39.67102813720703 init-act: [26.955 40.698 27.149 40.815]
2025-07-18 00:36:23,151 - ADV - DEBUG - episode: 6399/50000, score: 0, e: 0.1, reward: 41.414, seudo-rew:0.0 max: 39.880645751953125 init-act: [26.453 40.713 27.803 40.866]
2025-07-18 00:36:31,481 - ADV - DEBUG - episode: 6499/50000, score: 0, e: 0.1, reward: 41.825, seudo-rew:0.0 max: 40.385921478271484 init-act: [26.465 41.172 26.449 41.208]
2025-07-18 00:36:41,217 - ADV - DEBUG - episode: 6599/50000, score: 0, e: 0.1, reward: 41.535, seudo-rew:0.0 max: 40.42936325073242 init-a

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-8000.keras


2025-07-18 00:39:03,898 - ADV - DEBUG - episode: 8099/50000, score: 0, e: 0.1, reward: 40.007, seudo-rew:0.0 max: 43.17095947265625 init-act: [28.453 44.005 28.738 44.218]
2025-07-18 00:39:12,104 - ADV - DEBUG - episode: 8199/50000, score: 0, e: 0.1, reward: 41.219, seudo-rew:0.0 max: 43.4190673828125 init-act: [28.367 43.805 28.122 44.332]
2025-07-18 00:39:20,345 - ADV - DEBUG - episode: 8299/50000, score: 0, e: 0.1, reward: 41.569, seudo-rew:0.0 max: 43.32816696166992 init-act: [28.17  44.348 27.773 44.511]
2025-07-18 00:39:28,612 - ADV - DEBUG - episode: 8399/50000, score: 0, e: 0.1, reward: 41.038, seudo-rew:0.0 max: 43.421504974365234 init-act: [28.805 44.403 28.031 44.173]
2025-07-18 00:39:36,941 - ADV - DEBUG - episode: 8499/50000, score: 0, e: 0.1, reward: 40.533, seudo-rew:0.0 max: 43.638893127441406 init-act: [29.618 44.788 29.718 44.716]
2025-07-18 00:39:45,169 - ADV - DEBUG - episode: 8599/50000, score: 0, e: 0.1, reward: 41.17, seudo-rew:0.0 max: 43.9819221496582 init-act:

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-10000.keras


2025-07-18 00:41:50,504 - ADV - DEBUG - episode: 10099/50000, score: 0, e: 0.1, reward: 41.468, seudo-rew:0.0 max: 45.39875793457031 init-act: [29.629 46.452 29.607 46.563]
2025-07-18 00:41:58,831 - ADV - DEBUG - episode: 10199/50000, score: 0, e: 0.1, reward: 42.602, seudo-rew:0.0 max: 45.414161682128906 init-act: [29.022 46.418 28.588 46.37 ]
2025-07-18 00:42:07,042 - ADV - DEBUG - episode: 10299/50000, score: 0, e: 0.1, reward: 42.112, seudo-rew:0.0 max: 45.395999908447266 init-act: [28.863 46.218 28.535 46.469]
2025-07-18 00:42:15,699 - ADV - DEBUG - episode: 10399/50000, score: 0, e: 0.1, reward: 41.771, seudo-rew:0.0 max: 45.40648651123047 init-act: [28.369 46.203 28.11  46.516]
2025-07-18 00:42:23,969 - ADV - DEBUG - episode: 10499/50000, score: 0, e: 0.1, reward: 41.849, seudo-rew:0.0 max: 45.603538513183594 init-act: [28.23  46.007 27.977 46.18 ]
2025-07-18 00:42:32,782 - ADV - DEBUG - episode: 10599/50000, score: 0, e: 0.1, reward: 42.178, seudo-rew:0.0 max: 45.39808654785156

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-12000.keras


2025-07-18 00:44:37,880 - ADV - DEBUG - episode: 12099/50000, score: 0, e: 0.1, reward: 43.318, seudo-rew:0.0 max: 46.09503936767578 init-act: [28.457 46.496 28.804 46.641]
2025-07-18 00:44:46,172 - ADV - DEBUG - episode: 12199/50000, score: 0, e: 0.1, reward: 42.244, seudo-rew:0.0 max: 46.188411712646484 init-act: [28.124 46.833 28.123 47.294]
2025-07-18 00:44:54,485 - ADV - DEBUG - episode: 12299/50000, score: 0, e: 0.1, reward: 43.076, seudo-rew:0.0 max: 46.136695861816406 init-act: [28.305 46.716 27.961 46.673]
2025-07-18 00:45:02,809 - ADV - DEBUG - episode: 12399/50000, score: 0, e: 0.1, reward: 42.689, seudo-rew:0.0 max: 46.152801513671875 init-act: [28.315 47.026 28.081 46.703]
2025-07-18 00:45:10,995 - ADV - DEBUG - episode: 12499/50000, score: 0, e: 0.1, reward: 41.246, seudo-rew:0.0 max: 46.0600700378418 init-act: [28.067 46.742 27.75  47.211]
2025-07-18 00:45:19,338 - ADV - DEBUG - episode: 12599/50000, score: 0, e: 0.1, reward: 42.604, seudo-rew:0.0 max: 46.11576461791992 

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-14000.keras


2025-07-18 00:47:25,384 - ADV - DEBUG - episode: 14099/50000, score: 0, e: 0.1, reward: 41.847, seudo-rew:0.0 max: 46.518280029296875 init-act: [29.059 47.434 28.727 47.444]
2025-07-18 00:47:33,738 - ADV - DEBUG - episode: 14199/50000, score: 0, e: 0.1, reward: 42.204, seudo-rew:0.0 max: 46.780757904052734 init-act: [29.663 47.305 30.108 47.546]
2025-07-18 00:47:42,010 - ADV - DEBUG - episode: 14299/50000, score: 0, e: 0.1, reward: 42.654, seudo-rew:0.0 max: 45.85649871826172 init-act: [29.842 47.932 30.695 47.928]
2025-07-18 00:47:50,225 - ADV - DEBUG - episode: 14399/50000, score: 0, e: 0.1, reward: 42.518, seudo-rew:0.0 max: 47.003883361816406 init-act: [30.152 48.153 30.361 48.023]
2025-07-18 00:47:58,546 - ADV - DEBUG - episode: 14499/50000, score: 0, e: 0.1, reward: 42.11, seudo-rew:0.0 max: 47.070491790771484 init-act: [29.125 47.848 29.242 48.124]
2025-07-18 00:48:06,853 - ADV - DEBUG - episode: 14599/50000, score: 0, e: 0.1, reward: 42.899, seudo-rew:0.0 max: 47.22165298461914

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-16000.keras


2025-07-18 00:50:10,957 - ADV - DEBUG - episode: 16099/50000, score: 0, e: 0.1, reward: 42.368, seudo-rew:0.0 max: 46.891510009765625 init-act: [30.59  47.528 32.221 47.899]
2025-07-18 00:50:19,298 - ADV - DEBUG - episode: 16199/50000, score: 0, e: 0.1, reward: 42.849, seudo-rew:0.0 max: 46.96871566772461 init-act: [29.618 47.944 30.836 48.   ]
2025-07-18 00:50:27,619 - ADV - DEBUG - episode: 16299/50000, score: 0, e: 0.1, reward: 43.305, seudo-rew:0.0 max: 47.30378723144531 init-act: [29.479 48.068 30.759 48.11 ]
2025-07-18 00:50:35,933 - ADV - DEBUG - episode: 16399/50000, score: 0, e: 0.1, reward: 42.573, seudo-rew:0.0 max: 47.04195022583008 init-act: [29.463 48.107 30.094 48.009]
2025-07-18 00:50:44,217 - ADV - DEBUG - episode: 16499/50000, score: 0, e: 0.1, reward: 42.843, seudo-rew:0.0 max: 47.4419059753418 init-act: [29.228 47.725 29.416 48.51 ]
2025-07-18 00:50:52,718 - ADV - DEBUG - episode: 16599/50000, score: 0, e: 0.1, reward: 42.128, seudo-rew:0.0 max: 47.66573715209961 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-18000.keras


2025-07-18 00:52:57,920 - ADV - DEBUG - episode: 18099/50000, score: 0, e: 0.1, reward: 43.843, seudo-rew:0.0 max: 47.562889099121094 init-act: [29.811 48.558 29.959 48.223]
2025-07-18 00:53:06,205 - ADV - DEBUG - episode: 18199/50000, score: 0, e: 0.1, reward: 43.514, seudo-rew:0.0 max: 47.450286865234375 init-act: [29.707 48.316 29.799 48.239]
2025-07-18 00:53:14,482 - ADV - DEBUG - episode: 18299/50000, score: 0, e: 0.1, reward: 43.212, seudo-rew:0.0 max: 47.420284271240234 init-act: [30.342 47.839 29.811 47.871]
2025-07-18 00:53:22,784 - ADV - DEBUG - episode: 18399/50000, score: 0, e: 0.1, reward: 43.194, seudo-rew:0.0 max: 47.67688751220703 init-act: [30.44  48.69  30.008 48.59 ]
2025-07-18 00:53:31,078 - ADV - DEBUG - episode: 18499/50000, score: 0, e: 0.1, reward: 43.576, seudo-rew:0.0 max: 47.58066177368164 init-act: [30.012 48.206 29.502 48.22 ]
2025-07-18 00:53:39,334 - ADV - DEBUG - episode: 18599/50000, score: 0, e: 0.1, reward: 43.255, seudo-rew:0.0 max: 47.36989593505859

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-20000.keras


2025-07-18 00:55:44,263 - ADV - DEBUG - episode: 20099/50000, score: 0, e: 0.1, reward: 42.841, seudo-rew:0.0 max: 47.82164001464844 init-act: [29.93  48.43  30.51  48.774]
2025-07-18 00:55:52,778 - ADV - DEBUG - episode: 20199/50000, score: 0, e: 0.1, reward: 43.697, seudo-rew:0.0 max: 48.024051666259766 init-act: [30.182 49.047 30.641 49.004]
2025-07-18 00:56:01,092 - ADV - DEBUG - episode: 20299/50000, score: 0, e: 0.1, reward: 43.913, seudo-rew:0.0 max: 47.80173110961914 init-act: [29.965 48.585 30.021 48.664]
2025-07-18 00:56:10,093 - ADV - DEBUG - episode: 20399/50000, score: 0, e: 0.1, reward: 43.498, seudo-rew:0.0 max: 47.88347625732422 init-act: [29.91  48.65  29.566 48.439]
2025-07-18 00:56:18,426 - ADV - DEBUG - episode: 20499/50000, score: 0, e: 0.1, reward: 42.974, seudo-rew:0.0 max: 48.12740707397461 init-act: [31.021 48.782 31.174 48.637]
2025-07-18 00:56:26,683 - ADV - DEBUG - episode: 20599/50000, score: 0, e: 0.1, reward: 43.075, seudo-rew:0.0 max: 48.13286590576172 i

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-22000.keras


2025-07-18 00:58:31,434 - ADV - DEBUG - episode: 22099/50000, score: 0, e: 0.1, reward: 43.832, seudo-rew:0.0 max: 47.809234619140625 init-act: [31.264 48.447 30.959 48.599]
2025-07-18 00:58:39,801 - ADV - DEBUG - episode: 22199/50000, score: 0, e: 0.1, reward: 44.239, seudo-rew:0.0 max: 47.495967864990234 init-act: [30.769 48.09  30.462 48.413]
2025-07-18 00:58:48,065 - ADV - DEBUG - episode: 22299/50000, score: 0, e: 0.1, reward: 43.085, seudo-rew:0.0 max: 47.54756164550781 init-act: [32.284 48.105 32.133 47.927]
2025-07-18 00:58:56,361 - ADV - DEBUG - episode: 22399/50000, score: 0, e: 0.1, reward: 43.682, seudo-rew:0.0 max: 47.781982421875 init-act: [32.11  48.668 31.396 48.691]
2025-07-18 00:59:04,697 - ADV - DEBUG - episode: 22499/50000, score: 0, e: 0.1, reward: 43.341, seudo-rew:0.0 max: 47.86579895019531 init-act: [31.496 48.382 30.894 48.543]
2025-07-18 00:59:12,905 - ADV - DEBUG - episode: 22599/50000, score: 0, e: 0.1, reward: 43.509, seudo-rew:0.0 max: 47.73822021484375 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-24000.keras


2025-07-18 01:01:17,844 - ADV - DEBUG - episode: 24099/50000, score: 0, e: 0.1, reward: 43.324, seudo-rew:0.0 max: 47.889366149902344 init-act: [31.197 48.305 31.342 48.406]
2025-07-18 01:01:26,022 - ADV - DEBUG - episode: 24199/50000, score: 0, e: 0.1, reward: 43.469, seudo-rew:0.0 max: 47.475894927978516 init-act: [31.17  48.004 31.311 48.249]
2025-07-18 01:01:34,338 - ADV - DEBUG - episode: 24299/50000, score: 0, e: 0.1, reward: 43.479, seudo-rew:0.0 max: 47.75150680541992 init-act: [31.37  48.124 31.133 48.442]
2025-07-18 01:01:42,616 - ADV - DEBUG - episode: 24399/50000, score: 0, e: 0.1, reward: 43.952, seudo-rew:0.0 max: 47.415672302246094 init-act: [31.76  48.411 31.334 48.457]
2025-07-18 01:01:50,976 - ADV - DEBUG - episode: 24499/50000, score: 0, e: 0.1, reward: 43.953, seudo-rew:0.0 max: 47.50833511352539 init-act: [31.971 48.59  31.494 48.479]
2025-07-18 01:01:59,238 - ADV - DEBUG - episode: 24599/50000, score: 0, e: 0.1, reward: 43.756, seudo-rew:0.0 max: 47.71800994873047

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-26000.keras


2025-07-18 01:04:04,337 - ADV - DEBUG - episode: 26099/50000, score: 0, e: 0.1, reward: 43.441, seudo-rew:0.0 max: 48.70830154418945 init-act: [32.008 49.535 31.596 49.7  ]
2025-07-18 01:04:12,644 - ADV - DEBUG - episode: 26199/50000, score: 0, e: 0.1, reward: 43.783, seudo-rew:0.0 max: 48.75590896606445 init-act: [32.579 49.413 31.848 49.411]
2025-07-18 01:04:20,875 - ADV - DEBUG - episode: 26299/50000, score: 0, e: 0.1, reward: 43.563, seudo-rew:0.0 max: 48.716026306152344 init-act: [32.328 49.53  32.101 49.582]
2025-07-18 01:04:29,101 - ADV - DEBUG - episode: 26399/50000, score: 0, e: 0.1, reward: 43.431, seudo-rew:0.0 max: 48.913368225097656 init-act: [32.476 49.329 31.943 49.517]
2025-07-18 01:04:37,410 - ADV - DEBUG - episode: 26499/50000, score: 0, e: 0.1, reward: 43.765, seudo-rew:0.0 max: 48.77786636352539 init-act: [32.465 49.746 32.    49.656]
2025-07-18 01:04:45,709 - ADV - DEBUG - episode: 26599/50000, score: 0, e: 0.1, reward: 44.422, seudo-rew:0.0 max: 48.94567108154297 

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-28000.keras


2025-07-18 01:06:50,267 - ADV - DEBUG - episode: 28099/50000, score: 0, e: 0.1, reward: 44.878, seudo-rew:0.0 max: 48.16328048706055 init-act: [32.76  48.957 32.252 48.773]
2025-07-18 01:06:58,492 - ADV - DEBUG - episode: 28199/50000, score: 0, e: 0.1, reward: 44.322, seudo-rew:0.0 max: 47.995052337646484 init-act: [32.615 48.554 32.36  48.574]
2025-07-18 01:07:06,774 - ADV - DEBUG - episode: 28299/50000, score: 0, e: 0.1, reward: 44.146, seudo-rew:0.0 max: 47.747291564941406 init-act: [32.604 48.445 31.99  48.349]
2025-07-18 01:07:15,029 - ADV - DEBUG - episode: 28399/50000, score: 0, e: 0.1, reward: 44.229, seudo-rew:0.0 max: 47.66698455810547 init-act: [33.992 48.182 33.386 48.426]
2025-07-18 01:07:23,261 - ADV - DEBUG - episode: 28499/50000, score: 0, e: 0.1, reward: 44.365, seudo-rew:0.0 max: 47.35114669799805 init-act: [32.967 48.473 32.112 48.452]
2025-07-18 01:07:31,464 - ADV - DEBUG - episode: 28599/50000, score: 0, e: 0.1, reward: 43.848, seudo-rew:0.0 max: 47.69719696044922 

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-30000.keras


2025-07-18 01:09:36,162 - ADV - DEBUG - episode: 30099/50000, score: 0, e: 0.1, reward: 44.07, seudo-rew:0.0 max: 47.74254608154297 init-act: [32.25  48.138 32.166 47.9  ]
2025-07-18 01:09:44,420 - ADV - DEBUG - episode: 30199/50000, score: 0, e: 0.1, reward: 44.822, seudo-rew:0.0 max: 47.69328689575195 init-act: [32.621 48.458 32.91  48.794]
2025-07-18 01:09:52,695 - ADV - DEBUG - episode: 30299/50000, score: 0, e: 0.1, reward: 44.699, seudo-rew:0.0 max: 47.929237365722656 init-act: [32.553 48.491 32.355 48.61 ]
2025-07-18 01:10:00,836 - ADV - DEBUG - episode: 30399/50000, score: 0, e: 0.1, reward: 44.197, seudo-rew:0.0 max: 48.122257232666016 init-act: [32.568 48.63  32.313 48.684]
2025-07-18 01:10:09,111 - ADV - DEBUG - episode: 30499/50000, score: 0, e: 0.1, reward: 43.895, seudo-rew:0.0 max: 48.066795349121094 init-act: [33.125 49.078 33.19  49.132]
2025-07-18 01:10:17,558 - ADV - DEBUG - episode: 30599/50000, score: 0, e: 0.1, reward: 44.126, seudo-rew:0.0 max: 48.15467834472656 

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-32000.keras


2025-07-18 01:12:23,005 - ADV - DEBUG - episode: 32099/50000, score: 0, e: 0.1, reward: 44.601, seudo-rew:0.0 max: 49.20270919799805 init-act: [33.074 50.184 32.895 49.77 ]
2025-07-18 01:12:31,426 - ADV - DEBUG - episode: 32199/50000, score: 0, e: 0.1, reward: 43.831, seudo-rew:0.0 max: 49.21867752075195 init-act: [32.873 50.222 32.39  50.228]
2025-07-18 01:12:39,793 - ADV - DEBUG - episode: 32299/50000, score: 0, e: 0.1, reward: 44.533, seudo-rew:0.0 max: 49.21281814575195 init-act: [32.602 49.601 32.26  49.659]
2025-07-18 01:12:48,068 - ADV - DEBUG - episode: 32399/50000, score: 0, e: 0.1, reward: 44.743, seudo-rew:0.0 max: 49.31907272338867 init-act: [32.883 49.816 32.363 49.948]
2025-07-18 01:12:56,331 - ADV - DEBUG - episode: 32499/50000, score: 0, e: 0.1, reward: 44.856, seudo-rew:0.0 max: 49.283138275146484 init-act: [32.54  50.144 31.985 50.145]
2025-07-18 01:13:04,626 - ADV - DEBUG - episode: 32599/50000, score: 0, e: 0.1, reward: 44.773, seudo-rew:0.0 max: 49.40458679199219 i

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-34000.keras


2025-07-18 01:15:08,985 - ADV - DEBUG - episode: 34099/50000, score: 0, e: 0.1, reward: 43.9, seudo-rew:0.0 max: 49.11830520629883 init-act: [32.316 49.884 32.457 49.725]
2025-07-18 01:15:17,259 - ADV - DEBUG - episode: 34199/50000, score: 0, e: 0.1, reward: 43.721, seudo-rew:0.0 max: 48.89077377319336 init-act: [32.097 49.543 31.849 49.466]
2025-07-18 01:15:25,537 - ADV - DEBUG - episode: 34299/50000, score: 0, e: 0.1, reward: 44.215, seudo-rew:0.0 max: 49.041282653808594 init-act: [32.388 49.719 31.906 49.513]
2025-07-18 01:15:33,868 - ADV - DEBUG - episode: 34399/50000, score: 0, e: 0.1, reward: 44.358, seudo-rew:0.0 max: 49.09650421142578 init-act: [32.402 50.073 32.069 50.082]
2025-07-18 01:15:42,075 - ADV - DEBUG - episode: 34499/50000, score: 0, e: 0.1, reward: 45.048, seudo-rew:0.0 max: 49.127742767333984 init-act: [32.729 50.039 32.14  49.94 ]
2025-07-18 01:15:50,466 - ADV - DEBUG - episode: 34599/50000, score: 0, e: 0.1, reward: 44.722, seudo-rew:0.0 max: 48.97667694091797 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-36000.keras


2025-07-18 01:17:55,172 - ADV - DEBUG - episode: 36099/50000, score: 0, e: 0.1, reward: 43.651, seudo-rew:0.0 max: 48.31560134887695 init-act: [32.738 49.074 32.873 49.172]
2025-07-18 01:18:03,483 - ADV - DEBUG - episode: 36199/50000, score: 0, e: 0.1, reward: 44.775, seudo-rew:0.0 max: 48.39134979248047 init-act: [32.603 49.117 32.444 49.34 ]
2025-07-18 01:18:11,858 - ADV - DEBUG - episode: 36299/50000, score: 0, e: 0.1, reward: 44.122, seudo-rew:0.0 max: 48.545867919921875 init-act: [32.901 49.477 32.983 49.44 ]
2025-07-18 01:18:20,401 - ADV - DEBUG - episode: 36399/50000, score: 0, e: 0.1, reward: 44.447, seudo-rew:0.0 max: 48.54888916015625 init-act: [32.758 49.117 32.769 49.582]
2025-07-18 01:18:28,657 - ADV - DEBUG - episode: 36499/50000, score: 0, e: 0.1, reward: 44.031, seudo-rew:0.0 max: 48.46989440917969 init-act: [33.23  49.177 32.76  49.34 ]
2025-07-18 01:18:36,982 - ADV - DEBUG - episode: 36599/50000, score: 0, e: 0.1, reward: 44.183, seudo-rew:0.0 max: 48.74138259887695 i

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-38000.keras


2025-07-18 01:20:41,503 - ADV - DEBUG - episode: 38099/50000, score: 0, e: 0.1, reward: 44.135, seudo-rew:0.0 max: 48.86267852783203 init-act: [32.242 49.372 33.246 49.312]
2025-07-18 01:20:49,906 - ADV - DEBUG - episode: 38199/50000, score: 0, e: 0.1, reward: 43.982, seudo-rew:0.0 max: 48.942298889160156 init-act: [32.114 49.96  32.398 49.613]
2025-07-18 01:20:58,255 - ADV - DEBUG - episode: 38299/50000, score: 0, e: 0.1, reward: 44.92, seudo-rew:0.0 max: 48.866851806640625 init-act: [32.541 49.288 32.781 49.241]
2025-07-18 01:21:06,491 - ADV - DEBUG - episode: 38399/50000, score: 0, e: 0.1, reward: 44.394, seudo-rew:0.0 max: 48.753379821777344 init-act: [32.105 49.695 32.601 49.525]
2025-07-18 01:21:14,749 - ADV - DEBUG - episode: 38499/50000, score: 0, e: 0.1, reward: 44.794, seudo-rew:0.0 max: 48.5941276550293 init-act: [32.572 49.207 32.765 49.181]
2025-07-18 01:21:22,989 - ADV - DEBUG - episode: 38599/50000, score: 0, e: 0.1, reward: 44.354, seudo-rew:0.0 max: 48.71917724609375 i

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-40000.keras


2025-07-18 01:23:27,573 - ADV - DEBUG - episode: 40099/50000, score: 0, e: 0.1, reward: 44.23, seudo-rew:0.0 max: 48.232147216796875 init-act: [32.487 49.117 32.021 49.007]
2025-07-18 01:23:35,949 - ADV - DEBUG - episode: 40199/50000, score: 0, e: 0.1, reward: 43.604, seudo-rew:0.0 max: 48.2078857421875 init-act: [32.541 48.967 32.402 48.742]
2025-07-18 01:23:44,209 - ADV - DEBUG - episode: 40299/50000, score: 0, e: 0.1, reward: 44.394, seudo-rew:0.0 max: 48.56283950805664 init-act: [32.637 49.026 32.988 48.921]
2025-07-18 01:23:52,659 - ADV - DEBUG - episode: 40399/50000, score: 0, e: 0.1, reward: 43.944, seudo-rew:0.0 max: 48.36416244506836 init-act: [32.167 48.981 32.274 49.074]
2025-07-18 01:24:00,844 - ADV - DEBUG - episode: 40499/50000, score: 0, e: 0.1, reward: 43.946, seudo-rew:0.0 max: 48.359474182128906 init-act: [32.625 49.007 33.078 48.841]
2025-07-18 01:24:09,160 - ADV - DEBUG - episode: 40599/50000, score: 0, e: 0.1, reward: 44.603, seudo-rew:0.0 max: 48.423439025878906 i

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-42000.keras


2025-07-18 01:26:15,220 - ADV - DEBUG - episode: 42099/50000, score: 0, e: 0.1, reward: 43.9, seudo-rew:0.0 max: 48.63029098510742 init-act: [33.414 49.582 32.994 49.309]
2025-07-18 01:26:23,508 - ADV - DEBUG - episode: 42199/50000, score: 0, e: 0.1, reward: 44.424, seudo-rew:0.0 max: 48.52729415893555 init-act: [33.151 49.403 32.607 49.015]
2025-07-18 01:26:31,764 - ADV - DEBUG - episode: 42299/50000, score: 0, e: 0.1, reward: 44.368, seudo-rew:0.0 max: 48.55446243286133 init-act: [33.487 49.54  33.171 49.5  ]
2025-07-18 01:26:40,095 - ADV - DEBUG - episode: 42399/50000, score: 0, e: 0.1, reward: 44.82, seudo-rew:0.0 max: 48.783546447753906 init-act: [33.569 49.626 32.932 49.603]
2025-07-18 01:26:48,403 - ADV - DEBUG - episode: 42499/50000, score: 0, e: 0.1, reward: 45.158, seudo-rew:0.0 max: 48.674861907958984 init-act: [33.146 49.38  32.491 49.219]
2025-07-18 01:26:56,620 - ADV - DEBUG - episode: 42599/50000, score: 0, e: 0.1, reward: 44.519, seudo-rew:0.0 max: 48.76781463623047 ini

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-44000.keras


2025-07-18 01:29:01,153 - ADV - DEBUG - episode: 44099/50000, score: 0, e: 0.1, reward: 44.948, seudo-rew:0.0 max: 48.37458038330078 init-act: [33.096 49.309 32.462 48.885]
2025-07-18 01:29:09,451 - ADV - DEBUG - episode: 44199/50000, score: 0, e: 0.1, reward: 44.982, seudo-rew:0.0 max: 48.51130294799805 init-act: [33.053 49.505 32.64  49.352]
2025-07-18 01:29:17,699 - ADV - DEBUG - episode: 44299/50000, score: 0, e: 0.1, reward: 44.342, seudo-rew:0.0 max: 48.43008041381836 init-act: [33.766 49.207 33.558 49.399]
2025-07-18 01:29:25,886 - ADV - DEBUG - episode: 44399/50000, score: 0, e: 0.1, reward: 43.746, seudo-rew:0.0 max: 48.49624252319336 init-act: [32.887 49.288 32.576 48.975]
2025-07-18 01:29:34,206 - ADV - DEBUG - episode: 44499/50000, score: 0, e: 0.1, reward: 44.395, seudo-rew:0.0 max: 48.24698257446289 init-act: [33.062 49.07  32.547 48.876]
2025-07-18 01:29:42,520 - ADV - DEBUG - episode: 44599/50000, score: 0, e: 0.1, reward: 44.382, seudo-rew:0.0 max: 48.415252685546875 i

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-46000.keras


2025-07-18 01:31:47,171 - ADV - DEBUG - episode: 46099/50000, score: 0, e: 0.1, reward: 43.739, seudo-rew:0.0 max: 48.9216194152832 init-act: [33.293 49.861 33.277 49.711]
2025-07-18 01:31:55,458 - ADV - DEBUG - episode: 46199/50000, score: 0, e: 0.1, reward: 43.496, seudo-rew:0.0 max: 48.885414123535156 init-act: [33.011 49.664 33.877 49.907]
2025-07-18 01:32:03,698 - ADV - DEBUG - episode: 46299/50000, score: 0, e: 0.1, reward: 43.865, seudo-rew:0.0 max: 48.94122314453125 init-act: [33.255 49.504 33.493 49.574]
2025-07-18 01:32:11,995 - ADV - DEBUG - episode: 46399/50000, score: 0, e: 0.1, reward: 44.427, seudo-rew:0.0 max: 48.782501220703125 init-act: [32.578 49.395 33.126 49.285]
2025-07-18 01:32:20,309 - ADV - DEBUG - episode: 46499/50000, score: 0, e: 0.1, reward: 44.805, seudo-rew:0.0 max: 48.7367057800293 init-act: [32.152 49.453 32.888 49.509]
2025-07-18 01:32:28,552 - ADV - DEBUG - episode: 46599/50000, score: 0, e: 0.1, reward: 44.699, seudo-rew:0.0 max: 48.754554748535156 i

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.001\gpt\model-48000.keras


2025-07-18 01:34:33,132 - ADV - DEBUG - episode: 48099/50000, score: 0, e: 0.1, reward: 44.522, seudo-rew:0.0 max: 47.5499382019043 init-act: [32.229 48.04  32.239 48.18 ]
2025-07-18 01:34:41,467 - ADV - DEBUG - episode: 48199/50000, score: 0, e: 0.1, reward: 44.027, seudo-rew:0.0 max: 47.55604934692383 init-act: [32.084 48.297 31.96  48.279]
2025-07-18 01:34:49,741 - ADV - DEBUG - episode: 48299/50000, score: 0, e: 0.1, reward: 44.666, seudo-rew:0.0 max: 47.552223205566406 init-act: [31.979 48.099 31.948 48.071]
2025-07-18 01:34:58,006 - ADV - DEBUG - episode: 48399/50000, score: 0, e: 0.1, reward: 45.044, seudo-rew:0.0 max: 47.570884704589844 init-act: [32.057 48.378 31.858 48.361]
2025-07-18 01:35:06,285 - ADV - DEBUG - episode: 48499/50000, score: 0, e: 0.1, reward: 44.594, seudo-rew:0.0 max: 47.55036926269531 init-act: [32.321 48.262 32.322 48.217]
2025-07-18 01:35:14,627 - ADV - DEBUG - episode: 48599/50000, score: 0, e: 0.1, reward: 43.772, seudo-rew:0.0 max: 47.755043029785156 

400000 0.001 0.2
📦 Training: buf=400000, lr=0.001, eps=0.2 → trained_model\test_adv_RL_400000eps0.2lr0.001\gpt


2025-07-18 01:37:11,243 - ADV - DEBUG - Trained model saved to: trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-0.keras


dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-0.keras


2025-07-18 01:37:16,794 - ADV - DEBUG - episode: 99/50000, score: 0, e: 0.2, reward: 20.753, seudo-rew:0.0 max: 2.0338478088378906 init-act: [2.02  0.891 0.961 0.997]
2025-07-18 01:37:22,348 - ADV - DEBUG - episode: 199/50000, score: 0, e: 0.2, reward: 21.845, seudo-rew:0.0 max: 2.4443678855895996 init-act: [2.31  2.43  2.055 2.256]
2025-07-18 01:37:27,929 - ADV - DEBUG - episode: 299/50000, score: 0, e: 0.2, reward: 35.042, seudo-rew:0.0 max: 3.2732691764831543 init-act: [2.472 4.165 2.388 3.641]
2025-07-18 01:37:33,483 - ADV - DEBUG - episode: 399/50000, score: 0, e: 0.2, reward: 36.519, seudo-rew:0.0 max: 4.258643627166748 init-act: [2.64  5.148 2.84  4.583]
2025-07-18 01:37:41,799 - ADV - DEBUG - episode: 499/50000, score: 0, e: 0.2, reward: 36.848, seudo-rew:0.0 max: 5.340853691101074 init-act: [2.908 6.101 3.266 5.513]
2025-07-18 01:37:50,140 - ADV - DEBUG - episode: 599/50000, score: 0, e: 0.2, reward: 36.503, seudo-rew:0.0 max: 6.376832962036133 init-act: [3.407 7.251 3.614 6.6

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-2000.keras


2025-07-18 01:39:55,005 - ADV - DEBUG - episode: 2099/50000, score: 0, e: 0.2, reward: 38.198, seudo-rew:0.0 max: 21.434375762939453 init-act: [14.206 22.035 14.46  22.254]
2025-07-18 01:40:03,530 - ADV - DEBUG - episode: 2199/50000, score: 0, e: 0.2, reward: 38.437, seudo-rew:0.0 max: 22.20349884033203 init-act: [15.14  22.965 15.261 23.089]
2025-07-18 01:40:11,847 - ADV - DEBUG - episode: 2299/50000, score: 0, e: 0.2, reward: 38.445, seudo-rew:0.0 max: 23.15904426574707 init-act: [15.539 23.631 15.83  24.142]
2025-07-18 01:40:20,267 - ADV - DEBUG - episode: 2399/50000, score: 0, e: 0.2, reward: 37.979, seudo-rew:0.0 max: 24.099952697753906 init-act: [16.223 24.96  16.447 25.132]
2025-07-18 01:40:28,989 - ADV - DEBUG - episode: 2499/50000, score: 0, e: 0.2, reward: 38.28, seudo-rew:0.0 max: 24.964557647705078 init-act: [16.735 25.92  17.143 25.967]
2025-07-18 01:40:37,448 - ADV - DEBUG - episode: 2599/50000, score: 0, e: 0.2, reward: 38.114, seudo-rew:0.0 max: 25.86960792541504 init-a

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-4000.keras


2025-07-18 01:42:42,931 - ADV - DEBUG - episode: 4099/50000, score: 0, e: 0.2, reward: 37.659, seudo-rew:0.0 max: 37.54983139038086 init-act: [26.402 38.621 26.361 38.575]
2025-07-18 01:42:51,245 - ADV - DEBUG - episode: 4199/50000, score: 0, e: 0.2, reward: 39.524, seudo-rew:0.0 max: 38.00273513793945 init-act: [26.283 38.162 26.166 38.432]
2025-07-18 01:42:59,516 - ADV - DEBUG - episode: 4299/50000, score: 0, e: 0.2, reward: 39.607, seudo-rew:0.0 max: 38.484745025634766 init-act: [26.366 38.869 26.209 39.148]
2025-07-18 01:43:07,855 - ADV - DEBUG - episode: 4399/50000, score: 0, e: 0.2, reward: 40.009, seudo-rew:0.0 max: 38.60826110839844 init-act: [26.762 39.629 26.373 39.611]
2025-07-18 01:43:16,226 - ADV - DEBUG - episode: 4499/50000, score: 0, e: 0.2, reward: 39.503, seudo-rew:0.0 max: 39.16318130493164 init-act: [26.337 39.546 26.272 39.862]
2025-07-18 01:43:24,535 - ADV - DEBUG - episode: 4599/50000, score: 0, e: 0.2, reward: 39.945, seudo-rew:0.0 max: 39.443077087402344 init-a

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-6000.keras


2025-07-18 01:45:29,358 - ADV - DEBUG - episode: 6099/50000, score: 0, e: 0.2, reward: 39.723, seudo-rew:0.0 max: 42.002098083496094 init-act: [28.172 42.406 28.367 42.789]
2025-07-18 01:45:37,647 - ADV - DEBUG - episode: 6199/50000, score: 0, e: 0.2, reward: 39.893, seudo-rew:0.0 max: 41.92115783691406 init-act: [28.182 43.207 27.945 43.114]
2025-07-18 01:45:45,959 - ADV - DEBUG - episode: 6299/50000, score: 0, e: 0.2, reward: 38.816, seudo-rew:0.0 max: 42.198387145996094 init-act: [28.108 42.698 28.463 42.801]
2025-07-18 01:45:54,403 - ADV - DEBUG - episode: 6399/50000, score: 0, e: 0.2, reward: 39.467, seudo-rew:0.0 max: 42.200199127197266 init-act: [28.611 43.592 28.588 43.711]
2025-07-18 01:46:02,737 - ADV - DEBUG - episode: 6499/50000, score: 0, e: 0.2, reward: 38.129, seudo-rew:0.0 max: 42.747703552246094 init-act: [28.1   43.621 28.16  43.531]
2025-07-18 01:46:11,003 - ADV - DEBUG - episode: 6599/50000, score: 0, e: 0.2, reward: 38.625, seudo-rew:0.0 max: 42.99064254760742 init

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-8000.keras


2025-07-18 01:48:16,131 - ADV - DEBUG - episode: 8099/50000, score: 0, e: 0.2, reward: 38.978, seudo-rew:0.0 max: 45.2502326965332 init-act: [29.322 45.603 29.736 45.646]
2025-07-18 01:48:24,454 - ADV - DEBUG - episode: 8199/50000, score: 0, e: 0.2, reward: 40.279, seudo-rew:0.0 max: 45.24598693847656 init-act: [29.988 46.246 30.516 46.414]
2025-07-18 01:48:32,773 - ADV - DEBUG - episode: 8299/50000, score: 0, e: 0.2, reward: 40.165, seudo-rew:0.0 max: 45.473114013671875 init-act: [30.247 46.289 30.017 46.262]
2025-07-18 01:48:41,038 - ADV - DEBUG - episode: 8399/50000, score: 0, e: 0.2, reward: 38.787, seudo-rew:0.0 max: 45.3337516784668 init-act: [30.045 46.247 30.013 46.343]
2025-07-18 01:48:49,376 - ADV - DEBUG - episode: 8499/50000, score: 0, e: 0.2, reward: 38.626, seudo-rew:0.0 max: 45.34719467163086 init-act: [31.357 46.36  30.277 46.214]
2025-07-18 01:48:57,800 - ADV - DEBUG - episode: 8599/50000, score: 0, e: 0.2, reward: 39.962, seudo-rew:0.0 max: 45.726524353027344 init-act

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-10000.keras


2025-07-18 01:51:03,548 - ADV - DEBUG - episode: 10099/50000, score: 0, e: 0.2, reward: 40.61, seudo-rew:0.0 max: 47.34130859375 init-act: [31.605 48.297 31.562 48.3  ]
2025-07-18 01:51:11,852 - ADV - DEBUG - episode: 10199/50000, score: 0, e: 0.2, reward: 38.66, seudo-rew:0.0 max: 47.204368591308594 init-act: [31.592 47.859 31.209 48.083]
2025-07-18 01:51:20,172 - ADV - DEBUG - episode: 10299/50000, score: 0, e: 0.2, reward: 38.535, seudo-rew:0.0 max: 47.42888259887695 init-act: [31.43  48.484 31.002 48.591]
2025-07-18 01:51:28,486 - ADV - DEBUG - episode: 10399/50000, score: 0, e: 0.2, reward: 39.355, seudo-rew:0.0 max: 47.289031982421875 init-act: [31.207 47.831 30.825 47.793]
2025-07-18 01:51:36,803 - ADV - DEBUG - episode: 10499/50000, score: 0, e: 0.2, reward: 38.817, seudo-rew:0.0 max: 47.4404411315918 init-act: [31.334 47.976 30.918 48.069]
2025-07-18 01:51:45,022 - ADV - DEBUG - episode: 10599/50000, score: 0, e: 0.2, reward: 38.365, seudo-rew:0.0 max: 47.404266357421875 init-

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-12000.keras


2025-07-18 01:53:49,671 - ADV - DEBUG - episode: 12099/50000, score: 0, e: 0.2, reward: 38.224, seudo-rew:0.0 max: 47.5231819152832 init-act: [30.223 47.719 31.476 48.161]
2025-07-18 01:53:57,926 - ADV - DEBUG - episode: 12199/50000, score: 0, e: 0.2, reward: 37.066, seudo-rew:0.0 max: 47.289390563964844 init-act: [31.059 47.828 31.498 48.096]
2025-07-18 01:54:06,227 - ADV - DEBUG - episode: 12299/50000, score: 0, e: 0.2, reward: 38.228, seudo-rew:0.0 max: 47.37701416015625 init-act: [30.82  48.061 31.533 48.065]
2025-07-18 01:54:14,509 - ADV - DEBUG - episode: 12399/50000, score: 0, e: 0.2, reward: 39.601, seudo-rew:0.0 max: 47.23102951049805 init-act: [30.385 48.139 30.867 48.075]
2025-07-18 01:54:22,725 - ADV - DEBUG - episode: 12499/50000, score: 0, e: 0.2, reward: 40.323, seudo-rew:0.0 max: 47.174461364746094 init-act: [29.916 47.564 30.098 47.851]
2025-07-18 01:54:31,004 - ADV - DEBUG - episode: 12599/50000, score: 0, e: 0.2, reward: 39.947, seudo-rew:0.0 max: 47.169891357421875 

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-14000.keras


2025-07-18 01:56:37,024 - ADV - DEBUG - episode: 14099/50000, score: 0, e: 0.2, reward: 38.128, seudo-rew:0.0 max: 47.573062896728516 init-act: [31.209 48.491 31.17  48.785]
2025-07-18 01:56:45,805 - ADV - DEBUG - episode: 14199/50000, score: 0, e: 0.2, reward: 40.611, seudo-rew:0.0 max: 47.212623596191406 init-act: [30.841 47.649 30.951 48.132]
2025-07-18 01:56:54,259 - ADV - DEBUG - episode: 14299/50000, score: 0, e: 0.2, reward: 39.994, seudo-rew:0.0 max: 47.23735809326172 init-act: [30.994 47.89  31.294 48.269]
2025-07-18 01:57:02,786 - ADV - DEBUG - episode: 14399/50000, score: 0, e: 0.2, reward: 40.406, seudo-rew:0.0 max: 47.265316009521484 init-act: [31.391 48.015 31.236 48.11 ]
2025-07-18 01:57:11,427 - ADV - DEBUG - episode: 14499/50000, score: 0, e: 0.2, reward: 39.789, seudo-rew:0.0 max: 47.121822357177734 init-act: [31.856 48.082 31.625 48.366]
2025-07-18 01:57:20,376 - ADV - DEBUG - episode: 14599/50000, score: 0, e: 0.2, reward: 40.542, seudo-rew:0.0 max: 47.3299522399902

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-16000.keras


2025-07-18 01:59:32,333 - ADV - DEBUG - episode: 16099/50000, score: 0, e: 0.2, reward: 40.398, seudo-rew:0.0 max: 47.87211608886719 init-act: [32.921 48.747 33.353 48.766]
2025-07-18 01:59:40,928 - ADV - DEBUG - episode: 16199/50000, score: 0, e: 0.2, reward: 41.198, seudo-rew:0.0 max: 47.84120178222656 init-act: [32.873 48.54  32.685 48.66 ]
2025-07-18 01:59:49,763 - ADV - DEBUG - episode: 16299/50000, score: 0, e: 0.2, reward: 39.723, seudo-rew:0.0 max: 48.057838439941406 init-act: [33.461 48.935 32.686 49.257]
2025-07-18 01:59:58,766 - ADV - DEBUG - episode: 16399/50000, score: 0, e: 0.2, reward: 39.177, seudo-rew:0.0 max: 48.35148239135742 init-act: [33.508 48.962 33.777 49.117]
2025-07-18 02:00:07,557 - ADV - DEBUG - episode: 16499/50000, score: 0, e: 0.2, reward: 39.652, seudo-rew:0.0 max: 48.568363189697266 init-act: [33.422 49.594 33.029 49.353]
2025-07-18 02:00:16,320 - ADV - DEBUG - episode: 16599/50000, score: 0, e: 0.2, reward: 39.446, seudo-rew:0.0 max: 48.415428161621094

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-18000.keras


2025-07-18 02:02:31,833 - ADV - DEBUG - episode: 18099/50000, score: 0, e: 0.2, reward: 40.159, seudo-rew:0.0 max: 47.650840759277344 init-act: [32.549 48.898 32.611 48.697]
2025-07-18 02:02:40,463 - ADV - DEBUG - episode: 18199/50000, score: 0, e: 0.2, reward: 39.522, seudo-rew:0.0 max: 47.610748291015625 init-act: [32.01  48.25  31.812 48.336]
2025-07-18 02:02:49,522 - ADV - DEBUG - episode: 18299/50000, score: 0, e: 0.2, reward: 41.198, seudo-rew:0.0 max: 47.3997917175293 init-act: [32.277 48.074 31.969 48.273]
2025-07-18 02:02:59,055 - ADV - DEBUG - episode: 18399/50000, score: 0, e: 0.2, reward: 40.428, seudo-rew:0.0 max: 47.567508697509766 init-act: [32.732 48.005 32.475 48.605]
2025-07-18 02:03:08,870 - ADV - DEBUG - episode: 18499/50000, score: 0, e: 0.2, reward: 41.362, seudo-rew:0.0 max: 47.48297119140625 init-act: [32.092 47.563 31.771 47.901]
2025-07-18 02:03:18,757 - ADV - DEBUG - episode: 18599/50000, score: 0, e: 0.2, reward: 39.978, seudo-rew:0.0 max: 47.34756088256836 

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-20000.keras


2025-07-18 02:05:48,343 - ADV - DEBUG - episode: 20099/50000, score: 0, e: 0.2, reward: 40.406, seudo-rew:0.0 max: 46.43417739868164 init-act: [31.846 47.534 31.588 47.193]
2025-07-18 02:05:58,531 - ADV - DEBUG - episode: 20199/50000, score: 0, e: 0.2, reward: 41.123, seudo-rew:0.0 max: 46.413673400878906 init-act: [32.225 47.579 31.463 47.446]
2025-07-18 02:06:08,589 - ADV - DEBUG - episode: 20299/50000, score: 0, e: 0.2, reward: 41.122, seudo-rew:0.0 max: 46.4991455078125 init-act: [31.766 47.268 31.185 47.339]
2025-07-18 02:06:18,459 - ADV - DEBUG - episode: 20399/50000, score: 0, e: 0.2, reward: 41.615, seudo-rew:0.0 max: 46.60874938964844 init-act: [32.029 47.039 31.773 47.281]
2025-07-18 02:06:28,571 - ADV - DEBUG - episode: 20499/50000, score: 0, e: 0.2, reward: 40.827, seudo-rew:0.0 max: 46.521507263183594 init-act: [32.019 47.447 31.629 47.357]
2025-07-18 02:06:38,866 - ADV - DEBUG - episode: 20599/50000, score: 0, e: 0.2, reward: 41.9, seudo-rew:0.0 max: 46.46291732788086 ini

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-22000.keras


2025-07-18 02:08:58,278 - ADV - DEBUG - episode: 22099/50000, score: 0, e: 0.2, reward: 41.896, seudo-rew:0.0 max: 46.57487487792969 init-act: [32.213 47.509 31.996 47.696]
2025-07-18 02:09:08,045 - ADV - DEBUG - episode: 22199/50000, score: 0, e: 0.2, reward: 41.123, seudo-rew:0.0 max: 46.806182861328125 init-act: [31.91  47.352 31.51  47.472]
2025-07-18 02:09:17,949 - ADV - DEBUG - episode: 22299/50000, score: 0, e: 0.2, reward: 40.886, seudo-rew:0.0 max: 46.77621841430664 init-act: [31.934 47.586 31.707 47.733]
2025-07-18 02:09:27,986 - ADV - DEBUG - episode: 22399/50000, score: 0, e: 0.2, reward: 40.711, seudo-rew:0.0 max: 47.09532928466797 init-act: [33.08  48.024 32.24  47.758]
2025-07-18 02:09:37,867 - ADV - DEBUG - episode: 22499/50000, score: 0, e: 0.2, reward: 40.181, seudo-rew:0.0 max: 47.01945114135742 init-act: [31.94  47.683 31.478 47.508]
2025-07-18 02:09:47,532 - ADV - DEBUG - episode: 22599/50000, score: 0, e: 0.2, reward: 41.402, seudo-rew:0.0 max: 46.7503776550293 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-24000.keras


2025-07-18 02:12:10,912 - ADV - DEBUG - episode: 24099/50000, score: 0, e: 0.2, reward: 41.482, seudo-rew:0.0 max: 48.04139709472656 init-act: [32.373 48.733 31.948 48.46 ]
2025-07-18 02:12:19,328 - ADV - DEBUG - episode: 24199/50000, score: 0, e: 0.2, reward: 41.496, seudo-rew:0.0 max: 48.07057571411133 init-act: [32.379 48.71  31.909 48.871]
2025-07-18 02:12:27,876 - ADV - DEBUG - episode: 24299/50000, score: 0, e: 0.2, reward: 40.421, seudo-rew:0.0 max: 47.60322189331055 init-act: [32.343 48.52  32.562 48.432]
2025-07-18 02:12:36,458 - ADV - DEBUG - episode: 24399/50000, score: 0, e: 0.2, reward: 41.425, seudo-rew:0.0 max: 47.857757568359375 init-act: [32.852 48.348 32.85  48.352]
2025-07-18 02:12:45,174 - ADV - DEBUG - episode: 24499/50000, score: 0, e: 0.2, reward: 40.815, seudo-rew:0.0 max: 47.83709716796875 init-act: [33.013 48.606 32.957 48.684]
2025-07-18 02:12:53,656 - ADV - DEBUG - episode: 24599/50000, score: 0, e: 0.2, reward: 40.057, seudo-rew:0.0 max: 47.65092086791992 i

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-26000.keras


2025-07-18 02:15:04,405 - ADV - DEBUG - episode: 26099/50000, score: 0, e: 0.2, reward: 39.462, seudo-rew:0.0 max: 46.70896530151367 init-act: [33.536 47.387 33.18  47.539]
2025-07-18 02:15:13,208 - ADV - DEBUG - episode: 26199/50000, score: 0, e: 0.2, reward: 41.528, seudo-rew:0.0 max: 46.81167221069336 init-act: [32.397 47.471 32.531 47.586]
2025-07-18 02:15:22,208 - ADV - DEBUG - episode: 26299/50000, score: 0, e: 0.2, reward: 41.294, seudo-rew:0.0 max: 46.962318420410156 init-act: [33.014 47.746 32.176 47.86 ]
2025-07-18 02:15:31,331 - ADV - DEBUG - episode: 26399/50000, score: 0, e: 0.2, reward: 40.537, seudo-rew:0.0 max: 47.122337341308594 init-act: [32.859 47.375 33.064 48.033]
2025-07-18 02:15:40,860 - ADV - DEBUG - episode: 26499/50000, score: 0, e: 0.2, reward: 40.837, seudo-rew:0.0 max: 47.389404296875 init-act: [32.465 47.814 32.543 47.995]
2025-07-18 02:15:50,776 - ADV - DEBUG - episode: 26599/50000, score: 0, e: 0.2, reward: 42.029, seudo-rew:0.0 max: 47.02399826049805 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-28000.keras


2025-07-18 02:18:11,165 - ADV - DEBUG - episode: 28099/50000, score: 0, e: 0.2, reward: 41.454, seudo-rew:0.0 max: 49.40801239013672 init-act: [34.    50.238 34.189 49.991]
2025-07-18 02:18:20,027 - ADV - DEBUG - episode: 28199/50000, score: 0, e: 0.2, reward: 41.06, seudo-rew:0.0 max: 49.140785217285156 init-act: [33.957 50.05  33.686 49.917]
2025-07-18 02:18:28,977 - ADV - DEBUG - episode: 28299/50000, score: 0, e: 0.2, reward: 41.275, seudo-rew:0.0 max: 49.525230407714844 init-act: [33.165 50.204 33.016 50.125]
2025-07-18 02:18:38,133 - ADV - DEBUG - episode: 28399/50000, score: 0, e: 0.2, reward: 41.23, seudo-rew:0.0 max: 49.75397491455078 init-act: [33.606 50.633 33.17  50.351]
2025-07-18 02:18:47,624 - ADV - DEBUG - episode: 28499/50000, score: 0, e: 0.2, reward: 41.835, seudo-rew:0.0 max: 49.49334716796875 init-act: [33.398 50.109 33.31  50.29 ]
2025-07-18 02:18:57,474 - ADV - DEBUG - episode: 28599/50000, score: 0, e: 0.2, reward: 41.551, seudo-rew:0.0 max: 49.46268081665039 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-30000.keras


2025-07-18 02:21:26,764 - ADV - DEBUG - episode: 30099/50000, score: 0, e: 0.2, reward: 40.99, seudo-rew:0.0 max: 49.27570343017578 init-act: [34.897 50.192 34.978 50.04 ]
2025-07-18 02:21:36,820 - ADV - DEBUG - episode: 30199/50000, score: 0, e: 0.2, reward: 41.229, seudo-rew:0.0 max: 49.40684509277344 init-act: [34.527 50.029 34.426 50.175]
2025-07-18 02:21:46,770 - ADV - DEBUG - episode: 30299/50000, score: 0, e: 0.2, reward: 41.139, seudo-rew:0.0 max: 49.21448516845703 init-act: [35.269 49.736 35.027 49.767]
2025-07-18 02:21:56,735 - ADV - DEBUG - episode: 30399/50000, score: 0, e: 0.2, reward: 41.48, seudo-rew:0.0 max: 49.23749542236328 init-act: [33.814 49.922 33.869 50.105]
2025-07-18 02:22:06,983 - ADV - DEBUG - episode: 30499/50000, score: 0, e: 0.2, reward: 42.198, seudo-rew:0.0 max: 48.871925354003906 init-act: [34.648 49.664 34.757 49.364]
2025-07-18 02:22:16,496 - ADV - DEBUG - episode: 30599/50000, score: 0, e: 0.2, reward: 40.676, seudo-rew:0.0 max: 49.02009582519531 ini

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-32000.keras


2025-07-18 02:24:27,226 - ADV - DEBUG - episode: 32099/50000, score: 0, e: 0.2, reward: 40.649, seudo-rew:0.0 max: 49.31757354736328 init-act: [33.465 50.281 33.217 50.074]
2025-07-18 02:24:36,451 - ADV - DEBUG - episode: 32199/50000, score: 0, e: 0.2, reward: 42.057, seudo-rew:0.0 max: 49.442779541015625 init-act: [33.545 50.329 32.989 50.143]
2025-07-18 02:24:46,282 - ADV - DEBUG - episode: 32299/50000, score: 0, e: 0.2, reward: 42.482, seudo-rew:0.0 max: 49.577117919921875 init-act: [33.968 50.02  33.219 50.148]
2025-07-18 02:24:56,092 - ADV - DEBUG - episode: 32399/50000, score: 0, e: 0.2, reward: 42.046, seudo-rew:0.0 max: 49.56566619873047 init-act: [33.811 50.27  33.807 50.305]
2025-07-18 02:25:06,014 - ADV - DEBUG - episode: 32499/50000, score: 0, e: 0.2, reward: 42.188, seudo-rew:0.0 max: 49.73649978637695 init-act: [33.527 50.063 32.881 50.046]
2025-07-18 02:25:15,880 - ADV - DEBUG - episode: 32599/50000, score: 0, e: 0.2, reward: 41.059, seudo-rew:0.0 max: 49.962913513183594

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-34000.keras


2025-07-18 02:27:31,465 - ADV - DEBUG - episode: 34099/50000, score: 0, e: 0.2, reward: 41.88, seudo-rew:0.0 max: 49.49787521362305 init-act: [34.096 50.128 33.713 50.066]
2025-07-18 02:27:39,950 - ADV - DEBUG - episode: 34199/50000, score: 0, e: 0.2, reward: 42.407, seudo-rew:0.0 max: 49.396331787109375 init-act: [33.781 50.364 33.717 50.564]
2025-07-18 02:27:48,286 - ADV - DEBUG - episode: 34299/50000, score: 0, e: 0.2, reward: 41.56, seudo-rew:0.0 max: 49.624534606933594 init-act: [33.109 50.254 32.91  50.533]
2025-07-18 02:27:56,567 - ADV - DEBUG - episode: 34399/50000, score: 0, e: 0.2, reward: 41.077, seudo-rew:0.0 max: 49.472782135009766 init-act: [34.91  50.242 35.462 50.485]
2025-07-18 02:28:04,943 - ADV - DEBUG - episode: 34499/50000, score: 0, e: 0.2, reward: 41.368, seudo-rew:0.0 max: 49.57993698120117 init-act: [33.723 50.324 33.918 50.407]
2025-07-18 02:28:13,302 - ADV - DEBUG - episode: 34599/50000, score: 0, e: 0.2, reward: 41.307, seudo-rew:0.0 max: 49.080482482910156 

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-36000.keras


2025-07-18 02:30:18,390 - ADV - DEBUG - episode: 36099/50000, score: 0, e: 0.2, reward: 42.47, seudo-rew:0.0 max: 49.7423210144043 init-act: [33.9   50.664 33.316 50.517]
2025-07-18 02:30:26,674 - ADV - DEBUG - episode: 36199/50000, score: 0, e: 0.2, reward: 41.687, seudo-rew:0.0 max: 49.805633544921875 init-act: [34.014 50.593 33.936 50.728]
2025-07-18 02:30:34,958 - ADV - DEBUG - episode: 36299/50000, score: 0, e: 0.2, reward: 40.985, seudo-rew:0.0 max: 49.87233352661133 init-act: [34.996 50.551 35.912 50.465]
2025-07-18 02:30:43,310 - ADV - DEBUG - episode: 36399/50000, score: 0, e: 0.2, reward: 41.99, seudo-rew:0.0 max: 50.13611602783203 init-act: [34.785 50.607 35.385 50.574]
2025-07-18 02:30:51,899 - ADV - DEBUG - episode: 36499/50000, score: 0, e: 0.2, reward: 41.191, seudo-rew:0.0 max: 50.1021842956543 init-act: [34.699 50.974 34.676 50.794]
2025-07-18 02:31:00,366 - ADV - DEBUG - episode: 36599/50000, score: 0, e: 0.2, reward: 42.25, seudo-rew:0.0 max: 49.86574172973633 init-a

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-38000.keras


2025-07-18 02:33:06,327 - ADV - DEBUG - episode: 38099/50000, score: 0, e: 0.2, reward: 41.084, seudo-rew:0.0 max: 48.83055877685547 init-act: [34.047 49.599 34.307 49.629]
2025-07-18 02:33:14,674 - ADV - DEBUG - episode: 38199/50000, score: 0, e: 0.2, reward: 40.278, seudo-rew:0.0 max: 49.352333068847656 init-act: [33.488 50.254 33.75  50.179]
2025-07-18 02:33:22,972 - ADV - DEBUG - episode: 38299/50000, score: 0, e: 0.2, reward: 40.617, seudo-rew:0.0 max: 49.06503677368164 init-act: [33.988 50.137 34.487 49.997]
2025-07-18 02:33:31,207 - ADV - DEBUG - episode: 38399/50000, score: 0, e: 0.2, reward: 40.71, seudo-rew:0.0 max: 49.13976287841797 init-act: [33.695 49.406 33.766 49.257]
2025-07-18 02:33:39,573 - ADV - DEBUG - episode: 38499/50000, score: 0, e: 0.2, reward: 41.435, seudo-rew:0.0 max: 48.87737274169922 init-act: [33.777 49.134 33.668 49.09 ]
2025-07-18 02:33:47,909 - ADV - DEBUG - episode: 38599/50000, score: 0, e: 0.2, reward: 42.256, seudo-rew:0.0 max: 48.6019401550293 ini

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-40000.keras


2025-07-18 02:35:52,926 - ADV - DEBUG - episode: 40099/50000, score: 0, e: 0.2, reward: 41.443, seudo-rew:0.0 max: 48.8393440246582 init-act: [33.914 49.613 33.953 49.914]
2025-07-18 02:36:01,367 - ADV - DEBUG - episode: 40199/50000, score: 0, e: 0.2, reward: 41.921, seudo-rew:0.0 max: 49.07266616821289 init-act: [33.489 49.807 33.279 50.015]
2025-07-18 02:36:09,811 - ADV - DEBUG - episode: 40299/50000, score: 0, e: 0.2, reward: 40.742, seudo-rew:0.0 max: 48.97787094116211 init-act: [34.215 50.044 33.44  49.878]
2025-07-18 02:36:18,289 - ADV - DEBUG - episode: 40399/50000, score: 0, e: 0.2, reward: 40.908, seudo-rew:0.0 max: 49.169471740722656 init-act: [33.274 49.995 32.822 49.699]
2025-07-18 02:36:26,728 - ADV - DEBUG - episode: 40499/50000, score: 0, e: 0.2, reward: 41.474, seudo-rew:0.0 max: 48.92497634887695 init-act: [33.693 49.465 33.272 49.348]
2025-07-18 02:36:35,281 - ADV - DEBUG - episode: 40599/50000, score: 0, e: 0.2, reward: 40.084, seudo-rew:0.0 max: 49.23337173461914 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-42000.keras


2025-07-18 02:38:40,915 - ADV - DEBUG - episode: 42099/50000, score: 0, e: 0.2, reward: 40.54, seudo-rew:0.0 max: 49.20383071899414 init-act: [32.912 49.526 33.117 49.492]
2025-07-18 02:38:49,252 - ADV - DEBUG - episode: 42199/50000, score: 0, e: 0.2, reward: 41.554, seudo-rew:0.0 max: 48.902156829833984 init-act: [33.234 49.352 33.143 49.446]
2025-07-18 02:38:57,458 - ADV - DEBUG - episode: 42299/50000, score: 0, e: 0.2, reward: 40.79, seudo-rew:0.0 max: 48.781349182128906 init-act: [33.217 49.521 33.215 49.325]
2025-07-18 02:39:05,785 - ADV - DEBUG - episode: 42399/50000, score: 0, e: 0.2, reward: 42.122, seudo-rew:0.0 max: 48.81374740600586 init-act: [32.709 49.516 32.682 49.394]
2025-07-18 02:39:14,128 - ADV - DEBUG - episode: 42499/50000, score: 0, e: 0.2, reward: 41.555, seudo-rew:0.0 max: 48.83932876586914 init-act: [33.114 49.341 33.494 49.767]
2025-07-18 02:39:22,431 - ADV - DEBUG - episode: 42599/50000, score: 0, e: 0.2, reward: 40.872, seudo-rew:0.0 max: 48.91159439086914 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-44000.keras


2025-07-18 02:41:28,993 - ADV - DEBUG - episode: 44099/50000, score: 0, e: 0.2, reward: 40.838, seudo-rew:0.0 max: 48.820892333984375 init-act: [33.006 49.638 32.614 49.526]
2025-07-18 02:41:37,364 - ADV - DEBUG - episode: 44199/50000, score: 0, e: 0.2, reward: 41.44, seudo-rew:0.0 max: 48.87514877319336 init-act: [32.77  49.676 32.15  49.496]
2025-07-18 02:41:45,790 - ADV - DEBUG - episode: 44299/50000, score: 0, e: 0.2, reward: 42.331, seudo-rew:0.0 max: 48.976295471191406 init-act: [32.932 49.672 33.014 49.656]
2025-07-18 02:41:54,206 - ADV - DEBUG - episode: 44399/50000, score: 0, e: 0.2, reward: 41.858, seudo-rew:0.0 max: 48.82822036743164 init-act: [32.834 49.768 32.532 49.596]
2025-07-18 02:42:02,620 - ADV - DEBUG - episode: 44499/50000, score: 0, e: 0.2, reward: 42.127, seudo-rew:0.0 max: 48.95780944824219 init-act: [33.365 49.81  32.935 49.754]
2025-07-18 02:42:10,870 - ADV - DEBUG - episode: 44599/50000, score: 0, e: 0.2, reward: 42.118, seudo-rew:0.0 max: 49.1003303527832 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-46000.keras


2025-07-18 02:44:15,598 - ADV - DEBUG - episode: 46099/50000, score: 0, e: 0.2, reward: 41.819, seudo-rew:0.0 max: 48.04359817504883 init-act: [33.557 48.901 32.819 49.012]
2025-07-18 02:44:23,876 - ADV - DEBUG - episode: 46199/50000, score: 0, e: 0.2, reward: 41.496, seudo-rew:0.0 max: 48.13942337036133 init-act: [33.107 48.652 32.704 48.819]
2025-07-18 02:44:32,212 - ADV - DEBUG - episode: 46299/50000, score: 0, e: 0.2, reward: 41.39, seudo-rew:0.0 max: 48.235816955566406 init-act: [32.859 48.957 32.719 49.333]
2025-07-18 02:44:40,439 - ADV - DEBUG - episode: 46399/50000, score: 0, e: 0.2, reward: 41.744, seudo-rew:0.0 max: 48.12623596191406 init-act: [32.801 48.742 32.453 48.761]
2025-07-18 02:44:48,771 - ADV - DEBUG - episode: 46499/50000, score: 0, e: 0.2, reward: 41.817, seudo-rew:0.0 max: 48.24831008911133 init-act: [33.193 49.147 32.483 48.974]
2025-07-18 02:44:57,135 - ADV - DEBUG - episode: 46599/50000, score: 0, e: 0.2, reward: 41.428, seudo-rew:0.0 max: 48.27122497558594 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.001\gpt\model-48000.keras


2025-07-18 02:47:03,074 - ADV - DEBUG - episode: 48099/50000, score: 0, e: 0.2, reward: 40.521, seudo-rew:0.0 max: 47.9312744140625 init-act: [33.695 48.652 33.803 48.531]
2025-07-18 02:47:11,443 - ADV - DEBUG - episode: 48199/50000, score: 0, e: 0.2, reward: 42.124, seudo-rew:0.0 max: 47.87491989135742 init-act: [33.591 48.688 33.375 48.492]
2025-07-18 02:47:19,829 - ADV - DEBUG - episode: 48299/50000, score: 0, e: 0.2, reward: 40.896, seudo-rew:0.0 max: 47.790470123291016 init-act: [33.914 48.663 33.648 48.621]
2025-07-18 02:47:28,239 - ADV - DEBUG - episode: 48399/50000, score: 0, e: 0.2, reward: 41.469, seudo-rew:0.0 max: 47.77662658691406 init-act: [33.508 48.309 32.902 48.272]
2025-07-18 02:47:36,616 - ADV - DEBUG - episode: 48499/50000, score: 0, e: 0.2, reward: 42.2, seudo-rew:0.0 max: 47.73186111450195 init-act: [33.172 48.766 33.064 48.646]
2025-07-18 02:47:44,930 - ADV - DEBUG - episode: 48599/50000, score: 0, e: 0.2, reward: 42.19, seudo-rew:0.0 max: 47.6703987121582 init-a

400000 0.0001 0.01
📦 Training: buf=400000, lr=0.0001, eps=0.01 → trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt
dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-0.keras


2025-07-18 02:49:46,994 - ADV - DEBUG - episode: 99/50000, score: 0, e: 0.01, reward: 21.37, seudo-rew:0.0 max: 1.161890983581543 init-act: [ 0.056  0.438  0.891 -0.402]
2025-07-18 02:49:52,548 - ADV - DEBUG - episode: 199/50000, score: 0, e: 0.01, reward: 20.183, seudo-rew:0.0 max: 1.6508495807647705 init-act: [ 0.241  0.543  1.268 -0.269]
2025-07-18 02:49:58,159 - ADV - DEBUG - episode: 299/50000, score: 0, e: 0.01, reward: 21.605, seudo-rew:0.0 max: 1.7981691360473633 init-act: [ 0.437  0.628  1.649 -0.075]
2025-07-18 02:50:03,774 - ADV - DEBUG - episode: 399/50000, score: 0, e: 0.01, reward: 21.012, seudo-rew:0.0 max: 2.2035794258117676 init-act: [0.611 0.813 1.973 0.064]
2025-07-18 02:50:13,309 - ADV - DEBUG - episode: 499/50000, score: 0, e: 0.01, reward: 21.431, seudo-rew:0.0 max: 2.184356689453125 init-act: [0.779 0.972 2.203 0.2  ]
2025-07-18 02:50:22,829 - ADV - DEBUG - episode: 599/50000, score: 0, e: 0.01, reward: 21.7, seudo-rew:0.0 max: 2.3478786945343018 init-act: [0.894

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-2000.keras


2025-07-18 02:52:39,815 - ADV - DEBUG - episode: 2099/50000, score: 0, e: 0.01, reward: 37.198, seudo-rew:0.0 max: 9.847100257873535 init-act: [6.174 5.57  7.352 8.919]
2025-07-18 02:52:48,118 - ADV - DEBUG - episode: 2199/50000, score: 0, e: 0.01, reward: 37.176, seudo-rew:0.0 max: 10.985527038574219 init-act: [ 7.216  6.818  7.999 10.841]
2025-07-18 02:52:56,393 - ADV - DEBUG - episode: 2299/50000, score: 0, e: 0.01, reward: 37.146, seudo-rew:0.0 max: 12.108907699584961 init-act: [ 8.321  7.866  8.48  12.368]
2025-07-18 02:53:04,719 - ADV - DEBUG - episode: 2399/50000, score: 0, e: 0.01, reward: 37.364, seudo-rew:0.0 max: 13.166897773742676 init-act: [ 9.401  8.916  9.17  13.669]
2025-07-18 02:53:13,044 - ADV - DEBUG - episode: 2499/50000, score: 0, e: 0.01, reward: 37.25, seudo-rew:0.0 max: 14.174091339111328 init-act: [10.369  9.922  9.761 14.795]
2025-07-18 02:53:21,330 - ADV - DEBUG - episode: 2599/50000, score: 0, e: 0.01, reward: 37.354, seudo-rew:0.0 max: 15.184385299682617 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-4000.keras


2025-07-18 02:55:26,214 - ADV - DEBUG - episode: 4099/50000, score: 0, e: 0.01, reward: 37.352, seudo-rew:0.0 max: 29.128149032592773 init-act: [19.581 22.066 17.972 30.174]
2025-07-18 02:55:34,557 - ADV - DEBUG - episode: 4199/50000, score: 0, e: 0.01, reward: 37.276, seudo-rew:0.0 max: 29.9740047454834 init-act: [19.987 23.211 18.425 30.937]
2025-07-18 02:55:42,857 - ADV - DEBUG - episode: 4299/50000, score: 0, e: 0.01, reward: 37.399, seudo-rew:0.0 max: 30.692920684814453 init-act: [20.583 23.299 18.759 31.656]
2025-07-18 02:55:51,326 - ADV - DEBUG - episode: 4399/50000, score: 0, e: 0.01, reward: 37.3, seudo-rew:0.0 max: 31.324317932128906 init-act: [21.1   23.77  19.2   32.257]
2025-07-18 02:55:59,807 - ADV - DEBUG - episode: 4499/50000, score: 0, e: 0.01, reward: 37.376, seudo-rew:0.0 max: 31.940237045288086 init-act: [21.716 24.211 19.694 32.889]
2025-07-18 02:56:08,339 - ADV - DEBUG - episode: 4599/50000, score: 0, e: 0.01, reward: 37.42, seudo-rew:0.0 max: 32.49128341674805 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-6000.keras


2025-07-18 02:58:15,563 - ADV - DEBUG - episode: 6099/50000, score: 0, e: 0.01, reward: 37.331, seudo-rew:0.0 max: 37.2413330078125 init-act: [27.919 33.354 26.375 38.193]
2025-07-18 02:58:23,828 - ADV - DEBUG - episode: 6199/50000, score: 0, e: 0.01, reward: 37.6, seudo-rew:0.0 max: 37.395843505859375 init-act: [27.777 33.923 26.762 38.392]
2025-07-18 02:58:32,197 - ADV - DEBUG - episode: 6299/50000, score: 0, e: 0.01, reward: 37.287, seudo-rew:0.0 max: 37.54006576538086 init-act: [28.158 34.268 27.139 38.545]
2025-07-18 02:58:40,594 - ADV - DEBUG - episode: 6399/50000, score: 0, e: 0.01, reward: 37.296, seudo-rew:0.0 max: 37.64944076538086 init-act: [28.247 34.637 27.865 38.66 ]
2025-07-18 02:58:48,960 - ADV - DEBUG - episode: 6499/50000, score: 0, e: 0.01, reward: 37.645, seudo-rew:0.0 max: 37.81816101074219 init-act: [28.255 34.99  27.888 38.77 ]
2025-07-18 02:58:57,290 - ADV - DEBUG - episode: 6599/50000, score: 0, e: 0.01, reward: 37.284, seudo-rew:0.0 max: 37.906028747558594 ini

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-8000.keras


2025-07-18 03:01:02,650 - ADV - DEBUG - episode: 8099/50000, score: 0, e: 0.01, reward: 43.685, seudo-rew:0.0 max: 39.170806884765625 init-act: [30.102 39.557 30.369 39.995]
2025-07-18 03:01:11,283 - ADV - DEBUG - episode: 8199/50000, score: 0, e: 0.01, reward: 43.99, seudo-rew:0.0 max: 39.406898498535156 init-act: [30.46  39.924 30.014 40.168]
2025-07-18 03:01:20,147 - ADV - DEBUG - episode: 8299/50000, score: 0, e: 0.01, reward: 44.139, seudo-rew:0.0 max: 39.5155143737793 init-act: [30.732 40.186 30.395 40.461]
2025-07-18 03:01:28,806 - ADV - DEBUG - episode: 8399/50000, score: 0, e: 0.01, reward: 45.159, seudo-rew:0.0 max: 39.73748016357422 init-act: [31.036 40.418 30.559 40.695]
2025-07-18 03:01:37,569 - ADV - DEBUG - episode: 8499/50000, score: 0, e: 0.01, reward: 44.656, seudo-rew:0.0 max: 39.960750579833984 init-act: [31.373 40.625 30.903 40.941]
2025-07-18 03:01:46,441 - ADV - DEBUG - episode: 8599/50000, score: 0, e: 0.01, reward: 44.76, seudo-rew:0.0 max: 40.148826599121094 i

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-10000.keras


2025-07-18 03:04:01,767 - ADV - DEBUG - episode: 10099/50000, score: 0, e: 0.01, reward: 43.49, seudo-rew:0.0 max: 40.97469711303711 init-act: [32.992 41.639 32.758 41.933]
2025-07-18 03:04:10,925 - ADV - DEBUG - episode: 10199/50000, score: 0, e: 0.01, reward: 43.544, seudo-rew:0.0 max: 40.95492172241211 init-act: [33.006 41.681 32.882 41.893]
2025-07-18 03:04:19,668 - ADV - DEBUG - episode: 10299/50000, score: 0, e: 0.01, reward: 43.049, seudo-rew:0.0 max: 40.9873046875 init-act: [33.096 41.696 32.815 42.021]
2025-07-18 03:04:28,390 - ADV - DEBUG - episode: 10399/50000, score: 0, e: 0.01, reward: 43.234, seudo-rew:0.0 max: 40.984375 init-act: [33.029 41.698 32.788 41.943]
2025-07-18 03:04:37,188 - ADV - DEBUG - episode: 10499/50000, score: 0, e: 0.01, reward: 43.372, seudo-rew:0.0 max: 40.95276641845703 init-act: [33.025 41.706 32.817 41.895]
2025-07-18 03:04:46,090 - ADV - DEBUG - episode: 10599/50000, score: 0, e: 0.01, reward: 43.292, seudo-rew:0.0 max: 40.94074249267578 init-act:

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-12000.keras


2025-07-18 03:06:54,869 - ADV - DEBUG - episode: 12099/50000, score: 0, e: 0.01, reward: 43.18, seudo-rew:0.0 max: 40.89925003051758 init-act: [33.123 41.815 32.932 41.843]
2025-07-18 03:07:03,294 - ADV - DEBUG - episode: 12199/50000, score: 0, e: 0.01, reward: 42.243, seudo-rew:0.0 max: 40.946617126464844 init-act: [33.155 41.883 33.067 41.951]
2025-07-18 03:07:11,564 - ADV - DEBUG - episode: 12299/50000, score: 0, e: 0.01, reward: 42.081, seudo-rew:0.0 max: 40.98225784301758 init-act: [33.166 41.82  33.061 41.878]
2025-07-18 03:07:19,993 - ADV - DEBUG - episode: 12399/50000, score: 0, e: 0.01, reward: 43.223, seudo-rew:0.0 max: 40.93321990966797 init-act: [33.149 41.832 33.018 41.921]
2025-07-18 03:07:28,484 - ADV - DEBUG - episode: 12499/50000, score: 0, e: 0.01, reward: 43.486, seudo-rew:0.0 max: 40.995548248291016 init-act: [33.231 41.84  33.043 41.985]
2025-07-18 03:07:36,789 - ADV - DEBUG - episode: 12599/50000, score: 0, e: 0.01, reward: 43.488, seudo-rew:0.0 max: 41.1371574401

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-14000.keras


2025-07-18 03:09:42,013 - ADV - DEBUG - episode: 14099/50000, score: 0, e: 0.01, reward: 44.491, seudo-rew:0.0 max: 42.76906967163086 init-act: [33.296 43.501 33.148 43.6  ]
2025-07-18 03:09:50,319 - ADV - DEBUG - episode: 14199/50000, score: 0, e: 0.01, reward: 43.987, seudo-rew:0.0 max: 43.18195343017578 init-act: [33.493 43.918 33.381 43.89 ]
2025-07-18 03:09:58,668 - ADV - DEBUG - episode: 14299/50000, score: 0, e: 0.01, reward: 44.12, seudo-rew:0.0 max: 43.32517623901367 init-act: [33.741 44.177 33.662 44.108]
2025-07-18 03:10:07,224 - ADV - DEBUG - episode: 14399/50000, score: 0, e: 0.01, reward: 44.061, seudo-rew:0.0 max: 43.448265075683594 init-act: [33.434 44.297 33.879 44.222]
2025-07-18 03:10:15,692 - ADV - DEBUG - episode: 14499/50000, score: 0, e: 0.01, reward: 44.198, seudo-rew:0.0 max: 43.65464401245117 init-act: [33.623 44.201 34.012 44.348]
2025-07-18 03:10:24,060 - ADV - DEBUG - episode: 14599/50000, score: 0, e: 0.01, reward: 44.29, seudo-rew:0.0 max: 43.798454284667

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-16000.keras


2025-07-18 03:12:32,719 - ADV - DEBUG - episode: 16099/50000, score: 0, e: 0.01, reward: 44.651, seudo-rew:0.0 max: 45.185001373291016 init-act: [36.191 45.582 36.203 46.082]
2025-07-18 03:12:41,138 - ADV - DEBUG - episode: 16199/50000, score: 0, e: 0.01, reward: 44.546, seudo-rew:0.0 max: 45.2269401550293 init-act: [36.255 45.604 36.095 46.112]
2025-07-18 03:12:49,445 - ADV - DEBUG - episode: 16299/50000, score: 0, e: 0.01, reward: 44.572, seudo-rew:0.0 max: 45.22090530395508 init-act: [36.141 45.547 35.934 46.164]
2025-07-18 03:12:57,806 - ADV - DEBUG - episode: 16399/50000, score: 0, e: 0.01, reward: 44.48, seudo-rew:0.0 max: 45.130645751953125 init-act: [36.078 45.518 35.949 46.089]
2025-07-18 03:13:06,248 - ADV - DEBUG - episode: 16499/50000, score: 0, e: 0.01, reward: 44.661, seudo-rew:0.0 max: 45.08132553100586 init-act: [35.869 45.436 35.943 45.95 ]
2025-07-18 03:13:14,535 - ADV - DEBUG - episode: 16599/50000, score: 0, e: 0.01, reward: 44.632, seudo-rew:0.0 max: 44.99637603759

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-18000.keras


2025-07-18 03:15:19,632 - ADV - DEBUG - episode: 18099/50000, score: 0, e: 0.01, reward: 44.339, seudo-rew:0.0 max: 43.70144271850586 init-act: [33.392 44.043 33.718 44.66 ]
2025-07-18 03:15:27,988 - ADV - DEBUG - episode: 18199/50000, score: 0, e: 0.01, reward: 44.057, seudo-rew:0.0 max: 43.667171478271484 init-act: [33.2   43.961 33.537 44.582]
2025-07-18 03:15:36,339 - ADV - DEBUG - episode: 18299/50000, score: 0, e: 0.01, reward: 44.251, seudo-rew:0.0 max: 43.63071823120117 init-act: [32.769 43.831 33.351 44.535]
2025-07-18 03:15:44,663 - ADV - DEBUG - episode: 18399/50000, score: 0, e: 0.01, reward: 44.14, seudo-rew:0.0 max: 43.52112579345703 init-act: [32.861 43.81  33.322 44.499]
2025-07-18 03:15:53,363 - ADV - DEBUG - episode: 18499/50000, score: 0, e: 0.01, reward: 44.142, seudo-rew:0.0 max: 43.48202896118164 init-act: [32.854 43.733 33.152 44.434]
2025-07-18 03:16:01,785 - ADV - DEBUG - episode: 18599/50000, score: 0, e: 0.01, reward: 44.378, seudo-rew:0.0 max: 43.42166519165

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-20000.keras


2025-07-18 03:18:07,413 - ADV - DEBUG - episode: 20099/50000, score: 0, e: 0.01, reward: 44.118, seudo-rew:0.0 max: 43.29559326171875 init-act: [31.793 43.51  32.381 44.25 ]
2025-07-18 03:18:15,800 - ADV - DEBUG - episode: 20199/50000, score: 0, e: 0.01, reward: 44.236, seudo-rew:0.0 max: 43.3280143737793 init-act: [31.906 43.55  32.199 44.272]
2025-07-18 03:18:24,145 - ADV - DEBUG - episode: 20299/50000, score: 0, e: 0.01, reward: 44.31, seudo-rew:0.0 max: 43.36450958251953 init-act: [31.76  43.558 32.162 44.271]
2025-07-18 03:18:32,522 - ADV - DEBUG - episode: 20399/50000, score: 0, e: 0.01, reward: 44.331, seudo-rew:0.0 max: 43.409149169921875 init-act: [31.655 43.523 31.776 44.254]
2025-07-18 03:18:40,853 - ADV - DEBUG - episode: 20499/50000, score: 0, e: 0.01, reward: 44.219, seudo-rew:0.0 max: 43.43759536743164 init-act: [31.641 43.639 31.623 44.417]
2025-07-18 03:18:49,162 - ADV - DEBUG - episode: 20599/50000, score: 0, e: 0.01, reward: 44.384, seudo-rew:0.0 max: 43.510101318359

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-22000.keras


2025-07-18 03:20:54,552 - ADV - DEBUG - episode: 22099/50000, score: 0, e: 0.01, reward: 44.737, seudo-rew:0.0 max: 44.06037521362305 init-act: [31.152 43.94  31.576 44.984]
2025-07-18 03:21:03,075 - ADV - DEBUG - episode: 22199/50000, score: 0, e: 0.01, reward: 44.64, seudo-rew:0.0 max: 44.100284576416016 init-act: [31.291 43.979 31.631 45.082]
2025-07-18 03:21:11,488 - ADV - DEBUG - episode: 22299/50000, score: 0, e: 0.01, reward: 44.736, seudo-rew:0.0 max: 44.09538650512695 init-act: [31.264 44.009 31.701 45.026]
2025-07-18 03:21:19,778 - ADV - DEBUG - episode: 22399/50000, score: 0, e: 0.01, reward: 44.902, seudo-rew:0.0 max: 44.172119140625 init-act: [31.282 44.154 31.728 45.132]
2025-07-18 03:21:28,083 - ADV - DEBUG - episode: 22499/50000, score: 0, e: 0.01, reward: 45.021, seudo-rew:0.0 max: 44.17359161376953 init-act: [31.326 44.148 31.708 45.148]
2025-07-18 03:21:36,457 - ADV - DEBUG - episode: 22599/50000, score: 0, e: 0.01, reward: 44.65, seudo-rew:0.0 max: 44.16841506958008

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-24000.keras


2025-07-18 03:23:42,116 - ADV - DEBUG - episode: 24099/50000, score: 0, e: 0.01, reward: 44.608, seudo-rew:0.0 max: 44.23931121826172 init-act: [30.344 44.275 30.826 45.185]
2025-07-18 03:23:50,440 - ADV - DEBUG - episode: 24199/50000, score: 0, e: 0.01, reward: 44.451, seudo-rew:0.0 max: 44.21099853515625 init-act: [30.389 44.301 30.853 45.15 ]
2025-07-18 03:23:58,733 - ADV - DEBUG - episode: 24299/50000, score: 0, e: 0.01, reward: 44.663, seudo-rew:0.0 max: 44.197025299072266 init-act: [30.229 44.3   30.678 45.143]
2025-07-18 03:24:07,116 - ADV - DEBUG - episode: 24399/50000, score: 0, e: 0.01, reward: 44.409, seudo-rew:0.0 max: 44.25968933105469 init-act: [30.155 44.315 30.436 45.164]
2025-07-18 03:24:15,462 - ADV - DEBUG - episode: 24499/50000, score: 0, e: 0.01, reward: 44.661, seudo-rew:0.0 max: 44.27619552612305 init-act: [30.137 44.321 30.331 45.203]
2025-07-18 03:24:23,777 - ADV - DEBUG - episode: 24599/50000, score: 0, e: 0.01, reward: 44.548, seudo-rew:0.0 max: 44.2699928283

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-26000.keras


2025-07-18 03:26:29,619 - ADV - DEBUG - episode: 26099/50000, score: 0, e: 0.01, reward: 44.629, seudo-rew:0.0 max: 44.53553009033203 init-act: [29.98  44.663 30.126 45.469]
2025-07-18 03:26:38,584 - ADV - DEBUG - episode: 26199/50000, score: 0, e: 0.01, reward: 44.948, seudo-rew:0.0 max: 44.534873962402344 init-act: [29.91  44.619 30.174 45.426]
2025-07-18 03:26:46,943 - ADV - DEBUG - episode: 26299/50000, score: 0, e: 0.01, reward: 44.813, seudo-rew:0.0 max: 44.554725646972656 init-act: [29.886 44.67  30.164 45.486]
2025-07-18 03:26:55,374 - ADV - DEBUG - episode: 26399/50000, score: 0, e: 0.01, reward: 44.675, seudo-rew:0.0 max: 44.52670669555664 init-act: [29.885 44.623 30.054 45.387]
2025-07-18 03:27:03,754 - ADV - DEBUG - episode: 26499/50000, score: 0, e: 0.01, reward: 44.688, seudo-rew:0.0 max: 44.47383117675781 init-act: [29.888 44.649 30.164 45.394]
2025-07-18 03:27:12,280 - ADV - DEBUG - episode: 26599/50000, score: 0, e: 0.01, reward: 44.773, seudo-rew:0.0 max: 44.433403015

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-28000.keras


2025-07-18 03:29:17,020 - ADV - DEBUG - episode: 28099/50000, score: 0, e: 0.01, reward: 44.609, seudo-rew:0.0 max: 44.29033279418945 init-act: [29.525 44.082 29.895 45.203]
2025-07-18 03:29:25,341 - ADV - DEBUG - episode: 28199/50000, score: 0, e: 0.01, reward: 44.553, seudo-rew:0.0 max: 44.26443862915039 init-act: [29.418 44.148 29.81  45.26 ]
2025-07-18 03:29:33,638 - ADV - DEBUG - episode: 28299/50000, score: 0, e: 0.01, reward: 44.572, seudo-rew:0.0 max: 44.35121154785156 init-act: [29.482 44.183 29.781 45.328]
2025-07-18 03:29:42,008 - ADV - DEBUG - episode: 28399/50000, score: 0, e: 0.01, reward: 44.671, seudo-rew:0.0 max: 44.37873458862305 init-act: [29.544 44.323 30.077 45.302]
2025-07-18 03:29:50,343 - ADV - DEBUG - episode: 28499/50000, score: 0, e: 0.01, reward: 44.653, seudo-rew:0.0 max: 44.43001937866211 init-act: [29.639 44.431 30.113 45.375]
2025-07-18 03:29:58,661 - ADV - DEBUG - episode: 28599/50000, score: 0, e: 0.01, reward: 44.535, seudo-rew:0.0 max: 44.48519134521

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-30000.keras


2025-07-18 03:32:04,338 - ADV - DEBUG - episode: 30099/50000, score: 0, e: 0.01, reward: 44.584, seudo-rew:0.0 max: 44.54523849487305 init-act: [29.264 44.81  29.52  45.506]
2025-07-18 03:32:12,704 - ADV - DEBUG - episode: 30199/50000, score: 0, e: 0.01, reward: 44.731, seudo-rew:0.0 max: 44.568477630615234 init-act: [29.096 44.74  29.41  45.411]
2025-07-18 03:32:21,238 - ADV - DEBUG - episode: 30299/50000, score: 0, e: 0.01, reward: 44.634, seudo-rew:0.0 max: 44.56694793701172 init-act: [29.152 44.855 29.604 45.57 ]
2025-07-18 03:32:29,538 - ADV - DEBUG - episode: 30399/50000, score: 0, e: 0.01, reward: 44.839, seudo-rew:0.0 max: 44.56177520751953 init-act: [29.149 44.855 29.422 45.516]
2025-07-18 03:32:37,808 - ADV - DEBUG - episode: 30499/50000, score: 0, e: 0.01, reward: 44.609, seudo-rew:0.0 max: 44.62764358520508 init-act: [29.197 44.948 29.53  45.63 ]
2025-07-18 03:32:46,414 - ADV - DEBUG - episode: 30599/50000, score: 0, e: 0.01, reward: 44.612, seudo-rew:0.0 max: 44.5850906372

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-32000.keras


2025-07-18 03:34:51,523 - ADV - DEBUG - episode: 32099/50000, score: 0, e: 0.01, reward: 45.009, seudo-rew:0.0 max: 44.56414794921875 init-act: [29.365 45.103 29.352 45.539]
2025-07-18 03:34:59,825 - ADV - DEBUG - episode: 32199/50000, score: 0, e: 0.01, reward: 44.737, seudo-rew:0.0 max: 44.58096694946289 init-act: [29.279 45.066 29.379 45.539]
2025-07-18 03:35:08,112 - ADV - DEBUG - episode: 32299/50000, score: 0, e: 0.01, reward: 44.659, seudo-rew:0.0 max: 44.60243606567383 init-act: [29.168 45.055 29.234 45.52 ]
2025-07-18 03:35:16,464 - ADV - DEBUG - episode: 32399/50000, score: 0, e: 0.01, reward: 44.659, seudo-rew:0.0 max: 44.55647277832031 init-act: [29.232 45.091 29.148 45.532]
2025-07-18 03:35:24,917 - ADV - DEBUG - episode: 32499/50000, score: 0, e: 0.01, reward: 44.792, seudo-rew:0.0 max: 44.54133224487305 init-act: [29.147 45.098 29.189 45.488]
2025-07-18 03:35:33,267 - ADV - DEBUG - episode: 32599/50000, score: 0, e: 0.01, reward: 44.415, seudo-rew:0.0 max: 44.57558822631

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-34000.keras


2025-07-18 03:37:39,095 - ADV - DEBUG - episode: 34099/50000, score: 0, e: 0.01, reward: 44.5, seudo-rew:0.0 max: 44.86103057861328 init-act: [29.506 45.648 29.487 45.818]
2025-07-18 03:37:47,478 - ADV - DEBUG - episode: 34199/50000, score: 0, e: 0.01, reward: 44.493, seudo-rew:0.0 max: 44.89067459106445 init-act: [29.342 45.626 29.414 45.785]
2025-07-18 03:37:55,903 - ADV - DEBUG - episode: 34299/50000, score: 0, e: 0.01, reward: 44.525, seudo-rew:0.0 max: 44.89115524291992 init-act: [29.451 45.679 29.486 45.879]
2025-07-18 03:38:04,417 - ADV - DEBUG - episode: 34399/50000, score: 0, e: 0.01, reward: 44.553, seudo-rew:0.0 max: 44.89674377441406 init-act: [29.412 45.663 29.377 45.827]
2025-07-18 03:38:12,770 - ADV - DEBUG - episode: 34499/50000, score: 0, e: 0.01, reward: 44.835, seudo-rew:0.0 max: 44.937747955322266 init-act: [29.374 45.645 29.315 45.809]
2025-07-18 03:38:21,047 - ADV - DEBUG - episode: 34599/50000, score: 0, e: 0.01, reward: 44.588, seudo-rew:0.0 max: 44.994499206542

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-36000.keras


2025-07-18 03:40:26,243 - ADV - DEBUG - episode: 36099/50000, score: 0, e: 0.01, reward: 44.65, seudo-rew:0.0 max: 44.98405838012695 init-act: [29.238 45.427 29.22  45.931]
2025-07-18 03:40:34,524 - ADV - DEBUG - episode: 36199/50000, score: 0, e: 0.01, reward: 44.42, seudo-rew:0.0 max: 44.96593475341797 init-act: [29.242 45.431 29.231 45.964]
2025-07-18 03:40:42,937 - ADV - DEBUG - episode: 36299/50000, score: 0, e: 0.01, reward: 44.636, seudo-rew:0.0 max: 44.915008544921875 init-act: [29.191 45.339 29.158 45.782]
2025-07-18 03:40:51,434 - ADV - DEBUG - episode: 36399/50000, score: 0, e: 0.01, reward: 44.753, seudo-rew:0.0 max: 45.105220794677734 init-act: [29.293 45.369 29.127 45.875]
2025-07-18 03:41:00,330 - ADV - DEBUG - episode: 36499/50000, score: 0, e: 0.01, reward: 44.656, seudo-rew:0.0 max: 45.0633544921875 init-act: [29.473 45.361 29.901 45.815]
2025-07-18 03:41:08,656 - ADV - DEBUG - episode: 36599/50000, score: 0, e: 0.01, reward: 44.609, seudo-rew:0.0 max: 44.833168029785

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-38000.keras


2025-07-18 03:43:14,922 - ADV - DEBUG - episode: 38099/50000, score: 0, e: 0.01, reward: 44.713, seudo-rew:0.0 max: 44.89225387573242 init-act: [29.488 45.414 29.91  45.819]
2025-07-18 03:43:23,391 - ADV - DEBUG - episode: 38199/50000, score: 0, e: 0.01, reward: 44.84, seudo-rew:0.0 max: 44.87062454223633 init-act: [29.559 45.453 29.918 45.848]
2025-07-18 03:43:31,656 - ADV - DEBUG - episode: 38299/50000, score: 0, e: 0.01, reward: 44.693, seudo-rew:0.0 max: 44.88024139404297 init-act: [29.538 45.435 29.973 45.829]
2025-07-18 03:43:40,081 - ADV - DEBUG - episode: 38399/50000, score: 0, e: 0.01, reward: 44.595, seudo-rew:0.0 max: 44.889896392822266 init-act: [29.534 45.486 29.976 45.82 ]
2025-07-18 03:43:48,626 - ADV - DEBUG - episode: 38499/50000, score: 0, e: 0.01, reward: 44.804, seudo-rew:0.0 max: 44.82770538330078 init-act: [29.443 45.45  29.883 45.767]
2025-07-18 03:43:57,052 - ADV - DEBUG - episode: 38599/50000, score: 0, e: 0.01, reward: 44.734, seudo-rew:0.0 max: 44.78091812133

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-40000.keras


2025-07-18 03:46:03,331 - ADV - DEBUG - episode: 40099/50000, score: 0, e: 0.01, reward: 44.771, seudo-rew:0.0 max: 44.96340560913086 init-act: [29.426 45.68  29.787 45.899]
2025-07-18 03:46:11,871 - ADV - DEBUG - episode: 40199/50000, score: 0, e: 0.01, reward: 44.761, seudo-rew:0.0 max: 44.99332046508789 init-act: [29.461 45.762 29.815 46.001]
2025-07-18 03:46:20,646 - ADV - DEBUG - episode: 40299/50000, score: 0, e: 0.01, reward: 44.798, seudo-rew:0.0 max: 44.9846305847168 init-act: [29.344 45.672 29.699 45.898]
2025-07-18 03:46:29,519 - ADV - DEBUG - episode: 40399/50000, score: 0, e: 0.01, reward: 44.753, seudo-rew:0.0 max: 44.987979888916016 init-act: [29.302 45.698 29.639 45.927]
2025-07-18 03:46:38,129 - ADV - DEBUG - episode: 40499/50000, score: 0, e: 0.01, reward: 44.744, seudo-rew:0.0 max: 44.99045181274414 init-act: [29.408 45.718 29.592 45.918]
2025-07-18 03:46:46,846 - ADV - DEBUG - episode: 40599/50000, score: 0, e: 0.01, reward: 44.838, seudo-rew:0.0 max: 45.05148696899

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-42000.keras


2025-07-18 03:48:55,299 - ADV - DEBUG - episode: 42099/50000, score: 0, e: 0.01, reward: 44.661, seudo-rew:0.0 max: 45.02321243286133 init-act: [29.775 45.936 29.819 46.035]
2025-07-18 03:49:03,872 - ADV - DEBUG - episode: 42199/50000, score: 0, e: 0.01, reward: 45.006, seudo-rew:0.0 max: 45.09947204589844 init-act: [29.85  45.918 30.14  45.989]
2025-07-18 03:49:12,640 - ADV - DEBUG - episode: 42299/50000, score: 0, e: 0.01, reward: 44.655, seudo-rew:0.0 max: 45.12567138671875 init-act: [30.052 46.066 30.469 46.117]
2025-07-18 03:49:21,084 - ADV - DEBUG - episode: 42399/50000, score: 0, e: 0.01, reward: 44.536, seudo-rew:0.0 max: 45.13832092285156 init-act: [30.05  46.073 30.293 46.113]
2025-07-18 03:49:29,648 - ADV - DEBUG - episode: 42499/50000, score: 0, e: 0.01, reward: 44.785, seudo-rew:0.0 max: 45.17758560180664 init-act: [29.999 46.058 30.262 46.135]
2025-07-18 03:49:38,329 - ADV - DEBUG - episode: 42599/50000, score: 0, e: 0.01, reward: 44.575, seudo-rew:0.0 max: 45.21756744384

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-44000.keras


2025-07-18 03:51:54,276 - ADV - DEBUG - episode: 44099/50000, score: 0, e: 0.01, reward: 44.601, seudo-rew:0.0 max: 45.24140930175781 init-act: [29.81  46.176 30.01  46.229]
2025-07-18 03:52:03,880 - ADV - DEBUG - episode: 44199/50000, score: 0, e: 0.01, reward: 44.581, seudo-rew:0.0 max: 45.2513427734375 init-act: [29.796 46.183 30.028 46.196]
2025-07-18 03:52:13,588 - ADV - DEBUG - episode: 44299/50000, score: 0, e: 0.01, reward: 44.519, seudo-rew:0.0 max: 45.27912521362305 init-act: [29.799 46.234 30.047 46.242]
2025-07-18 03:52:22,196 - ADV - DEBUG - episode: 44399/50000, score: 0, e: 0.01, reward: 44.436, seudo-rew:0.0 max: 45.3031005859375 init-act: [29.81  46.198 29.947 46.216]
2025-07-18 03:52:30,664 - ADV - DEBUG - episode: 44499/50000, score: 0, e: 0.01, reward: 44.662, seudo-rew:0.0 max: 45.24769592285156 init-act: [29.801 46.194 29.92  46.149]
2025-07-18 03:52:39,324 - ADV - DEBUG - episode: 44599/50000, score: 0, e: 0.01, reward: 44.649, seudo-rew:0.0 max: 45.4317588806152

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-46000.keras


2025-07-18 03:55:03,763 - ADV - DEBUG - episode: 46099/50000, score: 0, e: 0.01, reward: 44.443, seudo-rew:0.0 max: 45.24909210205078 init-act: [30.583 45.958 30.535 45.945]
2025-07-18 03:55:13,732 - ADV - DEBUG - episode: 46199/50000, score: 0, e: 0.01, reward: 44.937, seudo-rew:0.0 max: 45.20315933227539 init-act: [30.73  46.003 30.678 46.031]
2025-07-18 03:55:23,695 - ADV - DEBUG - episode: 46299/50000, score: 0, e: 0.01, reward: 44.662, seudo-rew:0.0 max: 45.245338439941406 init-act: [30.929 46.129 31.176 46.02 ]
2025-07-18 03:55:33,459 - ADV - DEBUG - episode: 46399/50000, score: 0, e: 0.01, reward: 44.708, seudo-rew:0.0 max: 45.234130859375 init-act: [31.031 45.568 30.952 45.836]
2025-07-18 03:55:43,290 - ADV - DEBUG - episode: 46499/50000, score: 0, e: 0.01, reward: 44.409, seudo-rew:0.0 max: 45.267642974853516 init-act: [31.552 45.875 31.897 45.96 ]
2025-07-18 03:55:53,250 - ADV - DEBUG - episode: 46599/50000, score: 0, e: 0.01, reward: 44.51, seudo-rew:0.0 max: 45.381313323974

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr0.0001\gpt\model-48000.keras


2025-07-18 03:58:09,563 - ADV - DEBUG - episode: 48099/50000, score: 0, e: 0.01, reward: 44.767, seudo-rew:0.0 max: 45.2702522277832 init-act: [33.048 45.983 32.674 46.036]
2025-07-18 03:58:18,080 - ADV - DEBUG - episode: 48199/50000, score: 0, e: 0.01, reward: 44.935, seudo-rew:0.0 max: 45.19700241088867 init-act: [32.881 45.678 32.523 45.872]
2025-07-18 03:58:26,632 - ADV - DEBUG - episode: 48299/50000, score: 0, e: 0.01, reward: 44.713, seudo-rew:0.0 max: 44.96404266357422 init-act: [32.896 45.722 32.623 45.91 ]
2025-07-18 03:58:35,234 - ADV - DEBUG - episode: 48399/50000, score: 0, e: 0.01, reward: 44.788, seudo-rew:0.0 max: 44.9847297668457 init-act: [32.875 45.757 32.631 45.919]
2025-07-18 03:58:43,621 - ADV - DEBUG - episode: 48499/50000, score: 0, e: 0.01, reward: 44.362, seudo-rew:0.0 max: 45.001625061035156 init-act: [32.94  45.733 32.66  45.938]
2025-07-18 03:58:52,144 - ADV - DEBUG - episode: 48599/50000, score: 0, e: 0.01, reward: 44.363, seudo-rew:0.0 max: 44.973854064941

400000 0.0001 0.1
📦 Training: buf=400000, lr=0.0001, eps=0.1 → trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt


2025-07-18 04:00:57,326 - ADV - DEBUG - Trained model saved to: trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-0.keras


dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-0.keras


2025-07-18 04:01:03,208 - ADV - DEBUG - episode: 99/50000, score: 0, e: 0.1, reward: 25.805, seudo-rew:0.0 max: 1.8198633193969727 init-act: [ 0.255  0.759 -0.2    0.071]
2025-07-18 04:01:08,939 - ADV - DEBUG - episode: 199/50000, score: 0, e: 0.1, reward: 25.478, seudo-rew:0.0 max: 2.0666332244873047 init-act: [0.585 1.506 0.039 0.368]
2025-07-18 04:01:14,528 - ADV - DEBUG - episode: 299/50000, score: 0, e: 0.1, reward: 27.248, seudo-rew:0.0 max: 2.405041456222534 init-act: [0.904 2.385 0.313 0.737]
2025-07-18 04:01:20,224 - ADV - DEBUG - episode: 399/50000, score: 0, e: 0.1, reward: 26.937, seudo-rew:0.0 max: 2.8667373657226562 init-act: [1.301 3.45  0.736 1.286]
2025-07-18 04:01:30,145 - ADV - DEBUG - episode: 499/50000, score: 0, e: 0.1, reward: 28.174, seudo-rew:0.0 max: 3.553483247756958 init-act: [1.729 4.302 1.217 2.015]
2025-07-18 04:01:40,081 - ADV - DEBUG - episode: 599/50000, score: 0, e: 0.1, reward: 28.02, seudo-rew:0.0 max: 4.255444526672363 init-act: [2.116 5.109 1.754 

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-2000.keras


2025-07-18 04:03:54,521 - ADV - DEBUG - episode: 2099/50000, score: 0, e: 0.1, reward: 41.539, seudo-rew:0.0 max: 18.8820743560791 init-act: [12.098 19.673 11.618 16.137]
2025-07-18 04:04:03,550 - ADV - DEBUG - episode: 2199/50000, score: 0, e: 0.1, reward: 41.803, seudo-rew:0.0 max: 19.888906478881836 init-act: [12.847 20.708 12.405 17.39 ]
2025-07-18 04:04:12,646 - ADV - DEBUG - episode: 2299/50000, score: 0, e: 0.1, reward: 41.786, seudo-rew:0.0 max: 21.03314208984375 init-act: [13.467 21.749 13.079 18.402]
2025-07-18 04:04:21,048 - ADV - DEBUG - episode: 2399/50000, score: 0, e: 0.1, reward: 40.497, seudo-rew:0.0 max: 22.087051391601562 init-act: [14.231 22.791 13.745 19.316]
2025-07-18 04:04:29,481 - ADV - DEBUG - episode: 2499/50000, score: 0, e: 0.1, reward: 41.233, seudo-rew:0.0 max: 22.90847396850586 init-act: [14.822 23.652 14.527 20.309]
2025-07-18 04:04:38,143 - ADV - DEBUG - episode: 2599/50000, score: 0, e: 0.1, reward: 41.462, seudo-rew:0.0 max: 23.961149215698242 init-a

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-4000.keras


2025-07-18 04:06:49,879 - ADV - DEBUG - episode: 4099/50000, score: 0, e: 0.1, reward: 41.856, seudo-rew:0.0 max: 36.962364196777344 init-act: [27.041 37.655 27.051 35.945]
2025-07-18 04:06:58,259 - ADV - DEBUG - episode: 4199/50000, score: 0, e: 0.1, reward: 41.786, seudo-rew:0.0 max: 37.70418167114258 init-act: [27.567 38.486 27.877 36.885]
2025-07-18 04:07:06,908 - ADV - DEBUG - episode: 4299/50000, score: 0, e: 0.1, reward: 41.782, seudo-rew:0.0 max: 38.43526840209961 init-act: [28.051 39.023 28.275 37.603]
2025-07-18 04:07:15,491 - ADV - DEBUG - episode: 4399/50000, score: 0, e: 0.1, reward: 41.919, seudo-rew:0.0 max: 39.129615783691406 init-act: [28.824 39.854 28.822 38.514]
2025-07-18 04:07:24,019 - ADV - DEBUG - episode: 4499/50000, score: 0, e: 0.1, reward: 40.947, seudo-rew:0.0 max: 39.84996795654297 init-act: [29.399 40.52  29.462 39.293]
2025-07-18 04:07:32,534 - ADV - DEBUG - episode: 4599/50000, score: 0, e: 0.1, reward: 41.556, seudo-rew:0.0 max: 40.40721130371094 init-a

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-6000.keras


2025-07-18 04:09:41,406 - ADV - DEBUG - episode: 6099/50000, score: 0, e: 0.1, reward: 41.399, seudo-rew:0.0 max: 45.45958709716797 init-act: [33.977 45.966 33.604 45.725]
2025-07-18 04:09:49,891 - ADV - DEBUG - episode: 6199/50000, score: 0, e: 0.1, reward: 41.228, seudo-rew:0.0 max: 45.61781692504883 init-act: [34.062 46.249 33.621 46.008]
2025-07-18 04:09:58,398 - ADV - DEBUG - episode: 6299/50000, score: 0, e: 0.1, reward: 41.985, seudo-rew:0.0 max: 45.72351837158203 init-act: [33.845 46.059 33.574 45.989]
2025-07-18 04:10:06,935 - ADV - DEBUG - episode: 6399/50000, score: 0, e: 0.1, reward: 41.883, seudo-rew:0.0 max: 45.862945556640625 init-act: [33.803 46.409 33.519 46.275]
2025-07-18 04:10:15,779 - ADV - DEBUG - episode: 6499/50000, score: 0, e: 0.1, reward: 42.177, seudo-rew:0.0 max: 46.047977447509766 init-act: [33.822 46.551 33.572 46.5  ]
2025-07-18 04:10:24,865 - ADV - DEBUG - episode: 6599/50000, score: 0, e: 0.1, reward: 42.251, seudo-rew:0.0 max: 46.24985122680664 init-a

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-8000.keras


2025-07-18 04:12:37,416 - ADV - DEBUG - episode: 8099/50000, score: 0, e: 0.1, reward: 42.512, seudo-rew:0.0 max: 46.86001205444336 init-act: [32.994 47.386 33.447 47.639]
2025-07-18 04:12:45,793 - ADV - DEBUG - episode: 8199/50000, score: 0, e: 0.1, reward: 42.245, seudo-rew:0.0 max: 46.78470230102539 init-act: [32.899 47.261 33.531 47.734]
2025-07-18 04:12:54,346 - ADV - DEBUG - episode: 8299/50000, score: 0, e: 0.1, reward: 42.767, seudo-rew:0.0 max: 46.763851165771484 init-act: [32.743 47.299 33.258 47.642]
2025-07-18 04:13:02,709 - ADV - DEBUG - episode: 8399/50000, score: 0, e: 0.1, reward: 42.605, seudo-rew:0.0 max: 46.722110748291016 init-act: [32.498 47.314 32.994 47.677]
2025-07-18 04:13:11,041 - ADV - DEBUG - episode: 8499/50000, score: 0, e: 0.1, reward: 41.72, seudo-rew:0.0 max: 46.799320220947266 init-act: [32.437 47.192 32.973 47.735]
2025-07-18 04:13:19,309 - ADV - DEBUG - episode: 8599/50000, score: 0, e: 0.1, reward: 42.653, seudo-rew:0.0 max: 46.78968811035156 init-a

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-10000.keras


2025-07-18 04:15:24,662 - ADV - DEBUG - episode: 10099/50000, score: 0, e: 0.1, reward: 42.637, seudo-rew:0.0 max: 47.10950469970703 init-act: [30.982 47.754 31.072 48.052]
2025-07-18 04:15:33,031 - ADV - DEBUG - episode: 10199/50000, score: 0, e: 0.1, reward: 42.881, seudo-rew:0.0 max: 47.23802185058594 init-act: [30.885 47.722 31.073 48.036]
2025-07-18 04:15:41,301 - ADV - DEBUG - episode: 10299/50000, score: 0, e: 0.1, reward: 41.19, seudo-rew:0.0 max: 47.41859436035156 init-act: [31.    47.659 31.113 48.148]
2025-07-18 04:15:49,802 - ADV - DEBUG - episode: 10399/50000, score: 0, e: 0.1, reward: 42.646, seudo-rew:0.0 max: 47.212284088134766 init-act: [31.045 47.883 31.185 48.14 ]
2025-07-18 04:15:58,379 - ADV - DEBUG - episode: 10499/50000, score: 0, e: 0.1, reward: 42.927, seudo-rew:0.0 max: 47.248050689697266 init-act: [30.998 47.875 31.018 48.21 ]
2025-07-18 04:16:06,771 - ADV - DEBUG - episode: 10599/50000, score: 0, e: 0.1, reward: 42.75, seudo-rew:0.0 max: 47.43313980102539 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-12000.keras


2025-07-18 04:18:12,338 - ADV - DEBUG - episode: 12099/50000, score: 0, e: 0.1, reward: 42.402, seudo-rew:0.0 max: 47.077362060546875 init-act: [29.908 47.439 30.164 48.005]
2025-07-18 04:18:20,656 - ADV - DEBUG - episode: 12199/50000, score: 0, e: 0.1, reward: 42.317, seudo-rew:0.0 max: 46.991451263427734 init-act: [29.728 47.086 29.939 47.933]
2025-07-18 04:18:28,988 - ADV - DEBUG - episode: 12299/50000, score: 0, e: 0.1, reward: 41.306, seudo-rew:0.0 max: 46.9644775390625 init-act: [29.878 47.243 30.315 47.922]
2025-07-18 04:18:37,309 - ADV - DEBUG - episode: 12399/50000, score: 0, e: 0.1, reward: 42.761, seudo-rew:0.0 max: 46.938663482666016 init-act: [29.797 47.136 30.299 47.851]
2025-07-18 04:18:45,650 - ADV - DEBUG - episode: 12499/50000, score: 0, e: 0.1, reward: 42.658, seudo-rew:0.0 max: 46.835514068603516 init-act: [29.826 47.044 30.105 47.711]
2025-07-18 04:18:53,971 - ADV - DEBUG - episode: 12599/50000, score: 0, e: 0.1, reward: 42.293, seudo-rew:0.0 max: 46.75090026855469

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-14000.keras


2025-07-18 04:20:59,477 - ADV - DEBUG - episode: 14099/50000, score: 0, e: 0.1, reward: 42.629, seudo-rew:0.0 max: 46.04426956176758 init-act: [29.729 46.66  30.227 46.947]
2025-07-18 04:21:07,851 - ADV - DEBUG - episode: 14199/50000, score: 0, e: 0.1, reward: 42.191, seudo-rew:0.0 max: 46.006591796875 init-act: [29.699 46.522 30.01  46.968]
2025-07-18 04:21:16,193 - ADV - DEBUG - episode: 14299/50000, score: 0, e: 0.1, reward: 41.245, seudo-rew:0.0 max: 45.94343185424805 init-act: [29.614 46.659 29.877 46.938]
2025-07-18 04:21:24,507 - ADV - DEBUG - episode: 14399/50000, score: 0, e: 0.1, reward: 42.432, seudo-rew:0.0 max: 45.994773864746094 init-act: [29.676 46.695 30.275 46.987]
2025-07-18 04:21:32,897 - ADV - DEBUG - episode: 14499/50000, score: 0, e: 0.1, reward: 42.535, seudo-rew:0.0 max: 45.877079010009766 init-act: [29.703 46.691 30.222 46.764]
2025-07-18 04:21:41,226 - ADV - DEBUG - episode: 14599/50000, score: 0, e: 0.1, reward: 42.53, seudo-rew:0.0 max: 45.79458999633789 ini

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-16000.keras


2025-07-18 04:23:46,985 - ADV - DEBUG - episode: 16099/50000, score: 0, e: 0.1, reward: 42.885, seudo-rew:0.0 max: 45.33090591430664 init-act: [29.766 45.966 29.844 46.268]
2025-07-18 04:23:55,287 - ADV - DEBUG - episode: 16199/50000, score: 0, e: 0.1, reward: 42.639, seudo-rew:0.0 max: 45.29478073120117 init-act: [29.891 46.047 30.094 46.3  ]
2025-07-18 04:24:03,624 - ADV - DEBUG - episode: 16299/50000, score: 0, e: 0.1, reward: 42.883, seudo-rew:0.0 max: 45.31085205078125 init-act: [29.891 46.072 30.    46.277]
2025-07-18 04:24:11,971 - ADV - DEBUG - episode: 16399/50000, score: 0, e: 0.1, reward: 42.912, seudo-rew:0.0 max: 45.30618667602539 init-act: [29.871 46.116 30.059 46.22 ]
2025-07-18 04:24:20,353 - ADV - DEBUG - episode: 16499/50000, score: 0, e: 0.1, reward: 42.892, seudo-rew:0.0 max: 45.44168472290039 init-act: [29.718 46.044 29.904 46.238]
2025-07-18 04:24:28,577 - ADV - DEBUG - episode: 16599/50000, score: 0, e: 0.1, reward: 42.945, seudo-rew:0.0 max: 45.33646011352539 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-18000.keras


2025-07-18 04:26:34,278 - ADV - DEBUG - episode: 18099/50000, score: 0, e: 0.1, reward: 42.951, seudo-rew:0.0 max: 45.60334777832031 init-act: [29.76  46.324 29.739 46.523]
2025-07-18 04:26:42,629 - ADV - DEBUG - episode: 18199/50000, score: 0, e: 0.1, reward: 42.495, seudo-rew:0.0 max: 45.766326904296875 init-act: [29.717 46.207 29.785 46.484]
2025-07-18 04:26:51,436 - ADV - DEBUG - episode: 18299/50000, score: 0, e: 0.1, reward: 42.756, seudo-rew:0.0 max: 45.7515983581543 init-act: [29.836 46.197 29.869 46.533]
2025-07-18 04:26:59,697 - ADV - DEBUG - episode: 18399/50000, score: 0, e: 0.1, reward: 42.53, seudo-rew:0.0 max: 45.640140533447266 init-act: [29.865 46.195 30.123 46.525]
2025-07-18 04:27:08,014 - ADV - DEBUG - episode: 18499/50000, score: 0, e: 0.1, reward: 42.664, seudo-rew:0.0 max: 45.648414611816406 init-act: [29.76  46.076 29.817 46.594]
2025-07-18 04:27:16,400 - ADV - DEBUG - episode: 18599/50000, score: 0, e: 0.1, reward: 42.882, seudo-rew:0.0 max: 45.794334411621094 

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-20000.keras


2025-07-18 04:29:21,623 - ADV - DEBUG - episode: 20099/50000, score: 0, e: 0.1, reward: 42.626, seudo-rew:0.0 max: 45.660667419433594 init-act: [30.173 45.902 30.377 46.51 ]
2025-07-18 04:29:29,900 - ADV - DEBUG - episode: 20199/50000, score: 0, e: 0.1, reward: 42.701, seudo-rew:0.0 max: 45.673152923583984 init-act: [30.03  45.996 30.256 46.647]
2025-07-18 04:29:38,327 - ADV - DEBUG - episode: 20299/50000, score: 0, e: 0.1, reward: 42.716, seudo-rew:0.0 max: 45.66986846923828 init-act: [29.869 46.051 30.142 46.603]
2025-07-18 04:29:46,677 - ADV - DEBUG - episode: 20399/50000, score: 0, e: 0.1, reward: 42.621, seudo-rew:0.0 max: 45.68376541137695 init-act: [29.919 46.079 30.152 46.559]
2025-07-18 04:29:55,028 - ADV - DEBUG - episode: 20499/50000, score: 0, e: 0.1, reward: 42.322, seudo-rew:0.0 max: 45.71131896972656 init-act: [30.062 46.073 30.433 46.628]
2025-07-18 04:30:03,383 - ADV - DEBUG - episode: 20599/50000, score: 0, e: 0.1, reward: 42.771, seudo-rew:0.0 max: 45.89506149291992 

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-22000.keras


2025-07-18 04:32:08,939 - ADV - DEBUG - episode: 22099/50000, score: 0, e: 0.1, reward: 42.933, seudo-rew:0.0 max: 45.54232406616211 init-act: [29.586 45.984 29.82  46.492]
2025-07-18 04:32:17,260 - ADV - DEBUG - episode: 22199/50000, score: 0, e: 0.1, reward: 42.825, seudo-rew:0.0 max: 45.5356330871582 init-act: [29.651 46.024 30.131 46.461]
2025-07-18 04:32:25,524 - ADV - DEBUG - episode: 22299/50000, score: 0, e: 0.1, reward: 42.963, seudo-rew:0.0 max: 45.54672622680664 init-act: [29.777 46.133 30.135 46.521]
2025-07-18 04:32:33,869 - ADV - DEBUG - episode: 22399/50000, score: 0, e: 0.1, reward: 42.65, seudo-rew:0.0 max: 45.54535675048828 init-act: [29.695 46.222 29.971 46.527]
2025-07-18 04:32:42,159 - ADV - DEBUG - episode: 22499/50000, score: 0, e: 0.1, reward: 42.497, seudo-rew:0.0 max: 45.55328369140625 init-act: [29.771 46.106 30.059 46.501]
2025-07-18 04:32:50,563 - ADV - DEBUG - episode: 22599/50000, score: 0, e: 0.1, reward: 42.693, seudo-rew:0.0 max: 45.49931335449219 init

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-24000.keras


2025-07-18 04:34:55,946 - ADV - DEBUG - episode: 24099/50000, score: 0, e: 0.1, reward: 42.457, seudo-rew:0.0 max: 45.62374496459961 init-act: [29.953 46.165 30.481 46.562]
2025-07-18 04:35:04,240 - ADV - DEBUG - episode: 24199/50000, score: 0, e: 0.1, reward: 42.728, seudo-rew:0.0 max: 45.48794174194336 init-act: [29.978 46.098 30.309 46.426]
2025-07-18 04:35:12,563 - ADV - DEBUG - episode: 24299/50000, score: 0, e: 0.1, reward: 42.245, seudo-rew:0.0 max: 45.549198150634766 init-act: [30.133 46.156 30.289 46.602]
2025-07-18 04:35:20,917 - ADV - DEBUG - episode: 24399/50000, score: 0, e: 0.1, reward: 42.053, seudo-rew:0.0 max: 45.52105712890625 init-act: [30.022 45.917 30.185 46.468]
2025-07-18 04:35:29,234 - ADV - DEBUG - episode: 24499/50000, score: 0, e: 0.1, reward: 42.414, seudo-rew:0.0 max: 45.45720291137695 init-act: [30.059 46.009 30.201 46.321]
2025-07-18 04:35:37,549 - ADV - DEBUG - episode: 24599/50000, score: 0, e: 0.1, reward: 42.443, seudo-rew:0.0 max: 45.401878356933594 

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-26000.keras


2025-07-18 04:37:43,026 - ADV - DEBUG - episode: 26099/50000, score: 0, e: 0.1, reward: 42.382, seudo-rew:0.0 max: 45.72288513183594 init-act: [30.397 46.004 30.361 46.422]
2025-07-18 04:37:51,443 - ADV - DEBUG - episode: 26199/50000, score: 0, e: 0.1, reward: 42.335, seudo-rew:0.0 max: 45.448360443115234 init-act: [30.372 46.021 30.227 46.398]
2025-07-18 04:37:59,833 - ADV - DEBUG - episode: 26299/50000, score: 0, e: 0.1, reward: 42.068, seudo-rew:0.0 max: 45.52053451538086 init-act: [30.226 46.065 30.156 46.43 ]
2025-07-18 04:38:08,095 - ADV - DEBUG - episode: 26399/50000, score: 0, e: 0.1, reward: 42.289, seudo-rew:0.0 max: 45.472007751464844 init-act: [30.029 45.918 30.025 46.357]
2025-07-18 04:38:16,463 - ADV - DEBUG - episode: 26499/50000, score: 0, e: 0.1, reward: 42.81, seudo-rew:0.0 max: 45.4207649230957 init-act: [30.074 46.028 29.998 46.355]
2025-07-18 04:38:24,848 - ADV - DEBUG - episode: 26599/50000, score: 0, e: 0.1, reward: 42.53, seudo-rew:0.0 max: 45.69391632080078 ini

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-28000.keras


2025-07-18 04:40:30,383 - ADV - DEBUG - episode: 28099/50000, score: 0, e: 0.1, reward: 42.199, seudo-rew:0.0 max: 45.62773132324219 init-act: [29.802 46.184 29.76  46.38 ]
2025-07-18 04:40:38,810 - ADV - DEBUG - episode: 28199/50000, score: 0, e: 0.1, reward: 41.748, seudo-rew:0.0 max: 45.61171340942383 init-act: [29.786 45.997 29.75  46.226]
2025-07-18 04:40:47,359 - ADV - DEBUG - episode: 28299/50000, score: 0, e: 0.1, reward: 41.982, seudo-rew:0.0 max: 45.614959716796875 init-act: [30.131 46.269 30.013 46.48 ]
2025-07-18 04:40:55,829 - ADV - DEBUG - episode: 28399/50000, score: 0, e: 0.1, reward: 42.334, seudo-rew:0.0 max: 45.66560363769531 init-act: [30.32  46.308 30.102 46.354]
2025-07-18 04:41:04,309 - ADV - DEBUG - episode: 28499/50000, score: 0, e: 0.1, reward: 42.266, seudo-rew:0.0 max: 45.499385833740234 init-act: [30.373 46.324 30.119 46.439]
2025-07-18 04:41:12,583 - ADV - DEBUG - episode: 28599/50000, score: 0, e: 0.1, reward: 41.714, seudo-rew:0.0 max: 45.4299430847168 i

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-30000.keras


2025-07-18 04:43:18,702 - ADV - DEBUG - episode: 30099/50000, score: 0, e: 0.1, reward: 41.692, seudo-rew:0.0 max: 45.512699127197266 init-act: [30.068 46.375 29.947 46.483]
2025-07-18 04:43:27,110 - ADV - DEBUG - episode: 30199/50000, score: 0, e: 0.1, reward: 42.44, seudo-rew:0.0 max: 45.42057418823242 init-act: [29.961 46.271 29.834 46.453]
2025-07-18 04:43:35,432 - ADV - DEBUG - episode: 30299/50000, score: 0, e: 0.1, reward: 42.299, seudo-rew:0.0 max: 45.65862274169922 init-act: [29.963 46.321 29.81  46.408]
2025-07-18 04:43:43,792 - ADV - DEBUG - episode: 30399/50000, score: 0, e: 0.1, reward: 42.226, seudo-rew:0.0 max: 45.56807327270508 init-act: [29.99  46.413 29.856 46.437]
2025-07-18 04:43:52,174 - ADV - DEBUG - episode: 30499/50000, score: 0, e: 0.1, reward: 42.119, seudo-rew:0.0 max: 45.589378356933594 init-act: [29.974 46.279 30.028 46.529]
2025-07-18 04:44:00,579 - ADV - DEBUG - episode: 30599/50000, score: 0, e: 0.1, reward: 42.062, seudo-rew:0.0 max: 45.78129196166992 i

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-32000.keras


2025-07-18 04:46:06,502 - ADV - DEBUG - episode: 32099/50000, score: 0, e: 0.1, reward: 42.037, seudo-rew:0.0 max: 45.76587677001953 init-act: [29.445 46.583 29.541 46.729]
2025-07-18 04:46:14,870 - ADV - DEBUG - episode: 32199/50000, score: 0, e: 0.1, reward: 41.846, seudo-rew:0.0 max: 45.739105224609375 init-act: [29.399 46.578 29.436 46.621]
2025-07-18 04:46:23,320 - ADV - DEBUG - episode: 32299/50000, score: 0, e: 0.1, reward: 42.255, seudo-rew:0.0 max: 45.77818298339844 init-act: [29.562 46.731 29.519 46.8  ]
2025-07-18 04:46:31,764 - ADV - DEBUG - episode: 32399/50000, score: 0, e: 0.1, reward: 42.301, seudo-rew:0.0 max: 45.759525299072266 init-act: [29.714 46.742 29.702 46.729]
2025-07-18 04:46:40,200 - ADV - DEBUG - episode: 32499/50000, score: 0, e: 0.1, reward: 42.143, seudo-rew:0.0 max: 45.88484191894531 init-act: [29.567 46.57  29.629 46.625]
2025-07-18 04:46:48,653 - ADV - DEBUG - episode: 32599/50000, score: 0, e: 0.1, reward: 42.026, seudo-rew:0.0 max: 45.84844207763672 

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-34000.keras


2025-07-18 04:48:54,952 - ADV - DEBUG - episode: 34099/50000, score: 0, e: 0.1, reward: 41.768, seudo-rew:0.0 max: 45.532798767089844 init-act: [29.941 46.182 30.125 46.288]
2025-07-18 04:49:03,410 - ADV - DEBUG - episode: 34199/50000, score: 0, e: 0.1, reward: 42.867, seudo-rew:0.0 max: 45.5213508605957 init-act: [29.974 46.009 30.224 46.234]
2025-07-18 04:49:11,799 - ADV - DEBUG - episode: 34299/50000, score: 0, e: 0.1, reward: 42.316, seudo-rew:0.0 max: 45.4576530456543 init-act: [30.066 45.858 30.208 46.228]
2025-07-18 04:49:20,525 - ADV - DEBUG - episode: 34399/50000, score: 0, e: 0.1, reward: 42.36, seudo-rew:0.0 max: 45.589874267578125 init-act: [30.316 46.226 30.495 46.458]
2025-07-18 04:49:29,031 - ADV - DEBUG - episode: 34499/50000, score: 0, e: 0.1, reward: 42.273, seudo-rew:0.0 max: 45.74944305419922 init-act: [30.09  46.301 30.273 46.476]
2025-07-18 04:49:37,456 - ADV - DEBUG - episode: 34599/50000, score: 0, e: 0.1, reward: 41.94, seudo-rew:0.0 max: 45.51956558227539 init

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-36000.keras


2025-07-18 04:51:43,842 - ADV - DEBUG - episode: 36099/50000, score: 0, e: 0.1, reward: 41.869, seudo-rew:0.0 max: 45.88623809814453 init-act: [29.752 46.481 29.762 46.772]
2025-07-18 04:51:52,182 - ADV - DEBUG - episode: 36199/50000, score: 0, e: 0.1, reward: 42.032, seudo-rew:0.0 max: 46.048072814941406 init-act: [29.627 46.354 29.689 46.805]
2025-07-18 04:52:00,546 - ADV - DEBUG - episode: 36299/50000, score: 0, e: 0.1, reward: 41.826, seudo-rew:0.0 max: 46.03345489501953 init-act: [29.91  46.618 29.962 46.699]
2025-07-18 04:52:08,902 - ADV - DEBUG - episode: 36399/50000, score: 0, e: 0.1, reward: 42.126, seudo-rew:0.0 max: 46.06538772583008 init-act: [29.977 46.406 30.072 46.773]
2025-07-18 04:52:17,316 - ADV - DEBUG - episode: 36499/50000, score: 0, e: 0.1, reward: 41.982, seudo-rew:0.0 max: 45.75840377807617 init-act: [30.034 46.379 29.945 46.669]
2025-07-18 04:52:25,679 - ADV - DEBUG - episode: 36599/50000, score: 0, e: 0.1, reward: 42.482, seudo-rew:0.0 max: 45.794891357421875 

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-38000.keras


2025-07-18 04:54:31,134 - ADV - DEBUG - episode: 38099/50000, score: 0, e: 0.1, reward: 42.273, seudo-rew:0.0 max: 46.101829528808594 init-act: [29.819 46.69  29.687 46.91 ]
2025-07-18 04:54:39,490 - ADV - DEBUG - episode: 38199/50000, score: 0, e: 0.1, reward: 42.055, seudo-rew:0.0 max: 46.05241394042969 init-act: [30.07  46.807 29.95  46.817]
2025-07-18 04:54:47,911 - ADV - DEBUG - episode: 38299/50000, score: 0, e: 0.1, reward: 42.231, seudo-rew:0.0 max: 45.806949615478516 init-act: [30.183 46.863 30.321 46.75 ]
2025-07-18 04:54:56,280 - ADV - DEBUG - episode: 38399/50000, score: 0, e: 0.1, reward: 41.939, seudo-rew:0.0 max: 46.02395248413086 init-act: [29.931 46.64  29.824 46.809]
2025-07-18 04:55:04,619 - ADV - DEBUG - episode: 38499/50000, score: 0, e: 0.1, reward: 42.583, seudo-rew:0.0 max: 45.74456024169922 init-act: [29.852 46.515 29.957 46.614]
2025-07-18 04:55:13,136 - ADV - DEBUG - episode: 38599/50000, score: 0, e: 0.1, reward: 42.359, seudo-rew:0.0 max: 46.03127670288086 

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-40000.keras


2025-07-18 04:57:20,943 - ADV - DEBUG - episode: 40099/50000, score: 0, e: 0.1, reward: 42.326, seudo-rew:0.0 max: 45.93138885498047 init-act: [29.908 46.654 30.174 46.777]
2025-07-18 04:57:29,509 - ADV - DEBUG - episode: 40199/50000, score: 0, e: 0.1, reward: 42.227, seudo-rew:0.0 max: 45.97659683227539 init-act: [30.028 46.452 30.209 46.636]
2025-07-18 04:57:38,074 - ADV - DEBUG - episode: 40299/50000, score: 0, e: 0.1, reward: 42.013, seudo-rew:0.0 max: 46.03966522216797 init-act: [30.453 46.632 30.579 46.781]
2025-07-18 04:57:46,400 - ADV - DEBUG - episode: 40399/50000, score: 0, e: 0.1, reward: 41.995, seudo-rew:0.0 max: 46.084171295166016 init-act: [30.838 46.703 30.907 46.805]
2025-07-18 04:57:54,948 - ADV - DEBUG - episode: 40499/50000, score: 0, e: 0.1, reward: 42.298, seudo-rew:0.0 max: 46.010562896728516 init-act: [30.771 46.766 31.076 46.918]
2025-07-18 04:58:03,506 - ADV - DEBUG - episode: 40599/50000, score: 0, e: 0.1, reward: 42.332, seudo-rew:0.0 max: 46.027099609375 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-42000.keras


2025-07-18 05:00:10,162 - ADV - DEBUG - episode: 42099/50000, score: 0, e: 0.1, reward: 42.089, seudo-rew:0.0 max: 45.95826721191406 init-act: [29.998 46.777 29.946 46.907]
2025-07-18 05:00:18,402 - ADV - DEBUG - episode: 42199/50000, score: 0, e: 0.1, reward: 42.436, seudo-rew:0.0 max: 45.85892105102539 init-act: [29.977 46.707 29.818 46.774]
2025-07-18 05:00:26,753 - ADV - DEBUG - episode: 42299/50000, score: 0, e: 0.1, reward: 42.327, seudo-rew:0.0 max: 45.86416244506836 init-act: [29.858 46.578 29.623 46.719]
2025-07-18 05:00:35,120 - ADV - DEBUG - episode: 42399/50000, score: 0, e: 0.1, reward: 42.205, seudo-rew:0.0 max: 46.10944747924805 init-act: [29.819 46.612 29.591 46.927]
2025-07-18 05:00:43,491 - ADV - DEBUG - episode: 42499/50000, score: 0, e: 0.1, reward: 42.269, seudo-rew:0.0 max: 45.853363037109375 init-act: [30.086 46.61  29.893 46.851]
2025-07-18 05:00:52,011 - ADV - DEBUG - episode: 42599/50000, score: 0, e: 0.1, reward: 42.432, seudo-rew:0.0 max: 46.11203384399414 i

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-44000.keras


2025-07-18 05:02:57,521 - ADV - DEBUG - episode: 44099/50000, score: 0, e: 0.1, reward: 41.982, seudo-rew:0.0 max: 45.70399475097656 init-act: [30.318 46.243 30.211 46.47 ]
2025-07-18 05:03:05,971 - ADV - DEBUG - episode: 44199/50000, score: 0, e: 0.1, reward: 40.937, seudo-rew:0.0 max: 45.7889404296875 init-act: [30.407 46.336 30.304 46.544]
2025-07-18 05:03:14,377 - ADV - DEBUG - episode: 44299/50000, score: 0, e: 0.1, reward: 42.59, seudo-rew:0.0 max: 45.65021514892578 init-act: [30.27  46.379 30.225 46.523]
2025-07-18 05:03:22,654 - ADV - DEBUG - episode: 44399/50000, score: 0, e: 0.1, reward: 42.401, seudo-rew:0.0 max: 45.77860641479492 init-act: [30.113 46.009 30.065 46.429]
2025-07-18 05:03:31,599 - ADV - DEBUG - episode: 44499/50000, score: 0, e: 0.1, reward: 42.807, seudo-rew:0.0 max: 45.789791107177734 init-act: [30.261 46.024 30.217 46.472]
2025-07-18 05:03:39,896 - ADV - DEBUG - episode: 44599/50000, score: 0, e: 0.1, reward: 42.784, seudo-rew:0.0 max: 45.55672836303711 ini

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-46000.keras


2025-07-18 05:05:45,641 - ADV - DEBUG - episode: 46099/50000, score: 0, e: 0.1, reward: 42.49, seudo-rew:0.0 max: 45.987823486328125 init-act: [30.803 46.629 30.742 46.843]
2025-07-18 05:05:54,008 - ADV - DEBUG - episode: 46199/50000, score: 0, e: 0.1, reward: 42.726, seudo-rew:0.0 max: 45.91891860961914 init-act: [30.68  46.671 30.558 46.88 ]
2025-07-18 05:06:02,429 - ADV - DEBUG - episode: 46299/50000, score: 0, e: 0.1, reward: 42.446, seudo-rew:0.0 max: 46.068634033203125 init-act: [30.791 46.811 30.635 46.864]
2025-07-18 05:06:11,650 - ADV - DEBUG - episode: 46399/50000, score: 0, e: 0.1, reward: 42.518, seudo-rew:0.0 max: 46.017189025878906 init-act: [30.734 46.931 30.837 46.972]
2025-07-18 05:06:20,122 - ADV - DEBUG - episode: 46499/50000, score: 0, e: 0.1, reward: 41.895, seudo-rew:0.0 max: 46.14873123168945 init-act: [30.725 47.033 30.61  47.044]
2025-07-18 05:06:28,446 - ADV - DEBUG - episode: 46599/50000, score: 0, e: 0.1, reward: 42.593, seudo-rew:0.0 max: 45.99590301513672 

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr0.0001\gpt\model-48000.keras


2025-07-18 05:08:34,491 - ADV - DEBUG - episode: 48099/50000, score: 0, e: 0.1, reward: 42.598, seudo-rew:0.0 max: 46.03327178955078 init-act: [30.676 46.82  30.441 46.813]
2025-07-18 05:08:42,880 - ADV - DEBUG - episode: 48199/50000, score: 0, e: 0.1, reward: 42.635, seudo-rew:0.0 max: 45.82112121582031 init-act: [30.799 46.594 30.527 46.626]
2025-07-18 05:08:51,300 - ADV - DEBUG - episode: 48299/50000, score: 0, e: 0.1, reward: 42.378, seudo-rew:0.0 max: 45.83122253417969 init-act: [30.799 46.495 30.574 46.59 ]
2025-07-18 05:08:59,600 - ADV - DEBUG - episode: 48399/50000, score: 0, e: 0.1, reward: 42.239, seudo-rew:0.0 max: 45.80198287963867 init-act: [30.781 46.471 30.426 46.536]
2025-07-18 05:09:07,942 - ADV - DEBUG - episode: 48499/50000, score: 0, e: 0.1, reward: 42.136, seudo-rew:0.0 max: 45.775901794433594 init-act: [31.139 46.702 30.993 46.615]
2025-07-18 05:09:16,319 - ADV - DEBUG - episode: 48599/50000, score: 0, e: 0.1, reward: 42.39, seudo-rew:0.0 max: 45.85837173461914 in

400000 0.0001 0.2
📦 Training: buf=400000, lr=0.0001, eps=0.2 → trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt
dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-0.keras


2025-07-18 05:11:14,361 - ADV - DEBUG - Trained model saved to: trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-0.keras
2025-07-18 05:11:20,071 - ADV - DEBUG - episode: 99/50000, score: 0, e: 0.2, reward: 37.733, seudo-rew:0.0 max: 2.078995704650879 init-act: [0.445 1.694 0.274 0.846]
2025-07-18 05:11:25,674 - ADV - DEBUG - episode: 199/50000, score: 0, e: 0.2, reward: 40.993, seudo-rew:0.0 max: 2.30015230178833 init-act: [1.082 2.407 0.812 1.479]
2025-07-18 05:11:31,219 - ADV - DEBUG - episode: 299/50000, score: 0, e: 0.2, reward: 39.056, seudo-rew:0.0 max: 3.0439369678497314 init-act: [1.779 3.214 1.578 2.261]
2025-07-18 05:11:36,795 - ADV - DEBUG - episode: 399/50000, score: 0, e: 0.2, reward: 39.406, seudo-rew:0.0 max: 4.100399017333984 init-act: [2.606 4.052 2.436 3.22 ]
2025-07-18 05:11:45,218 - ADV - DEBUG - episode: 499/50000, score: 0, e: 0.2, reward: 39.893, seudo-rew:0.0 max: 5.122749328613281 init-act: [3.315 5.007 3.084 4.233]
2025-07-18 05:11:53,634 - ADV - DEBUG

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-2000.keras


2025-07-18 05:13:59,280 - ADV - DEBUG - episode: 2099/50000, score: 0, e: 0.2, reward: 39.21, seudo-rew:0.0 max: 22.9073486328125 init-act: [13.491 22.68  14.174 21.465]
2025-07-18 05:14:07,636 - ADV - DEBUG - episode: 2199/50000, score: 0, e: 0.2, reward: 39.057, seudo-rew:0.0 max: 23.945741653442383 init-act: [14.336 23.801 14.833 22.607]
2025-07-18 05:14:15,962 - ADV - DEBUG - episode: 2299/50000, score: 0, e: 0.2, reward: 39.559, seudo-rew:0.0 max: 24.971830368041992 init-act: [15.176 24.951 15.88  23.943]
2025-07-18 05:14:24,297 - ADV - DEBUG - episode: 2399/50000, score: 0, e: 0.2, reward: 39.071, seudo-rew:0.0 max: 26.00650405883789 init-act: [15.962 26.028 16.612 25.084]
2025-07-18 05:14:32,650 - ADV - DEBUG - episode: 2499/50000, score: 0, e: 0.2, reward: 39.657, seudo-rew:0.0 max: 27.079296112060547 init-act: [16.817 27.061 17.485 26.317]
2025-07-18 05:14:41,032 - ADV - DEBUG - episode: 2599/50000, score: 0, e: 0.2, reward: 39.265, seudo-rew:0.0 max: 28.17301368713379 init-ac

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-4000.keras


2025-07-18 05:16:46,734 - ADV - DEBUG - episode: 4099/50000, score: 0, e: 0.2, reward: 38.998, seudo-rew:0.0 max: 40.1646614074707 init-act: [29.752 40.701 29.898 41.102]
2025-07-18 05:16:55,106 - ADV - DEBUG - episode: 4199/50000, score: 0, e: 0.2, reward: 38.781, seudo-rew:0.0 max: 40.71278762817383 init-act: [30.23  40.969 30.27  41.632]
2025-07-18 05:17:03,495 - ADV - DEBUG - episode: 4299/50000, score: 0, e: 0.2, reward: 39.268, seudo-rew:0.0 max: 41.16358184814453 init-act: [30.598 41.422 30.848 42.027]
2025-07-18 05:17:11,856 - ADV - DEBUG - episode: 4399/50000, score: 0, e: 0.2, reward: 38.467, seudo-rew:0.0 max: 41.58662033081055 init-act: [31.01  41.695 31.268 42.448]
2025-07-18 05:17:20,211 - ADV - DEBUG - episode: 4499/50000, score: 0, e: 0.2, reward: 40.407, seudo-rew:0.0 max: 41.85456085205078 init-act: [31.374 42.048 31.541 42.834]
2025-07-18 05:17:28,478 - ADV - DEBUG - episode: 4599/50000, score: 0, e: 0.2, reward: 39.268, seudo-rew:0.0 max: 42.17890167236328 init-act:

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-6000.keras


2025-07-18 05:19:34,563 - ADV - DEBUG - episode: 6099/50000, score: 0, e: 0.2, reward: 40.521, seudo-rew:0.0 max: 45.18648147583008 init-act: [33.319 45.22  33.531 46.176]
2025-07-18 05:19:42,892 - ADV - DEBUG - episode: 6199/50000, score: 0, e: 0.2, reward: 40.147, seudo-rew:0.0 max: 45.2651481628418 init-act: [33.44  45.454 33.477 46.242]
2025-07-18 05:19:51,266 - ADV - DEBUG - episode: 6299/50000, score: 0, e: 0.2, reward: 40.454, seudo-rew:0.0 max: 45.397315979003906 init-act: [33.522 45.59  33.521 46.34 ]
2025-07-18 05:19:59,584 - ADV - DEBUG - episode: 6399/50000, score: 0, e: 0.2, reward: 40.376, seudo-rew:0.0 max: 45.53778839111328 init-act: [33.535 45.609 33.49  46.315]
2025-07-18 05:20:07,876 - ADV - DEBUG - episode: 6499/50000, score: 0, e: 0.2, reward: 39.865, seudo-rew:0.0 max: 45.39928436279297 init-act: [33.52  45.774 33.582 46.37 ]
2025-07-18 05:20:16,233 - ADV - DEBUG - episode: 6599/50000, score: 0, e: 0.2, reward: 40.293, seudo-rew:0.0 max: 45.426631927490234 init-ac

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-8000.keras


2025-07-18 05:22:21,974 - ADV - DEBUG - episode: 8099/50000, score: 0, e: 0.2, reward: 39.938, seudo-rew:0.0 max: 46.24794006347656 init-act: [33.354 47.09  33.424 47.25 ]
2025-07-18 05:22:30,287 - ADV - DEBUG - episode: 8199/50000, score: 0, e: 0.2, reward: 40.333, seudo-rew:0.0 max: 46.474365234375 init-act: [33.452 47.116 33.695 47.273]
2025-07-18 05:22:38,606 - ADV - DEBUG - episode: 8299/50000, score: 0, e: 0.2, reward: 40.087, seudo-rew:0.0 max: 46.27865219116211 init-act: [33.341 46.763 33.363 47.121]
2025-07-18 05:22:46,939 - ADV - DEBUG - episode: 8399/50000, score: 0, e: 0.2, reward: 39.834, seudo-rew:0.0 max: 46.281639099121094 init-act: [33.314 46.733 33.436 47.18 ]
2025-07-18 05:22:55,289 - ADV - DEBUG - episode: 8499/50000, score: 0, e: 0.2, reward: 39.934, seudo-rew:0.0 max: 46.20038986206055 init-act: [33.529 46.864 33.947 47.111]
2025-07-18 05:23:03,679 - ADV - DEBUG - episode: 8599/50000, score: 0, e: 0.2, reward: 39.971, seudo-rew:0.0 max: 46.39667892456055 init-act:

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-10000.keras


2025-07-18 05:25:08,976 - ADV - DEBUG - episode: 10099/50000, score: 0, e: 0.2, reward: 40.03, seudo-rew:0.0 max: 46.97850036621094 init-act: [33.287 47.647 33.496 47.707]
2025-07-18 05:25:17,412 - ADV - DEBUG - episode: 10199/50000, score: 0, e: 0.2, reward: 40.119, seudo-rew:0.0 max: 46.81828308105469 init-act: [33.514 47.797 33.922 47.878]
2025-07-18 05:25:25,779 - ADV - DEBUG - episode: 10299/50000, score: 0, e: 0.2, reward: 40.388, seudo-rew:0.0 max: 46.89496612548828 init-act: [33.27  47.781 33.847 47.84 ]
2025-07-18 05:25:34,113 - ADV - DEBUG - episode: 10399/50000, score: 0, e: 0.2, reward: 40.265, seudo-rew:0.0 max: 46.91049575805664 init-act: [33.348 47.875 34.047 47.819]
2025-07-18 05:25:42,378 - ADV - DEBUG - episode: 10499/50000, score: 0, e: 0.2, reward: 39.93, seudo-rew:0.0 max: 47.06810760498047 init-act: [33.197 47.753 33.766 47.738]
2025-07-18 05:25:50,860 - ADV - DEBUG - episode: 10599/50000, score: 0, e: 0.2, reward: 39.704, seudo-rew:0.0 max: 47.06907653808594 init

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-12000.keras


2025-07-18 05:27:56,758 - ADV - DEBUG - episode: 12099/50000, score: 0, e: 0.2, reward: 40.338, seudo-rew:0.0 max: 47.08598327636719 init-act: [32.092 47.522 32.349 47.781]
2025-07-18 05:28:05,268 - ADV - DEBUG - episode: 12199/50000, score: 0, e: 0.2, reward: 39.982, seudo-rew:0.0 max: 46.889244079589844 init-act: [32.112 47.55  32.396 47.784]
2025-07-18 05:28:13,577 - ADV - DEBUG - episode: 12299/50000, score: 0, e: 0.2, reward: 40.334, seudo-rew:0.0 max: 47.15418243408203 init-act: [31.827 47.145 31.976 47.944]
2025-07-18 05:28:22,040 - ADV - DEBUG - episode: 12399/50000, score: 0, e: 0.2, reward: 40.395, seudo-rew:0.0 max: 46.90713119506836 init-act: [31.811 47.172 31.857 47.879]
2025-07-18 05:28:30,399 - ADV - DEBUG - episode: 12499/50000, score: 0, e: 0.2, reward: 40.38, seudo-rew:0.0 max: 47.16303634643555 init-act: [31.809 47.309 31.957 47.917]
2025-07-18 05:28:38,706 - ADV - DEBUG - episode: 12599/50000, score: 0, e: 0.2, reward: 40.243, seudo-rew:0.0 max: 46.9235954284668 ini

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-14000.keras


2025-07-18 05:30:43,970 - ADV - DEBUG - episode: 14099/50000, score: 0, e: 0.2, reward: 40.29, seudo-rew:0.0 max: 46.99876403808594 init-act: [30.787 47.491 30.839 47.724]
2025-07-18 05:30:52,496 - ADV - DEBUG - episode: 14199/50000, score: 0, e: 0.2, reward: 40.263, seudo-rew:0.0 max: 46.96987533569336 init-act: [30.756 47.686 30.725 47.779]
2025-07-18 05:31:00,790 - ADV - DEBUG - episode: 14299/50000, score: 0, e: 0.2, reward: 40.255, seudo-rew:0.0 max: 47.08269500732422 init-act: [30.78  47.676 30.791 47.78 ]
2025-07-18 05:31:09,168 - ADV - DEBUG - episode: 14399/50000, score: 0, e: 0.2, reward: 40.133, seudo-rew:0.0 max: 46.8253173828125 init-act: [30.606 47.484 30.554 47.726]
2025-07-18 05:31:17,448 - ADV - DEBUG - episode: 14499/50000, score: 0, e: 0.2, reward: 39.919, seudo-rew:0.0 max: 46.738067626953125 init-act: [30.57  47.524 30.502 47.661]
2025-07-18 05:31:25,709 - ADV - DEBUG - episode: 14599/50000, score: 0, e: 0.2, reward: 39.876, seudo-rew:0.0 max: 46.63179397583008 ini

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-16000.keras


2025-07-18 05:33:30,005 - ADV - DEBUG - episode: 16099/50000, score: 0, e: 0.2, reward: 39.835, seudo-rew:0.0 max: 46.451019287109375 init-act: [29.641 46.691 30.337 47.2  ]
2025-07-18 05:33:38,210 - ADV - DEBUG - episode: 16199/50000, score: 0, e: 0.2, reward: 39.861, seudo-rew:0.0 max: 46.17887878417969 init-act: [29.55  46.798 30.057 47.157]
2025-07-18 05:33:46,495 - ADV - DEBUG - episode: 16299/50000, score: 0, e: 0.2, reward: 40.307, seudo-rew:0.0 max: 46.18605041503906 init-act: [29.472 46.679 29.772 47.102]
2025-07-18 05:33:54,767 - ADV - DEBUG - episode: 16399/50000, score: 0, e: 0.2, reward: 40.101, seudo-rew:0.0 max: 46.401695251464844 init-act: [29.504 46.843 29.943 47.145]
2025-07-18 05:34:03,113 - ADV - DEBUG - episode: 16499/50000, score: 0, e: 0.2, reward: 40.098, seudo-rew:0.0 max: 46.147071838378906 init-act: [29.496 46.886 29.929 47.184]
2025-07-18 05:34:11,389 - ADV - DEBUG - episode: 16599/50000, score: 0, e: 0.2, reward: 39.816, seudo-rew:0.0 max: 46.37670135498047

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-18000.keras


2025-07-18 05:36:16,131 - ADV - DEBUG - episode: 18099/50000, score: 0, e: 0.2, reward: 39.85, seudo-rew:0.0 max: 45.85417175292969 init-act: [29.33  46.655 29.49  46.874]
2025-07-18 05:36:24,411 - ADV - DEBUG - episode: 18199/50000, score: 0, e: 0.2, reward: 40.085, seudo-rew:0.0 max: 45.88286590576172 init-act: [29.278 46.735 29.729 46.824]
2025-07-18 05:36:32,781 - ADV - DEBUG - episode: 18299/50000, score: 0, e: 0.2, reward: 40.029, seudo-rew:0.0 max: 46.07283020019531 init-act: [29.064 46.763 29.419 46.801]
2025-07-18 05:36:41,063 - ADV - DEBUG - episode: 18399/50000, score: 0, e: 0.2, reward: 39.787, seudo-rew:0.0 max: 45.81671905517578 init-act: [28.99  46.814 29.425 46.84 ]
2025-07-18 05:36:49,355 - ADV - DEBUG - episode: 18499/50000, score: 0, e: 0.2, reward: 39.889, seudo-rew:0.0 max: 46.01625061035156 init-act: [28.994 46.684 29.383 46.751]
2025-07-18 05:36:57,670 - ADV - DEBUG - episode: 18599/50000, score: 0, e: 0.2, reward: 39.861, seudo-rew:0.0 max: 45.749794006347656 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-20000.keras


2025-07-18 05:39:01,971 - ADV - DEBUG - episode: 20099/50000, score: 0, e: 0.2, reward: 40.078, seudo-rew:0.0 max: 45.66340255737305 init-act: [28.962 46.425 29.31  46.637]
2025-07-18 05:39:10,202 - ADV - DEBUG - episode: 20199/50000, score: 0, e: 0.2, reward: 39.531, seudo-rew:0.0 max: 45.67186737060547 init-act: [28.9   46.215 29.115 46.59 ]
2025-07-18 05:39:18,433 - ADV - DEBUG - episode: 20299/50000, score: 0, e: 0.2, reward: 39.66, seudo-rew:0.0 max: 45.57634735107422 init-act: [28.875 46.195 29.158 46.491]
2025-07-18 05:39:26,762 - ADV - DEBUG - episode: 20399/50000, score: 0, e: 0.2, reward: 39.947, seudo-rew:0.0 max: 45.54706954956055 init-act: [28.963 46.21  29.216 46.555]
2025-07-18 05:39:35,041 - ADV - DEBUG - episode: 20499/50000, score: 0, e: 0.2, reward: 38.812, seudo-rew:0.0 max: 45.74238967895508 init-act: [28.916 46.293 29.024 46.559]
2025-07-18 05:39:43,259 - ADV - DEBUG - episode: 20599/50000, score: 0, e: 0.2, reward: 39.575, seudo-rew:0.0 max: 45.6693000793457 init

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-22000.keras


2025-07-18 05:41:48,280 - ADV - DEBUG - episode: 22099/50000, score: 0, e: 0.2, reward: 38.9, seudo-rew:0.0 max: 45.821231842041016 init-act: [29.066 46.6   29.016 46.845]
2025-07-18 05:41:56,600 - ADV - DEBUG - episode: 22199/50000, score: 0, e: 0.2, reward: 39.932, seudo-rew:0.0 max: 46.13124084472656 init-act: [29.049 46.655 29.186 46.86 ]
2025-07-18 05:42:04,914 - ADV - DEBUG - episode: 22299/50000, score: 0, e: 0.2, reward: 39.989, seudo-rew:0.0 max: 45.887420654296875 init-act: [29.132 46.66  29.402 46.805]
2025-07-18 05:42:13,527 - ADV - DEBUG - episode: 22399/50000, score: 0, e: 0.2, reward: 40.253, seudo-rew:0.0 max: 45.91523361206055 init-act: [29.232 46.828 29.45  46.987]
2025-07-18 05:42:21,926 - ADV - DEBUG - episode: 22499/50000, score: 0, e: 0.2, reward: 40.164, seudo-rew:0.0 max: 45.92467498779297 init-act: [29.129 46.887 29.315 46.88 ]
2025-07-18 05:42:30,309 - ADV - DEBUG - episode: 22599/50000, score: 0, e: 0.2, reward: 40.312, seudo-rew:0.0 max: 46.13004684448242 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-24000.keras


2025-07-18 05:44:34,908 - ADV - DEBUG - episode: 24099/50000, score: 0, e: 0.2, reward: 39.765, seudo-rew:0.0 max: 45.72114562988281 init-act: [29.376 46.609 29.654 46.539]
2025-07-18 05:44:43,243 - ADV - DEBUG - episode: 24199/50000, score: 0, e: 0.2, reward: 39.966, seudo-rew:0.0 max: 45.82138442993164 init-act: [29.288 46.46  29.594 46.59 ]
2025-07-18 05:44:51,500 - ADV - DEBUG - episode: 24299/50000, score: 0, e: 0.2, reward: 40.43, seudo-rew:0.0 max: 45.799049377441406 init-act: [29.259 46.556 29.572 46.561]
2025-07-18 05:44:59,898 - ADV - DEBUG - episode: 24399/50000, score: 0, e: 0.2, reward: 40.433, seudo-rew:0.0 max: 45.78350830078125 init-act: [29.223 46.641 29.412 46.586]
2025-07-18 05:45:08,231 - ADV - DEBUG - episode: 24499/50000, score: 0, e: 0.2, reward: 39.981, seudo-rew:0.0 max: 45.804996490478516 init-act: [29.367 46.703 29.531 46.647]
2025-07-18 05:45:16,485 - ADV - DEBUG - episode: 24599/50000, score: 0, e: 0.2, reward: 40.219, seudo-rew:0.0 max: 45.8858642578125 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-26000.keras


2025-07-18 05:47:21,175 - ADV - DEBUG - episode: 26099/50000, score: 0, e: 0.2, reward: 39.247, seudo-rew:0.0 max: 45.80004119873047 init-act: [29.578 46.527 30.057 46.723]
2025-07-18 05:47:29,491 - ADV - DEBUG - episode: 26199/50000, score: 0, e: 0.2, reward: 39.63, seudo-rew:0.0 max: 45.781131744384766 init-act: [29.574 46.405 29.986 46.771]
2025-07-18 05:47:37,825 - ADV - DEBUG - episode: 26299/50000, score: 0, e: 0.2, reward: 39.04, seudo-rew:0.0 max: 46.06391525268555 init-act: [29.36  46.514 29.62  46.71 ]
2025-07-18 05:47:46,101 - ADV - DEBUG - episode: 26399/50000, score: 0, e: 0.2, reward: 39.1, seudo-rew:0.0 max: 45.832950592041016 init-act: [29.517 46.617 29.881 46.806]
2025-07-18 05:47:54,286 - ADV - DEBUG - episode: 26499/50000, score: 0, e: 0.2, reward: 39.237, seudo-rew:0.0 max: 45.805973052978516 init-act: [29.707 46.706 30.334 46.807]
2025-07-18 05:48:02,653 - ADV - DEBUG - episode: 26599/50000, score: 0, e: 0.2, reward: 39.108, seudo-rew:0.0 max: 45.84444046020508 ini

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-28000.keras


2025-07-18 05:50:07,105 - ADV - DEBUG - episode: 28099/50000, score: 0, e: 0.2, reward: 40.081, seudo-rew:0.0 max: 45.657955169677734 init-act: [29.783 46.32  29.682 46.532]
2025-07-18 05:50:15,432 - ADV - DEBUG - episode: 28199/50000, score: 0, e: 0.2, reward: 39.481, seudo-rew:0.0 max: 45.53844451904297 init-act: [30.047 46.237 30.094 46.552]
2025-07-18 05:50:23,819 - ADV - DEBUG - episode: 28299/50000, score: 0, e: 0.2, reward: 40.225, seudo-rew:0.0 max: 45.74940490722656 init-act: [29.956 45.903 29.961 46.365]
2025-07-18 05:50:32,083 - ADV - DEBUG - episode: 28399/50000, score: 0, e: 0.2, reward: 39.374, seudo-rew:0.0 max: 45.75069808959961 init-act: [30.198 46.013 30.162 46.491]
2025-07-18 05:50:40,371 - ADV - DEBUG - episode: 28499/50000, score: 0, e: 0.2, reward: 39.136, seudo-rew:0.0 max: 45.570613861083984 init-act: [30.223 46.201 30.093 46.517]
2025-07-18 05:50:48,843 - ADV - DEBUG - episode: 28599/50000, score: 0, e: 0.2, reward: 39.334, seudo-rew:0.0 max: 45.757850646972656

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-30000.keras


2025-07-18 05:52:53,732 - ADV - DEBUG - episode: 30099/50000, score: 0, e: 0.2, reward: 39.666, seudo-rew:0.0 max: 45.571556091308594 init-act: [29.998 46.457 29.994 46.624]
2025-07-18 05:53:02,139 - ADV - DEBUG - episode: 30199/50000, score: 0, e: 0.2, reward: 39.601, seudo-rew:0.0 max: 45.633567810058594 init-act: [30.076 46.552 30.104 46.585]
2025-07-18 05:53:10,546 - ADV - DEBUG - episode: 30299/50000, score: 0, e: 0.2, reward: 39.509, seudo-rew:0.0 max: 45.94171142578125 init-act: [29.881 46.59  29.994 46.6  ]
2025-07-18 05:53:19,120 - ADV - DEBUG - episode: 30399/50000, score: 0, e: 0.2, reward: 39.433, seudo-rew:0.0 max: 45.633338928222656 init-act: [29.849 46.632 29.867 46.613]
2025-07-18 05:53:27,435 - ADV - DEBUG - episode: 30499/50000, score: 0, e: 0.2, reward: 39.646, seudo-rew:0.0 max: 46.00934982299805 init-act: [29.81  46.723 29.916 46.707]
2025-07-18 05:53:35,773 - ADV - DEBUG - episode: 30599/50000, score: 0, e: 0.2, reward: 39.482, seudo-rew:0.0 max: 45.96926498413086

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-32000.keras


2025-07-18 05:55:40,315 - ADV - DEBUG - episode: 32099/50000, score: 0, e: 0.2, reward: 39.571, seudo-rew:0.0 max: 46.19404220581055 init-act: [30.104 46.761 30.263 46.884]
2025-07-18 05:55:48,681 - ADV - DEBUG - episode: 32199/50000, score: 0, e: 0.2, reward: 39.692, seudo-rew:0.0 max: 46.14098358154297 init-act: [30.078 46.703 30.018 46.798]
2025-07-18 05:55:57,034 - ADV - DEBUG - episode: 32299/50000, score: 0, e: 0.2, reward: 39.868, seudo-rew:0.0 max: 45.98184585571289 init-act: [30.106 46.763 30.132 46.953]
2025-07-18 05:56:05,332 - ADV - DEBUG - episode: 32399/50000, score: 0, e: 0.2, reward: 39.376, seudo-rew:0.0 max: 45.98992156982422 init-act: [30.27  46.91  30.521 46.97 ]
2025-07-18 05:56:13,615 - ADV - DEBUG - episode: 32499/50000, score: 0, e: 0.2, reward: 39.043, seudo-rew:0.0 max: 46.04941940307617 init-act: [30.139 47.035 30.268 47.019]
2025-07-18 05:56:21,964 - ADV - DEBUG - episode: 32599/50000, score: 0, e: 0.2, reward: 39.1, seudo-rew:0.0 max: 46.31782531738281 init

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-34000.keras


2025-07-18 05:58:27,094 - ADV - DEBUG - episode: 34099/50000, score: 0, e: 0.2, reward: 39.323, seudo-rew:0.0 max: 46.53942108154297 init-act: [29.781 46.948 30.09  47.096]
2025-07-18 05:58:35,363 - ADV - DEBUG - episode: 34199/50000, score: 0, e: 0.2, reward: 38.996, seudo-rew:0.0 max: 46.4339485168457 init-act: [29.819 46.977 29.994 47.101]
2025-07-18 05:58:43,729 - ADV - DEBUG - episode: 34299/50000, score: 0, e: 0.2, reward: 39.249, seudo-rew:0.0 max: 46.41905212402344 init-act: [30.038 46.936 30.132 47.1  ]
2025-07-18 05:58:51,985 - ADV - DEBUG - episode: 34399/50000, score: 0, e: 0.2, reward: 39.298, seudo-rew:0.0 max: 46.343807220458984 init-act: [30.177 47.047 30.291 47.168]
2025-07-18 05:59:00,252 - ADV - DEBUG - episode: 34499/50000, score: 0, e: 0.2, reward: 39.992, seudo-rew:0.0 max: 46.16357421875 init-act: [30.202 47.11  30.203 47.218]
2025-07-18 05:59:08,566 - ADV - DEBUG - episode: 34599/50000, score: 0, e: 0.2, reward: 38.962, seudo-rew:0.0 max: 46.25123596191406 init-

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-36000.keras


2025-07-18 06:01:13,651 - ADV - DEBUG - episode: 36099/50000, score: 0, e: 0.2, reward: 39.878, seudo-rew:0.0 max: 46.41265106201172 init-act: [30.975 47.164 30.619 47.114]
2025-07-18 06:01:21,971 - ADV - DEBUG - episode: 36199/50000, score: 0, e: 0.2, reward: 39.599, seudo-rew:0.0 max: 46.329322814941406 init-act: [31.003 47.174 30.793 47.133]
2025-07-18 06:01:30,234 - ADV - DEBUG - episode: 36299/50000, score: 0, e: 0.2, reward: 39.655, seudo-rew:0.0 max: 46.30101776123047 init-act: [31.011 47.027 30.604 46.999]
2025-07-18 06:01:38,551 - ADV - DEBUG - episode: 36399/50000, score: 0, e: 0.2, reward: 39.431, seudo-rew:0.0 max: 46.35722351074219 init-act: [31.223 47.199 30.916 47.211]
2025-07-18 06:01:46,855 - ADV - DEBUG - episode: 36499/50000, score: 0, e: 0.2, reward: 39.311, seudo-rew:0.0 max: 46.287330627441406 init-act: [31.266 47.103 30.929 47.024]
2025-07-18 06:01:55,149 - ADV - DEBUG - episode: 36599/50000, score: 0, e: 0.2, reward: 40.047, seudo-rew:0.0 max: 46.391815185546875

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-38000.keras


2025-07-18 06:03:59,753 - ADV - DEBUG - episode: 38099/50000, score: 0, e: 0.2, reward: 39.849, seudo-rew:0.0 max: 45.9710578918457 init-act: [31.41  46.643 31.546 46.954]
2025-07-18 06:04:07,973 - ADV - DEBUG - episode: 38199/50000, score: 0, e: 0.2, reward: 40.131, seudo-rew:0.0 max: 46.19828796386719 init-act: [31.084 46.645 31.119 46.953]
2025-07-18 06:04:16,279 - ADV - DEBUG - episode: 38299/50000, score: 0, e: 0.2, reward: 39.558, seudo-rew:0.0 max: 45.94292449951172 init-act: [31.064 46.645 31.047 46.918]
2025-07-18 06:04:24,574 - ADV - DEBUG - episode: 38399/50000, score: 0, e: 0.2, reward: 39.465, seudo-rew:0.0 max: 46.30256271362305 init-act: [31.027 46.855 30.875 47.03 ]
2025-07-18 06:04:32,876 - ADV - DEBUG - episode: 38499/50000, score: 0, e: 0.2, reward: 39.279, seudo-rew:0.0 max: 46.3343620300293 init-act: [31.    46.848 30.863 46.977]
2025-07-18 06:04:41,189 - ADV - DEBUG - episode: 38599/50000, score: 0, e: 0.2, reward: 39.295, seudo-rew:0.0 max: 46.07173538208008 init

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-40000.keras


2025-07-18 06:06:46,259 - ADV - DEBUG - episode: 40099/50000, score: 0, e: 0.2, reward: 39.333, seudo-rew:0.0 max: 46.451866149902344 init-act: [31.359 47.211 31.301 47.178]
2025-07-18 06:06:54,605 - ADV - DEBUG - episode: 40199/50000, score: 0, e: 0.2, reward: 39.23, seudo-rew:0.0 max: 46.63664245605469 init-act: [31.452 47.43  31.403 47.382]
2025-07-18 06:07:02,786 - ADV - DEBUG - episode: 40299/50000, score: 0, e: 0.2, reward: 39.434, seudo-rew:0.0 max: 46.50746536254883 init-act: [31.351 47.153 31.138 47.165]
2025-07-18 06:07:11,085 - ADV - DEBUG - episode: 40399/50000, score: 0, e: 0.2, reward: 39.846, seudo-rew:0.0 max: 46.589073181152344 init-act: [31.909 47.413 31.731 47.494]
2025-07-18 06:07:19,382 - ADV - DEBUG - episode: 40499/50000, score: 0, e: 0.2, reward: 39.129, seudo-rew:0.0 max: 46.64590835571289 init-act: [31.82  47.296 31.605 47.398]
2025-07-18 06:07:27,712 - ADV - DEBUG - episode: 40599/50000, score: 0, e: 0.2, reward: 39.932, seudo-rew:0.0 max: 46.652488708496094 

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-42000.keras


2025-07-18 06:09:32,201 - ADV - DEBUG - episode: 42099/50000, score: 0, e: 0.2, reward: 40.039, seudo-rew:0.0 max: 46.40685272216797 init-act: [31.578 47.113 31.492 47.158]
2025-07-18 06:09:40,502 - ADV - DEBUG - episode: 42199/50000, score: 0, e: 0.2, reward: 39.831, seudo-rew:0.0 max: 46.425575256347656 init-act: [31.819 47.23  31.742 47.156]
2025-07-18 06:09:48,862 - ADV - DEBUG - episode: 42299/50000, score: 0, e: 0.2, reward: 40.032, seudo-rew:0.0 max: 46.501304626464844 init-act: [31.807 47.183 31.737 47.186]
2025-07-18 06:09:57,164 - ADV - DEBUG - episode: 42399/50000, score: 0, e: 0.2, reward: 40.167, seudo-rew:0.0 max: 46.445274353027344 init-act: [31.592 47.071 31.324 47.09 ]
2025-07-18 06:10:05,583 - ADV - DEBUG - episode: 42499/50000, score: 0, e: 0.2, reward: 40.06, seudo-rew:0.0 max: 46.540191650390625 init-act: [31.664 47.179 31.408 47.238]
2025-07-18 06:10:14,056 - ADV - DEBUG - episode: 42599/50000, score: 0, e: 0.2, reward: 39.354, seudo-rew:0.0 max: 46.48518371582031

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-44000.keras


2025-07-18 06:12:19,709 - ADV - DEBUG - episode: 44099/50000, score: 0, e: 0.2, reward: 39.762, seudo-rew:0.0 max: 46.343318939208984 init-act: [31.672 47.159 31.717 47.098]
2025-07-18 06:12:28,014 - ADV - DEBUG - episode: 44199/50000, score: 0, e: 0.2, reward: 39.107, seudo-rew:0.0 max: 46.408775329589844 init-act: [31.598 47.105 31.594 47.194]
2025-07-18 06:12:36,346 - ADV - DEBUG - episode: 44299/50000, score: 0, e: 0.2, reward: 39.2, seudo-rew:0.0 max: 46.35737228393555 init-act: [31.541 47.067 31.645 47.094]
2025-07-18 06:12:44,632 - ADV - DEBUG - episode: 44399/50000, score: 0, e: 0.2, reward: 39.128, seudo-rew:0.0 max: 46.342166900634766 init-act: [31.648 46.972 31.76  47.069]
2025-07-18 06:12:52,921 - ADV - DEBUG - episode: 44499/50000, score: 0, e: 0.2, reward: 39.952, seudo-rew:0.0 max: 46.38053512573242 init-act: [31.856 46.987 31.91  47.104]
2025-07-18 06:13:01,267 - ADV - DEBUG - episode: 44599/50000, score: 0, e: 0.2, reward: 39.721, seudo-rew:0.0 max: 46.07889938354492 i

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-46000.keras


2025-07-18 06:15:06,389 - ADV - DEBUG - episode: 46099/50000, score: 0, e: 0.2, reward: 39.912, seudo-rew:0.0 max: 46.66225051879883 init-act: [31.959 47.548 31.53  47.551]
2025-07-18 06:15:14,595 - ADV - DEBUG - episode: 46199/50000, score: 0, e: 0.2, reward: 40.461, seudo-rew:0.0 max: 46.68193817138672 init-act: [32.018 47.539 31.573 47.438]
2025-07-18 06:15:22,951 - ADV - DEBUG - episode: 46299/50000, score: 0, e: 0.2, reward: 40.097, seudo-rew:0.0 max: 46.70643997192383 init-act: [31.908 47.246 31.493 47.378]
2025-07-18 06:15:31,297 - ADV - DEBUG - episode: 46399/50000, score: 0, e: 0.2, reward: 39.897, seudo-rew:0.0 max: 46.66067123413086 init-act: [31.717 47.333 31.342 47.267]
2025-07-18 06:15:39,601 - ADV - DEBUG - episode: 46499/50000, score: 0, e: 0.2, reward: 39.811, seudo-rew:0.0 max: 46.75155258178711 init-act: [31.875 47.418 31.34  47.486]
2025-07-18 06:15:47,936 - ADV - DEBUG - episode: 46599/50000, score: 0, e: 0.2, reward: 40.009, seudo-rew:0.0 max: 46.50575637817383 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr0.0001\gpt\model-48000.keras


2025-07-18 06:17:53,024 - ADV - DEBUG - episode: 48099/50000, score: 0, e: 0.2, reward: 39.387, seudo-rew:0.0 max: 46.808067321777344 init-act: [31.719 47.74  31.427 47.685]
2025-07-18 06:18:01,337 - ADV - DEBUG - episode: 48199/50000, score: 0, e: 0.2, reward: 39.786, seudo-rew:0.0 max: 46.996788024902344 init-act: [31.549 47.775 31.339 47.629]
2025-07-18 06:18:09,639 - ADV - DEBUG - episode: 48299/50000, score: 0, e: 0.2, reward: 39.588, seudo-rew:0.0 max: 46.94729995727539 init-act: [31.432 47.55  31.189 47.577]
2025-07-18 06:18:17,926 - ADV - DEBUG - episode: 48399/50000, score: 0, e: 0.2, reward: 39.876, seudo-rew:0.0 max: 46.99844741821289 init-act: [31.625 47.924 31.84  47.84 ]
2025-07-18 06:18:26,242 - ADV - DEBUG - episode: 48499/50000, score: 0, e: 0.2, reward: 39.885, seudo-rew:0.0 max: 47.13491439819336 init-act: [31.654 47.996 31.539 48.057]
2025-07-18 06:18:34,471 - ADV - DEBUG - episode: 48599/50000, score: 0, e: 0.2, reward: 39.398, seudo-rew:0.0 max: 46.85823440551758 

400000 1e-05 0.01
📦 Training: buf=400000, lr=1e-05, eps=0.01 → trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt


2025-07-18 06:20:30,731 - ADV - DEBUG - Trained model saved to: trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-0.keras


dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-0.keras


2025-07-18 06:20:36,301 - ADV - DEBUG - episode: 99/50000, score: 0, e: 0.01, reward: 37.266, seudo-rew:0.0 max: 1.9708530902862549 init-act: [ 0.023  0.192 -0.238  0.917]
2025-07-18 06:20:41,784 - ADV - DEBUG - episode: 199/50000, score: 0, e: 0.01, reward: 37.177, seudo-rew:0.0 max: 2.306694984436035 init-act: [ 0.032  0.21  -0.221  1.016]
2025-07-18 06:20:47,437 - ADV - DEBUG - episode: 299/50000, score: 0, e: 0.01, reward: 37.264, seudo-rew:0.0 max: 2.371971845626831 init-act: [ 0.041  0.234 -0.197  1.13 ]
2025-07-18 06:20:53,134 - ADV - DEBUG - episode: 399/50000, score: 0, e: 0.01, reward: 37.236, seudo-rew:0.0 max: 2.512226104736328 init-act: [ 0.042  0.26  -0.176  1.252]
2025-07-18 06:21:01,551 - ADV - DEBUG - episode: 499/50000, score: 0, e: 0.01, reward: 37.264, seudo-rew:0.0 max: 2.661675214767456 init-act: [ 0.046  0.286 -0.154  1.376]
2025-07-18 06:21:09,951 - ADV - DEBUG - episode: 599/50000, score: 0, e: 0.01, reward: 37.177, seudo-rew:0.0 max: 2.746210813522339 init-act

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-2000.keras


2025-07-18 06:23:14,790 - ADV - DEBUG - episode: 2099/50000, score: 0, e: 0.01, reward: 37.282, seudo-rew:0.0 max: 8.758818626403809 init-act: [0.359 1.12  0.397 4.821]
2025-07-18 06:23:23,014 - ADV - DEBUG - episode: 2199/50000, score: 0, e: 0.01, reward: 37.256, seudo-rew:0.0 max: 9.249387741088867 init-act: [0.402 1.194 0.429 5.112]
2025-07-18 06:23:31,337 - ADV - DEBUG - episode: 2299/50000, score: 0, e: 0.01, reward: 37.372, seudo-rew:0.0 max: 9.768543243408203 init-act: [0.459 1.273 0.493 5.407]
2025-07-18 06:23:39,689 - ADV - DEBUG - episode: 2399/50000, score: 0, e: 0.01, reward: 37.543, seudo-rew:0.0 max: 10.283726692199707 init-act: [0.515 1.356 0.562 5.71 ]
2025-07-18 06:23:47,987 - ADV - DEBUG - episode: 2499/50000, score: 0, e: 0.01, reward: 37.2, seudo-rew:0.0 max: 10.764993667602539 init-act: [0.587 1.448 0.624 6.025]
2025-07-18 06:23:56,284 - ADV - DEBUG - episode: 2599/50000, score: 0, e: 0.01, reward: 37.303, seudo-rew:0.0 max: 11.327672958374023 init-act: [0.66  1.55

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-4000.keras


2025-07-18 06:26:01,243 - ADV - DEBUG - episode: 4099/50000, score: 0, e: 0.01, reward: 37.363, seudo-rew:0.0 max: 20.093137741088867 init-act: [ 2.288  3.362  2.282 11.93 ]
2025-07-18 06:26:09,836 - ADV - DEBUG - episode: 4199/50000, score: 0, e: 0.01, reward: 37.349, seudo-rew:0.0 max: 20.6509952545166 init-act: [ 2.433  3.512  2.405 12.347]
2025-07-18 06:26:18,147 - ADV - DEBUG - episode: 4299/50000, score: 0, e: 0.01, reward: 37.335, seudo-rew:0.0 max: 21.286535263061523 init-act: [ 2.603  3.657  2.561 12.778]
2025-07-18 06:26:26,372 - ADV - DEBUG - episode: 4399/50000, score: 0, e: 0.01, reward: 37.184, seudo-rew:0.0 max: 21.884910583496094 init-act: [ 2.767  3.792  2.707 13.204]
2025-07-18 06:26:34,648 - ADV - DEBUG - episode: 4499/50000, score: 0, e: 0.01, reward: 37.298, seudo-rew:0.0 max: 22.53185272216797 init-act: [ 2.891  3.949  2.829 13.639]
2025-07-18 06:26:42,960 - ADV - DEBUG - episode: 4599/50000, score: 0, e: 0.01, reward: 37.208, seudo-rew:0.0 max: 23.089885711669922

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-6000.keras


2025-07-18 06:28:48,917 - ADV - DEBUG - episode: 6099/50000, score: 0, e: 0.01, reward: 37.465, seudo-rew:0.0 max: 31.011510848999023 init-act: [ 5.549  7.091  5.467 21.073]
2025-07-18 06:28:57,294 - ADV - DEBUG - episode: 6199/50000, score: 0, e: 0.01, reward: 37.244, seudo-rew:0.0 max: 31.481874465942383 init-act: [ 5.753  7.34   5.641 21.555]
2025-07-18 06:29:05,579 - ADV - DEBUG - episode: 6299/50000, score: 0, e: 0.01, reward: 37.231, seudo-rew:0.0 max: 31.863420486450195 init-act: [ 5.989  7.568  5.849 22.033]
2025-07-18 06:29:13,896 - ADV - DEBUG - episode: 6399/50000, score: 0, e: 0.01, reward: 37.289, seudo-rew:0.0 max: 32.25032043457031 init-act: [ 6.188  7.817  6.043 22.533]
2025-07-18 06:29:22,197 - ADV - DEBUG - episode: 6499/50000, score: 0, e: 0.01, reward: 37.409, seudo-rew:0.0 max: 32.67996597290039 init-act: [ 6.356  8.051  6.283 23.015]
2025-07-18 06:29:30,498 - ADV - DEBUG - episode: 6599/50000, score: 0, e: 0.01, reward: 37.387, seudo-rew:0.0 max: 33.09100723266601

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-8000.keras


2025-07-18 06:31:35,426 - ADV - DEBUG - episode: 8099/50000, score: 0, e: 0.01, reward: 37.228, seudo-rew:0.0 max: 37.68190383911133 init-act: [ 9.579 12.318  9.744 30.506]
2025-07-18 06:31:43,745 - ADV - DEBUG - episode: 8199/50000, score: 0, e: 0.01, reward: 37.369, seudo-rew:0.0 max: 37.989654541015625 init-act: [ 9.807 12.591  9.966 30.981]
2025-07-18 06:31:52,063 - ADV - DEBUG - episode: 8299/50000, score: 0, e: 0.01, reward: 37.366, seudo-rew:0.0 max: 38.31946563720703 init-act: [10.048 12.838 10.181 31.428]
2025-07-18 06:32:00,400 - ADV - DEBUG - episode: 8399/50000, score: 0, e: 0.01, reward: 37.124, seudo-rew:0.0 max: 38.59720230102539 init-act: [10.303 13.105 10.389 31.912]
2025-07-18 06:32:08,779 - ADV - DEBUG - episode: 8499/50000, score: 0, e: 0.01, reward: 37.131, seudo-rew:0.0 max: 38.9356575012207 init-act: [10.517 13.378 10.623 32.369]
2025-07-18 06:32:17,042 - ADV - DEBUG - episode: 8599/50000, score: 0, e: 0.01, reward: 37.273, seudo-rew:0.0 max: 39.232177734375 init

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-10000.keras


2025-07-18 06:34:22,005 - ADV - DEBUG - episode: 10099/50000, score: 0, e: 0.01, reward: 37.179, seudo-rew:0.0 max: 43.029212951660156 init-act: [13.834 17.313 14.126 39.166]
2025-07-18 06:34:30,280 - ADV - DEBUG - episode: 10199/50000, score: 0, e: 0.01, reward: 37.247, seudo-rew:0.0 max: 43.259769439697266 init-act: [13.989 17.528 14.307 39.508]
2025-07-18 06:34:38,577 - ADV - DEBUG - episode: 10299/50000, score: 0, e: 0.01, reward: 37.245, seudo-rew:0.0 max: 43.57307815551758 init-act: [14.168 17.73  14.468 39.814]
2025-07-18 06:34:46,908 - ADV - DEBUG - episode: 10399/50000, score: 0, e: 0.01, reward: 37.289, seudo-rew:0.0 max: 43.66421127319336 init-act: [14.375 17.904 14.584 40.117]
2025-07-18 06:34:55,296 - ADV - DEBUG - episode: 10499/50000, score: 0, e: 0.01, reward: 37.46, seudo-rew:0.0 max: 43.621620178222656 init-act: [14.563 18.074 14.735 40.4  ]
2025-07-18 06:35:03,589 - ADV - DEBUG - episode: 10599/50000, score: 0, e: 0.01, reward: 37.321, seudo-rew:0.0 max: 43.642341613

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-12000.keras


2025-07-18 06:37:11,011 - ADV - DEBUG - episode: 12099/50000, score: 0, e: 0.01, reward: 37.368, seudo-rew:0.0 max: 44.916473388671875 init-act: [16.636 19.577 16.813 43.969]
2025-07-18 06:37:19,414 - ADV - DEBUG - episode: 12199/50000, score: 0, e: 0.01, reward: 37.202, seudo-rew:0.0 max: 45.231781005859375 init-act: [16.687 19.648 16.936 44.138]
2025-07-18 06:37:27,650 - ADV - DEBUG - episode: 12299/50000, score: 0, e: 0.01, reward: 37.374, seudo-rew:0.0 max: 45.05748748779297 init-act: [16.743 19.719 17.064 44.286]
2025-07-18 06:37:36,054 - ADV - DEBUG - episode: 12399/50000, score: 0, e: 0.01, reward: 37.341, seudo-rew:0.0 max: 45.35881805419922 init-act: [16.849 19.806 17.124 44.41 ]
2025-07-18 06:37:44,410 - ADV - DEBUG - episode: 12499/50000, score: 0, e: 0.01, reward: 37.48, seudo-rew:0.0 max: 45.168968200683594 init-act: [16.959 19.828 17.176 44.544]
2025-07-18 06:37:52,698 - ADV - DEBUG - episode: 12599/50000, score: 0, e: 0.01, reward: 37.106, seudo-rew:0.0 max: 45.355598449

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-14000.keras


2025-07-18 06:39:57,376 - ADV - DEBUG - episode: 14099/50000, score: 0, e: 0.01, reward: 37.251, seudo-rew:0.0 max: 45.364627838134766 init-act: [17.928 20.896 18.028 45.817]
2025-07-18 06:40:05,732 - ADV - DEBUG - episode: 14199/50000, score: 0, e: 0.01, reward: 37.406, seudo-rew:0.0 max: 45.244049072265625 init-act: [17.901 20.944 18.064 45.843]
2025-07-18 06:40:14,048 - ADV - DEBUG - episode: 14299/50000, score: 0, e: 0.01, reward: 37.348, seudo-rew:0.0 max: 45.21134567260742 init-act: [17.904 21.017 18.112 45.865]
2025-07-18 06:40:22,324 - ADV - DEBUG - episode: 14399/50000, score: 0, e: 0.01, reward: 37.325, seudo-rew:0.0 max: 45.303768157958984 init-act: [17.912 21.071 18.147 45.867]
2025-07-18 06:40:30,619 - ADV - DEBUG - episode: 14499/50000, score: 0, e: 0.01, reward: 37.176, seudo-rew:0.0 max: 45.81612014770508 init-act: [17.957 21.159 18.131 45.876]
2025-07-18 06:40:38,982 - ADV - DEBUG - episode: 14599/50000, score: 0, e: 0.01, reward: 37.35, seudo-rew:0.0 max: 45.819717407

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-16000.keras


2025-07-18 06:42:45,207 - ADV - DEBUG - episode: 16099/50000, score: 0, e: 0.01, reward: 37.268, seudo-rew:0.0 max: 44.81401062011719 init-act: [18.479 22.277 18.44  45.708]
2025-07-18 06:42:53,510 - ADV - DEBUG - episode: 16199/50000, score: 0, e: 0.01, reward: 37.281, seudo-rew:0.0 max: 45.6344108581543 init-act: [18.451 22.371 18.453 45.652]
2025-07-18 06:43:01,868 - ADV - DEBUG - episode: 16299/50000, score: 0, e: 0.01, reward: 37.259, seudo-rew:0.0 max: 44.70145034790039 init-act: [18.479 22.431 18.481 45.664]
2025-07-18 06:43:10,217 - ADV - DEBUG - episode: 16399/50000, score: 0, e: 0.01, reward: 37.379, seudo-rew:0.0 max: 44.746952056884766 init-act: [18.512 22.494 18.461 45.672]
2025-07-18 06:43:18,567 - ADV - DEBUG - episode: 16499/50000, score: 0, e: 0.01, reward: 37.449, seudo-rew:0.0 max: 44.64034652709961 init-act: [18.539 22.597 18.458 45.652]
2025-07-18 06:43:27,012 - ADV - DEBUG - episode: 16599/50000, score: 0, e: 0.01, reward: 37.552, seudo-rew:0.0 max: 44.59031295776

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-18000.keras


2025-07-18 06:45:31,725 - ADV - DEBUG - episode: 18099/50000, score: 0, e: 0.01, reward: 37.304, seudo-rew:0.0 max: 44.15953826904297 init-act: [18.561 23.776 18.646 45.431]
2025-07-18 06:45:40,059 - ADV - DEBUG - episode: 18199/50000, score: 0, e: 0.01, reward: 37.215, seudo-rew:0.0 max: 45.099205017089844 init-act: [18.526 23.686 18.626 45.396]
2025-07-18 06:45:48,419 - ADV - DEBUG - episode: 18299/50000, score: 0, e: 0.01, reward: 37.343, seudo-rew:0.0 max: 44.46766662597656 init-act: [18.469 23.366 18.579 45.383]
2025-07-18 06:45:56,913 - ADV - DEBUG - episode: 18399/50000, score: 0, e: 0.01, reward: 37.13, seudo-rew:0.0 max: 44.5074462890625 init-act: [18.391 23.051 18.523 45.378]
2025-07-18 06:46:06,036 - ADV - DEBUG - episode: 18499/50000, score: 0, e: 0.01, reward: 37.361, seudo-rew:0.0 max: 44.377079010009766 init-act: [18.278 22.819 18.463 45.378]
2025-07-18 06:46:14,322 - ADV - DEBUG - episode: 18599/50000, score: 0, e: 0.01, reward: 37.34, seudo-rew:0.0 max: 44.205490112304

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-20000.keras


2025-07-18 06:48:19,924 - ADV - DEBUG - episode: 20099/50000, score: 0, e: 0.01, reward: 37.169, seudo-rew:0.0 max: 43.8453369140625 init-act: [17.962 21.514 18.237 45.039]
2025-07-18 06:48:28,262 - ADV - DEBUG - episode: 20199/50000, score: 0, e: 0.01, reward: 37.332, seudo-rew:0.0 max: 43.79743576049805 init-act: [18.005 21.582 18.247 44.987]
2025-07-18 06:48:36,536 - ADV - DEBUG - episode: 20299/50000, score: 0, e: 0.01, reward: 37.488, seudo-rew:0.0 max: 45.33852005004883 init-act: [17.981 21.456 18.253 44.903]
2025-07-18 06:48:44,846 - ADV - DEBUG - episode: 20399/50000, score: 0, e: 0.01, reward: 37.418, seudo-rew:0.0 max: 45.29676818847656 init-act: [17.932 21.217 18.2   44.827]
2025-07-18 06:48:53,158 - ADV - DEBUG - episode: 20499/50000, score: 0, e: 0.01, reward: 37.187, seudo-rew:0.0 max: 44.563045501708984 init-act: [17.837 20.899 18.123 44.762]
2025-07-18 06:49:01,548 - ADV - DEBUG - episode: 20599/50000, score: 0, e: 0.01, reward: 37.218, seudo-rew:0.0 max: 45.24506378173

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-22000.keras


2025-07-18 06:51:06,945 - ADV - DEBUG - episode: 22099/50000, score: 0, e: 0.01, reward: 37.134, seudo-rew:0.0 max: 43.40726852416992 init-act: [17.453 19.344 17.556 43.594]
2025-07-18 06:51:15,338 - ADV - DEBUG - episode: 22199/50000, score: 0, e: 0.01, reward: 37.311, seudo-rew:0.0 max: 44.15471649169922 init-act: [17.426 19.236 17.532 43.533]
2025-07-18 06:51:23,677 - ADV - DEBUG - episode: 22299/50000, score: 0, e: 0.01, reward: 37.252, seudo-rew:0.0 max: 42.42402267456055 init-act: [17.41  19.172 17.517 43.464]
2025-07-18 06:51:31,975 - ADV - DEBUG - episode: 22399/50000, score: 0, e: 0.01, reward: 37.34, seudo-rew:0.0 max: 42.350547790527344 init-act: [17.409 19.147 17.487 43.405]
2025-07-18 06:51:40,291 - ADV - DEBUG - episode: 22499/50000, score: 0, e: 0.01, reward: 37.419, seudo-rew:0.0 max: 43.060787200927734 init-act: [17.426 19.093 17.457 43.34 ]
2025-07-18 06:51:48,664 - ADV - DEBUG - episode: 22599/50000, score: 0, e: 0.01, reward: 37.266, seudo-rew:0.0 max: 43.8572731018

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-24000.keras


2025-07-18 06:53:53,870 - ADV - DEBUG - episode: 24099/50000, score: 0, e: 0.01, reward: 37.536, seudo-rew:0.0 max: 40.88440704345703 init-act: [17.664 18.622 17.415 41.893]
2025-07-18 06:54:02,182 - ADV - DEBUG - episode: 24199/50000, score: 0, e: 0.01, reward: 37.366, seudo-rew:0.0 max: 42.258995056152344 init-act: [17.718 18.641 17.436 41.801]
2025-07-18 06:54:10,471 - ADV - DEBUG - episode: 24299/50000, score: 0, e: 0.01, reward: 37.305, seudo-rew:0.0 max: 42.15329360961914 init-act: [17.728 18.641 17.457 41.701]
2025-07-18 06:54:18,839 - ADV - DEBUG - episode: 24399/50000, score: 0, e: 0.01, reward: 37.288, seudo-rew:0.0 max: 40.591346740722656 init-act: [17.729 18.634 17.447 41.597]
2025-07-18 06:54:27,189 - ADV - DEBUG - episode: 24499/50000, score: 0, e: 0.01, reward: 37.444, seudo-rew:0.0 max: 40.501319885253906 init-act: [17.698 18.634 17.448 41.486]
2025-07-18 06:54:35,519 - ADV - DEBUG - episode: 24599/50000, score: 0, e: 0.01, reward: 37.585, seudo-rew:0.0 max: 40.39547729

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-26000.keras


2025-07-18 06:56:42,433 - ADV - DEBUG - episode: 26099/50000, score: 0, e: 0.01, reward: 38.286, seudo-rew:0.0 max: 39.083553314208984 init-act: [18.366 19.684 18.088 40.03 ]
2025-07-18 06:56:50,810 - ADV - DEBUG - episode: 26199/50000, score: 0, e: 0.01, reward: 38.272, seudo-rew:0.0 max: 39.005306243896484 init-act: [18.408 19.828 18.139 39.962]
2025-07-18 06:56:59,189 - ADV - DEBUG - episode: 26299/50000, score: 0, e: 0.01, reward: 38.141, seudo-rew:0.0 max: 39.04444885253906 init-act: [18.479 19.933 18.209 39.904]
2025-07-18 06:57:07,544 - ADV - DEBUG - episode: 26399/50000, score: 0, e: 0.01, reward: 38.395, seudo-rew:0.0 max: 38.98052215576172 init-act: [18.515 20.032 18.282 39.86 ]
2025-07-18 06:57:15,931 - ADV - DEBUG - episode: 26499/50000, score: 0, e: 0.01, reward: 38.442, seudo-rew:0.0 max: 39.951454162597656 init-act: [18.568 20.15  18.35  39.805]
2025-07-18 06:57:25,005 - ADV - DEBUG - episode: 26599/50000, score: 0, e: 0.01, reward: 38.532, seudo-rew:0.0 max: 38.86836624

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-28000.keras


2025-07-18 06:59:30,623 - ADV - DEBUG - episode: 28099/50000, score: 0, e: 0.01, reward: 39.325, seudo-rew:0.0 max: 38.44677734375 init-act: [19.281 21.944 19.049 39.42 ]
2025-07-18 06:59:38,920 - ADV - DEBUG - episode: 28199/50000, score: 0, e: 0.01, reward: 39.362, seudo-rew:0.0 max: 38.46455764770508 init-act: [19.34  22.084 19.094 39.439]
2025-07-18 06:59:47,268 - ADV - DEBUG - episode: 28299/50000, score: 0, e: 0.01, reward: 39.507, seudo-rew:0.0 max: 38.472286224365234 init-act: [19.369 22.176 19.103 39.429]
2025-07-18 06:59:55,579 - ADV - DEBUG - episode: 28399/50000, score: 0, e: 0.01, reward: 39.751, seudo-rew:0.0 max: 38.46704864501953 init-act: [19.423 22.309 19.167 39.428]
2025-07-18 07:00:03,876 - ADV - DEBUG - episode: 28499/50000, score: 0, e: 0.01, reward: 39.855, seudo-rew:0.0 max: 39.374061584472656 init-act: [19.449 22.384 19.201 39.437]
2025-07-18 07:00:12,172 - ADV - DEBUG - episode: 28599/50000, score: 0, e: 0.01, reward: 39.682, seudo-rew:0.0 max: 38.494037628173

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-30000.keras


2025-07-18 07:02:17,836 - ADV - DEBUG - episode: 30099/50000, score: 0, e: 0.01, reward: 39.99, seudo-rew:0.0 max: 38.858245849609375 init-act: [20.32  24.248 20.158 39.823]
2025-07-18 07:02:26,136 - ADV - DEBUG - episode: 30199/50000, score: 0, e: 0.01, reward: 40.073, seudo-rew:0.0 max: 38.88919448852539 init-act: [20.355 24.393 20.232 39.849]
2025-07-18 07:02:34,481 - ADV - DEBUG - episode: 30299/50000, score: 0, e: 0.01, reward: 40.045, seudo-rew:0.0 max: 39.733924865722656 init-act: [20.445 24.523 20.33  39.893]
2025-07-18 07:02:42,836 - ADV - DEBUG - episode: 30399/50000, score: 0, e: 0.01, reward: 39.865, seudo-rew:0.0 max: 38.95802688598633 init-act: [20.451 24.617 20.356 39.902]
2025-07-18 07:02:51,146 - ADV - DEBUG - episode: 30499/50000, score: 0, e: 0.01, reward: 39.844, seudo-rew:0.0 max: 38.975738525390625 init-act: [20.512 24.721 20.404 39.939]
2025-07-18 07:02:59,484 - ADV - DEBUG - episode: 30599/50000, score: 0, e: 0.01, reward: 40.114, seudo-rew:0.0 max: 39.012905120

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-32000.keras


2025-07-18 07:05:04,651 - ADV - DEBUG - episode: 32099/50000, score: 0, e: 0.01, reward: 40.696, seudo-rew:0.0 max: 39.5939826965332 init-act: [21.606 26.819 21.512 40.572]
2025-07-18 07:05:12,940 - ADV - DEBUG - episode: 32199/50000, score: 0, e: 0.01, reward: 40.671, seudo-rew:0.0 max: 40.40882873535156 init-act: [21.686 26.97  21.584 40.617]
2025-07-18 07:05:21,299 - ADV - DEBUG - episode: 32299/50000, score: 0, e: 0.01, reward: 40.872, seudo-rew:0.0 max: 39.708473205566406 init-act: [21.75  27.088 21.628 40.666]
2025-07-18 07:05:29,651 - ADV - DEBUG - episode: 32399/50000, score: 0, e: 0.01, reward: 40.833, seudo-rew:0.0 max: 39.740814208984375 init-act: [21.779 27.227 21.708 40.691]
2025-07-18 07:05:37,990 - ADV - DEBUG - episode: 32499/50000, score: 0, e: 0.01, reward: 40.74, seudo-rew:0.0 max: 39.777557373046875 init-act: [21.842 27.36  21.764 40.74 ]
2025-07-18 07:05:46,300 - ADV - DEBUG - episode: 32599/50000, score: 0, e: 0.01, reward: 40.918, seudo-rew:0.0 max: 39.8025512695

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-34000.keras


2025-07-18 07:07:52,602 - ADV - DEBUG - episode: 34099/50000, score: 0, e: 0.01, reward: 41.366, seudo-rew:0.0 max: 40.2729377746582 init-act: [22.77  29.822 22.822 41.245]
2025-07-18 07:08:01,015 - ADV - DEBUG - episode: 34199/50000, score: 0, e: 0.01, reward: 41.828, seudo-rew:0.0 max: 40.958065032958984 init-act: [22.873 30.016 22.896 41.267]
2025-07-18 07:08:09,294 - ADV - DEBUG - episode: 34299/50000, score: 0, e: 0.01, reward: 41.804, seudo-rew:0.0 max: 40.96504211425781 init-act: [22.969 30.199 22.989 41.279]
2025-07-18 07:08:17,827 - ADV - DEBUG - episode: 34399/50000, score: 0, e: 0.01, reward: 41.735, seudo-rew:0.0 max: 40.999576568603516 init-act: [23.    30.379 23.072 41.331]
2025-07-18 07:08:26,377 - ADV - DEBUG - episode: 34499/50000, score: 0, e: 0.01, reward: 41.982, seudo-rew:0.0 max: 40.403099060058594 init-act: [23.099 30.582 23.156 41.385]
2025-07-18 07:08:34,620 - ADV - DEBUG - episode: 34599/50000, score: 0, e: 0.01, reward: 42.086, seudo-rew:0.0 max: 40.424625396

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-36000.keras


2025-07-18 07:10:40,137 - ADV - DEBUG - episode: 36099/50000, score: 0, e: 0.01, reward: 43.398, seudo-rew:0.0 max: 41.903079986572266 init-act: [24.398 33.503 24.445 42.26 ]
2025-07-18 07:10:48,724 - ADV - DEBUG - episode: 36199/50000, score: 0, e: 0.01, reward: 43.359, seudo-rew:0.0 max: 41.9812126159668 init-act: [24.494 33.665 24.524 42.341]
2025-07-18 07:10:57,045 - ADV - DEBUG - episode: 36299/50000, score: 0, e: 0.01, reward: 43.373, seudo-rew:0.0 max: 41.43276596069336 init-act: [24.607 33.844 24.624 42.444]
2025-07-18 07:11:05,857 - ADV - DEBUG - episode: 36399/50000, score: 0, e: 0.01, reward: 43.592, seudo-rew:0.0 max: 41.525272369384766 init-act: [24.703 33.994 24.701 42.53 ]
2025-07-18 07:11:14,250 - ADV - DEBUG - episode: 36499/50000, score: 0, e: 0.01, reward: 43.595, seudo-rew:0.0 max: 42.30775833129883 init-act: [24.781 34.142 24.791 42.648]
2025-07-18 07:11:22,955 - ADV - DEBUG - episode: 36599/50000, score: 0, e: 0.01, reward: 43.616, seudo-rew:0.0 max: 42.4238853454

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-38000.keras


2025-07-18 07:13:31,825 - ADV - DEBUG - episode: 38099/50000, score: 0, e: 0.01, reward: 43.636, seudo-rew:0.0 max: 43.27439880371094 init-act: [26.357 36.4   26.429 44.304]
2025-07-18 07:13:40,164 - ADV - DEBUG - episode: 38199/50000, score: 0, e: 0.01, reward: 43.623, seudo-rew:0.0 max: 44.17454528808594 init-act: [26.444 36.509 26.522 44.387]
2025-07-18 07:13:48,559 - ADV - DEBUG - episode: 38299/50000, score: 0, e: 0.01, reward: 43.563, seudo-rew:0.0 max: 44.237098693847656 init-act: [26.503 36.627 26.615 44.45 ]
2025-07-18 07:13:56,841 - ADV - DEBUG - episode: 38399/50000, score: 0, e: 0.01, reward: 43.639, seudo-rew:0.0 max: 43.49306106567383 init-act: [26.592 36.713 26.672 44.505]
2025-07-18 07:14:05,154 - ADV - DEBUG - episode: 38499/50000, score: 0, e: 0.01, reward: 43.557, seudo-rew:0.0 max: 43.52928161621094 init-act: [26.669 36.803 26.751 44.555]
2025-07-18 07:14:13,511 - ADV - DEBUG - episode: 38599/50000, score: 0, e: 0.01, reward: 43.781, seudo-rew:0.0 max: 43.5875358581

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-40000.keras


2025-07-18 07:16:22,468 - ADV - DEBUG - episode: 40099/50000, score: 0, e: 0.01, reward: 43.605, seudo-rew:0.0 max: 44.228759765625 init-act: [27.726 38.172 28.165 45.237]
2025-07-18 07:16:31,346 - ADV - DEBUG - episode: 40199/50000, score: 0, e: 0.01, reward: 43.426, seudo-rew:0.0 max: 45.016822814941406 init-act: [27.696 38.231 28.207 45.269]
2025-07-18 07:16:40,083 - ADV - DEBUG - episode: 40299/50000, score: 0, e: 0.01, reward: 43.674, seudo-rew:0.0 max: 44.27057647705078 init-act: [27.705 38.314 28.264 45.277]
2025-07-18 07:16:48,802 - ADV - DEBUG - episode: 40399/50000, score: 0, e: 0.01, reward: 43.581, seudo-rew:0.0 max: 44.27888870239258 init-act: [27.756 38.4   28.291 45.301]
2025-07-18 07:16:57,519 - ADV - DEBUG - episode: 40499/50000, score: 0, e: 0.01, reward: 43.726, seudo-rew:0.0 max: 44.3099250793457 init-act: [27.811 38.488 28.34  45.336]
2025-07-18 07:17:06,414 - ADV - DEBUG - episode: 40599/50000, score: 0, e: 0.01, reward: 43.629, seudo-rew:0.0 max: 44.3568649291992

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-42000.keras


2025-07-18 07:19:14,187 - ADV - DEBUG - episode: 42099/50000, score: 0, e: 0.01, reward: 43.377, seudo-rew:0.0 max: 44.522193908691406 init-act: [28.045 39.365 28.94  45.531]
2025-07-18 07:19:22,492 - ADV - DEBUG - episode: 42199/50000, score: 0, e: 0.01, reward: 43.4, seudo-rew:0.0 max: 44.51219940185547 init-act: [28.037 39.395 28.92  45.523]
2025-07-18 07:19:30,733 - ADV - DEBUG - episode: 42299/50000, score: 0, e: 0.01, reward: 43.624, seudo-rew:0.0 max: 44.521358489990234 init-act: [28.031 39.434 28.91  45.512]
2025-07-18 07:19:39,187 - ADV - DEBUG - episode: 42399/50000, score: 0, e: 0.01, reward: 43.623, seudo-rew:0.0 max: 44.55093002319336 init-act: [28.172 39.476 28.906 45.558]
2025-07-18 07:19:47,542 - ADV - DEBUG - episode: 42499/50000, score: 0, e: 0.01, reward: 43.498, seudo-rew:0.0 max: 45.2570686340332 init-act: [28.34  39.495 28.981 45.581]
2025-07-18 07:19:55,833 - ADV - DEBUG - episode: 42599/50000, score: 0, e: 0.01, reward: 43.51, seudo-rew:0.0 max: 44.5945816040039

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-44000.keras


2025-07-18 07:22:03,574 - ADV - DEBUG - episode: 44099/50000, score: 0, e: 0.01, reward: 43.33, seudo-rew:0.0 max: 44.532981872558594 init-act: [28.804 39.855 29.855 45.509]
2025-07-18 07:22:12,023 - ADV - DEBUG - episode: 44199/50000, score: 0, e: 0.01, reward: 43.206, seudo-rew:0.0 max: 45.0321159362793 init-act: [28.899 39.84  29.871 45.485]
2025-07-18 07:22:20,372 - ADV - DEBUG - episode: 44299/50000, score: 0, e: 0.01, reward: 43.571, seudo-rew:0.0 max: 44.48052215576172 init-act: [28.996 39.851 29.845 45.473]
2025-07-18 07:22:28,617 - ADV - DEBUG - episode: 44399/50000, score: 0, e: 0.01, reward: 43.641, seudo-rew:0.0 max: 44.442832946777344 init-act: [29.016 39.852 29.854 45.447]
2025-07-18 07:22:37,038 - ADV - DEBUG - episode: 44499/50000, score: 0, e: 0.01, reward: 43.681, seudo-rew:0.0 max: 44.45335006713867 init-act: [28.991 39.873 29.94  45.438]
2025-07-18 07:22:45,396 - ADV - DEBUG - episode: 44599/50000, score: 0, e: 0.01, reward: 43.587, seudo-rew:0.0 max: 44.43595886230

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-46000.keras


2025-07-18 07:24:55,050 - ADV - DEBUG - episode: 46099/50000, score: 0, e: 0.01, reward: 43.371, seudo-rew:0.0 max: 44.6050910949707 init-act: [29.858 39.81  31.122 45.114]
2025-07-18 07:25:03,749 - ADV - DEBUG - episode: 46199/50000, score: 0, e: 0.01, reward: 43.492, seudo-rew:0.0 max: 44.09146499633789 init-act: [29.948 39.82  31.142 45.087]
2025-07-18 07:25:12,083 - ADV - DEBUG - episode: 46299/50000, score: 0, e: 0.01, reward: 43.732, seudo-rew:0.0 max: 44.51040267944336 init-act: [30.045 39.83  31.121 45.058]
2025-07-18 07:25:20,423 - ADV - DEBUG - episode: 46399/50000, score: 0, e: 0.01, reward: 43.729, seudo-rew:0.0 max: 44.03190231323242 init-act: [30.139 39.818 31.139 45.022]
2025-07-18 07:25:28,742 - ADV - DEBUG - episode: 46499/50000, score: 0, e: 0.01, reward: 43.679, seudo-rew:0.0 max: 44.47686004638672 init-act: [30.149 39.816 31.217 44.996]
2025-07-18 07:25:37,120 - ADV - DEBUG - episode: 46599/50000, score: 0, e: 0.01, reward: 43.69, seudo-rew:0.0 max: 43.9658432006835

dir crearted here -- trained_model\test_adv_RL_400000eps0.01lr1e-05\gpt\model-48000.keras


2025-07-18 07:27:43,206 - ADV - DEBUG - episode: 48099/50000, score: 0, e: 0.01, reward: 44.464, seudo-rew:0.0 max: 44.32093048095703 init-act: [31.019 40.219 32.094 44.936]
2025-07-18 07:27:51,501 - ADV - DEBUG - episode: 48199/50000, score: 0, e: 0.01, reward: 44.497, seudo-rew:0.0 max: 43.97494125366211 init-act: [31.058 40.238 32.192 44.972]
2025-07-18 07:27:59,919 - ADV - DEBUG - episode: 48299/50000, score: 0, e: 0.01, reward: 44.442, seudo-rew:0.0 max: 44.04121780395508 init-act: [31.147 40.273 32.209 45.028]
2025-07-18 07:28:08,340 - ADV - DEBUG - episode: 48399/50000, score: 0, e: 0.01, reward: 44.393, seudo-rew:0.0 max: 44.049198150634766 init-act: [31.215 40.308 32.238 45.04 ]
2025-07-18 07:28:16,754 - ADV - DEBUG - episode: 48499/50000, score: 0, e: 0.01, reward: 44.613, seudo-rew:0.0 max: 44.04066467285156 init-act: [31.275 40.317 32.252 45.012]
2025-07-18 07:28:25,091 - ADV - DEBUG - episode: 48599/50000, score: 0, e: 0.01, reward: 44.704, seudo-rew:0.0 max: 44.3248214721

400000 1e-05 0.1
📦 Training: buf=400000, lr=1e-05, eps=0.1 → trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt


2025-07-18 07:30:21,846 - ADV - DEBUG - Trained model saved to: trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-0.keras


dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-0.keras


2025-07-18 07:30:27,506 - ADV - DEBUG - episode: 99/50000, score: 0, e: 0.1, reward: 31.664, seudo-rew:0.0 max: 2.0592379570007324 init-act: [ 0.206  1.235  0.66  -0.32 ]
2025-07-18 07:30:33,059 - ADV - DEBUG - episode: 199/50000, score: 0, e: 0.1, reward: 32.297, seudo-rew:0.0 max: 2.3395776748657227 init-act: [ 0.242  1.367  0.697 -0.281]
2025-07-18 07:30:38,594 - ADV - DEBUG - episode: 299/50000, score: 0, e: 0.1, reward: 35.129, seudo-rew:0.0 max: 2.652740955352783 init-act: [ 0.285  1.528  0.73  -0.227]
2025-07-18 07:30:44,148 - ADV - DEBUG - episode: 399/50000, score: 0, e: 0.1, reward: 40.29, seudo-rew:0.0 max: 2.9799044132232666 init-act: [ 0.338  1.73   0.758 -0.159]
2025-07-18 07:30:52,714 - ADV - DEBUG - episode: 499/50000, score: 0, e: 0.1, reward: 40.363, seudo-rew:0.0 max: 3.184567451477051 init-act: [ 0.402  1.95   0.785 -0.077]
2025-07-18 07:31:01,047 - ADV - DEBUG - episode: 599/50000, score: 0, e: 0.1, reward: 40.118, seudo-rew:0.0 max: 3.4905686378479004 init-act: [0

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-2000.keras


2025-07-18 07:33:05,817 - ADV - DEBUG - episode: 2099/50000, score: 0, e: 0.1, reward: 40.233, seudo-rew:0.0 max: 10.085363388061523 init-act: [2.356 6.967 2.507 2.482]
2025-07-18 07:33:14,086 - ADV - DEBUG - episode: 2199/50000, score: 0, e: 0.1, reward: 39.925, seudo-rew:0.0 max: 10.60920238494873 init-act: [2.526 7.329 2.678 2.697]
2025-07-18 07:33:22,425 - ADV - DEBUG - episode: 2299/50000, score: 0, e: 0.1, reward: 40.145, seudo-rew:0.0 max: 11.180815696716309 init-act: [2.701 7.692 2.87  2.928]
2025-07-18 07:33:30,737 - ADV - DEBUG - episode: 2399/50000, score: 0, e: 0.1, reward: 40.086, seudo-rew:0.0 max: 11.76456356048584 init-act: [2.886 8.061 3.068 3.174]
2025-07-18 07:33:38,996 - ADV - DEBUG - episode: 2499/50000, score: 0, e: 0.1, reward: 40.126, seudo-rew:0.0 max: 12.38392162322998 init-act: [3.065 8.437 3.257 3.412]
2025-07-18 07:33:47,341 - ADV - DEBUG - episode: 2599/50000, score: 0, e: 0.1, reward: 40.473, seudo-rew:0.0 max: 12.933910369873047 init-act: [3.25  8.817 3.

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-4000.keras


2025-07-18 07:35:52,471 - ADV - DEBUG - episode: 4099/50000, score: 0, e: 0.1, reward: 42.554, seudo-rew:0.0 max: 23.02046012878418 init-act: [ 6.591 15.869  7.532  8.458]
2025-07-18 07:36:00,812 - ADV - DEBUG - episode: 4199/50000, score: 0, e: 0.1, reward: 43.136, seudo-rew:0.0 max: 23.868030548095703 init-act: [ 6.87  16.434  7.879  8.831]
2025-07-18 07:36:09,214 - ADV - DEBUG - episode: 4299/50000, score: 0, e: 0.1, reward: 43.251, seudo-rew:0.0 max: 24.59576416015625 init-act: [ 7.205 17.011  8.244  9.24 ]
2025-07-18 07:36:17,868 - ADV - DEBUG - episode: 4399/50000, score: 0, e: 0.1, reward: 42.886, seudo-rew:0.0 max: 25.432756423950195 init-act: [ 7.511 17.554  8.589  9.667]
2025-07-18 07:36:26,126 - ADV - DEBUG - episode: 4499/50000, score: 0, e: 0.1, reward: 43.152, seudo-rew:0.0 max: 26.199129104614258 init-act: [ 7.825 18.128  8.947 10.118]
2025-07-18 07:36:34,489 - ADV - DEBUG - episode: 4599/50000, score: 0, e: 0.1, reward: 43.134, seudo-rew:0.0 max: 26.98807144165039 init-

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-6000.keras


2025-07-18 07:38:39,927 - ADV - DEBUG - episode: 6099/50000, score: 0, e: 0.1, reward: 41.641, seudo-rew:0.0 max: 39.11354064941406 init-act: [14.16  27.795 15.388 18.654]
2025-07-18 07:38:48,147 - ADV - DEBUG - episode: 6199/50000, score: 0, e: 0.1, reward: 41.834, seudo-rew:0.0 max: 39.84392166137695 init-act: [14.621 28.41  15.815 19.236]
2025-07-18 07:38:56,572 - ADV - DEBUG - episode: 6299/50000, score: 0, e: 0.1, reward: 42.164, seudo-rew:0.0 max: 40.67835235595703 init-act: [15.05  29.064 16.253 19.84 ]
2025-07-18 07:39:04,956 - ADV - DEBUG - episode: 6399/50000, score: 0, e: 0.1, reward: 41.849, seudo-rew:0.0 max: 41.55392837524414 init-act: [15.467 29.715 16.669 20.436]
2025-07-18 07:39:13,313 - ADV - DEBUG - episode: 6499/50000, score: 0, e: 0.1, reward: 41.861, seudo-rew:0.0 max: 42.26980209350586 init-act: [15.897 30.367 17.097 21.029]
2025-07-18 07:39:21,552 - ADV - DEBUG - episode: 6599/50000, score: 0, e: 0.1, reward: 42.068, seudo-rew:0.0 max: 43.21829605102539 init-act

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-8000.keras


2025-07-18 07:41:27,307 - ADV - DEBUG - episode: 8099/50000, score: 0, e: 0.1, reward: 41.897, seudo-rew:0.0 max: 55.836700439453125 init-act: [23.607 41.706 24.615 30.525]
2025-07-18 07:41:35,669 - ADV - DEBUG - episode: 8199/50000, score: 0, e: 0.1, reward: 41.873, seudo-rew:0.0 max: 56.6780891418457 init-act: [24.102 42.465 25.145 31.144]
2025-07-18 07:41:43,938 - ADV - DEBUG - episode: 8299/50000, score: 0, e: 0.1, reward: 41.739, seudo-rew:0.0 max: 57.679019927978516 init-act: [24.587 43.215 25.701 31.775]
2025-07-18 07:41:52,273 - ADV - DEBUG - episode: 8399/50000, score: 0, e: 0.1, reward: 41.757, seudo-rew:0.0 max: 58.53181457519531 init-act: [25.014 43.981 26.23  32.397]
2025-07-18 07:42:00,646 - ADV - DEBUG - episode: 8499/50000, score: 0, e: 0.1, reward: 41.726, seudo-rew:0.0 max: 59.453495025634766 init-act: [25.477 44.702 26.738 33.012]
2025-07-18 07:42:09,007 - ADV - DEBUG - episode: 8599/50000, score: 0, e: 0.1, reward: 41.881, seudo-rew:0.0 max: 60.32792282104492 init-a

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-10000.keras


2025-07-18 07:44:14,651 - ADV - DEBUG - episode: 10099/50000, score: 0, e: 0.1, reward: 41.521, seudo-rew:0.0 max: 73.02659606933594 init-act: [33.578 56.544 34.173 42.75 ]
2025-07-18 07:44:22,952 - ADV - DEBUG - episode: 10199/50000, score: 0, e: 0.1, reward: 41.678, seudo-rew:0.0 max: 73.81163787841797 init-act: [34.006 57.228 34.666 43.292]
2025-07-18 07:44:31,238 - ADV - DEBUG - episode: 10299/50000, score: 0, e: 0.1, reward: 41.323, seudo-rew:0.0 max: 74.67267608642578 init-act: [34.441 58.03  35.244 43.98 ]
2025-07-18 07:44:39,529 - ADV - DEBUG - episode: 10399/50000, score: 0, e: 0.1, reward: 41.834, seudo-rew:0.0 max: 75.49594116210938 init-act: [34.878 58.749 35.666 44.549]
2025-07-18 07:44:47,725 - ADV - DEBUG - episode: 10499/50000, score: 0, e: 0.1, reward: 41.573, seudo-rew:0.0 max: 76.3237075805664 init-act: [35.32  59.473 36.049 45.136]
2025-07-18 07:44:56,042 - ADV - DEBUG - episode: 10599/50000, score: 0, e: 0.1, reward: 41.578, seudo-rew:0.0 max: 77.10343170166016 ini

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-12000.keras


2025-07-18 07:47:01,408 - ADV - DEBUG - episode: 12099/50000, score: 0, e: 0.1, reward: 41.555, seudo-rew:0.0 max: 87.81573486328125 init-act: [42.208 70.485 42.224 54.144]
2025-07-18 07:47:09,758 - ADV - DEBUG - episode: 12199/50000, score: 0, e: 0.1, reward: 41.786, seudo-rew:0.0 max: 88.49530029296875 init-act: [42.563 71.149 42.612 54.694]
2025-07-18 07:47:18,047 - ADV - DEBUG - episode: 12299/50000, score: 0, e: 0.1, reward: 41.765, seudo-rew:0.0 max: 89.14437866210938 init-act: [42.912 71.837 43.052 55.28 ]
2025-07-18 07:47:26,394 - ADV - DEBUG - episode: 12399/50000, score: 0, e: 0.1, reward: 41.681, seudo-rew:0.0 max: 89.79666900634766 init-act: [43.187 72.497 43.481 55.931]
2025-07-18 07:47:34,805 - ADV - DEBUG - episode: 12499/50000, score: 0, e: 0.1, reward: 41.856, seudo-rew:0.0 max: 90.44132995605469 init-act: [43.517 73.164 43.801 56.414]
2025-07-18 07:47:43,160 - ADV - DEBUG - episode: 12599/50000, score: 0, e: 0.1, reward: 41.855, seudo-rew:0.0 max: 91.15557098388672 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-14000.keras


2025-07-18 07:49:48,706 - ADV - DEBUG - episode: 14099/50000, score: 0, e: 0.1, reward: 41.655, seudo-rew:0.0 max: 99.93012237548828 init-act: [48.644 83.115 48.423 64.536]
2025-07-18 07:49:57,154 - ADV - DEBUG - episode: 14199/50000, score: 0, e: 0.1, reward: 41.711, seudo-rew:0.0 max: 100.4182357788086 init-act: [48.901 83.74  48.712 65.004]
2025-07-18 07:50:05,451 - ADV - DEBUG - episode: 14299/50000, score: 0, e: 0.1, reward: 41.774, seudo-rew:0.0 max: 100.91165924072266 init-act: [49.16  84.33  49.004 65.457]
2025-07-18 07:50:13,735 - ADV - DEBUG - episode: 14399/50000, score: 0, e: 0.1, reward: 41.496, seudo-rew:0.0 max: 101.37165069580078 init-act: [49.343 84.864 49.28  65.879]
2025-07-18 07:50:22,034 - ADV - DEBUG - episode: 14499/50000, score: 0, e: 0.1, reward: 41.897, seudo-rew:0.0 max: 101.82431030273438 init-act: [49.539 85.38  49.55  66.382]
2025-07-18 07:50:30,438 - ADV - DEBUG - episode: 14599/50000, score: 0, e: 0.1, reward: 41.61, seudo-rew:0.0 max: 102.21038818359375

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-16000.keras


2025-07-18 07:52:35,711 - ADV - DEBUG - episode: 16099/50000, score: 0, e: 0.1, reward: 41.657, seudo-rew:0.0 max: 107.15565490722656 init-act: [52.489 92.943 52.63  73.029]
2025-07-18 07:52:44,100 - ADV - DEBUG - episode: 16199/50000, score: 0, e: 0.1, reward: 41.954, seudo-rew:0.0 max: 107.39710235595703 init-act: [52.71  93.479 52.806 73.45 ]
2025-07-18 07:52:52,387 - ADV - DEBUG - episode: 16299/50000, score: 0, e: 0.1, reward: 41.555, seudo-rew:0.0 max: 107.77392578125 init-act: [52.82  93.836 52.85  73.706]
2025-07-18 07:53:00,695 - ADV - DEBUG - episode: 16399/50000, score: 0, e: 0.1, reward: 41.533, seudo-rew:0.0 max: 107.90374755859375 init-act: [52.954 94.109 52.913 73.946]
2025-07-18 07:53:09,107 - ADV - DEBUG - episode: 16499/50000, score: 0, e: 0.1, reward: 41.399, seudo-rew:0.0 max: 107.9337387084961 init-act: [53.035 94.445 52.941 74.214]
2025-07-18 07:53:17,433 - ADV - DEBUG - episode: 16599/50000, score: 0, e: 0.1, reward: 41.675, seudo-rew:0.0 max: 108.07586669921875 

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-18000.keras


2025-07-18 07:55:22,184 - ADV - DEBUG - episode: 18099/50000, score: 0, e: 0.1, reward: 41.892, seudo-rew:0.0 max: 108.65359497070312 init-act: [53.508 98.148 53.526 77.068]
2025-07-18 07:55:30,497 - ADV - DEBUG - episode: 18199/50000, score: 0, e: 0.1, reward: 41.83, seudo-rew:0.0 max: 108.65540313720703 init-act: [53.516 98.352 53.543 77.177]
2025-07-18 07:55:38,924 - ADV - DEBUG - episode: 18299/50000, score: 0, e: 0.1, reward: 41.468, seudo-rew:0.0 max: 108.64132690429688 init-act: [53.544 98.446 53.419 77.227]
2025-07-18 07:55:47,323 - ADV - DEBUG - episode: 18399/50000, score: 0, e: 0.1, reward: 41.639, seudo-rew:0.0 max: 108.52461242675781 init-act: [53.479 98.529 53.325 77.2  ]
2025-07-18 07:55:55,960 - ADV - DEBUG - episode: 18499/50000, score: 0, e: 0.1, reward: 41.719, seudo-rew:0.0 max: 108.43684387207031 init-act: [53.378 98.493 53.206 77.113]
2025-07-18 07:56:04,296 - ADV - DEBUG - episode: 18599/50000, score: 0, e: 0.1, reward: 41.921, seudo-rew:0.0 max: 108.154266357421

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-20000.keras


2025-07-18 07:58:09,420 - ADV - DEBUG - episode: 20099/50000, score: 0, e: 0.1, reward: 41.893, seudo-rew:0.0 max: 105.3828125 init-act: [51.944 98.835 51.993 76.883]
2025-07-18 07:58:17,826 - ADV - DEBUG - episode: 20199/50000, score: 0, e: 0.1, reward: 41.416, seudo-rew:0.0 max: 105.18010711669922 init-act: [51.879 98.719 51.886 76.984]
2025-07-18 07:58:26,098 - ADV - DEBUG - episode: 20299/50000, score: 0, e: 0.1, reward: 41.937, seudo-rew:0.0 max: 104.88825225830078 init-act: [51.751 98.611 51.772 77.008]
2025-07-18 07:58:34,449 - ADV - DEBUG - episode: 20399/50000, score: 0, e: 0.1, reward: 41.803, seudo-rew:0.0 max: 104.64103698730469 init-act: [51.613 98.492 51.694 76.852]
2025-07-18 07:58:42,774 - ADV - DEBUG - episode: 20499/50000, score: 0, e: 0.1, reward: 41.761, seudo-rew:0.0 max: 104.3676528930664 init-act: [51.469 98.443 51.551 76.419]
2025-07-18 07:58:51,144 - ADV - DEBUG - episode: 20599/50000, score: 0, e: 0.1, reward: 41.8, seudo-rew:0.0 max: 104.09695434570312 init-a

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-22000.keras


2025-07-18 08:00:56,493 - ADV - DEBUG - episode: 22099/50000, score: 0, e: 0.1, reward: 41.324, seudo-rew:0.0 max: 99.97576904296875 init-act: [49.394 96.182 49.254 73.286]
2025-07-18 08:01:04,942 - ADV - DEBUG - episode: 22199/50000, score: 0, e: 0.1, reward: 41.723, seudo-rew:0.0 max: 99.42671966552734 init-act: [49.34  95.835 49.199 73.612]
2025-07-18 08:01:13,398 - ADV - DEBUG - episode: 22299/50000, score: 0, e: 0.1, reward: 41.291, seudo-rew:0.0 max: 98.84009552001953 init-act: [49.138 95.56  49.059 73.562]
2025-07-18 08:01:21,684 - ADV - DEBUG - episode: 22399/50000, score: 0, e: 0.1, reward: 41.176, seudo-rew:0.0 max: 98.42825317382812 init-act: [48.927 95.26  48.75  72.997]
2025-07-18 08:01:30,050 - ADV - DEBUG - episode: 22499/50000, score: 0, e: 0.1, reward: 41.864, seudo-rew:0.0 max: 98.03968811035156 init-act: [48.727 95.071 48.513 72.487]
2025-07-18 08:01:38,378 - ADV - DEBUG - episode: 22599/50000, score: 0, e: 0.1, reward: 41.323, seudo-rew:0.0 max: 97.7618637084961 ini

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-24000.keras


2025-07-18 08:03:43,061 - ADV - DEBUG - episode: 24099/50000, score: 0, e: 0.1, reward: 40.91, seudo-rew:0.0 max: 91.73988342285156 init-act: [45.793 90.124 45.73  67.806]
2025-07-18 08:03:51,308 - ADV - DEBUG - episode: 24199/50000, score: 0, e: 0.1, reward: 41.344, seudo-rew:0.0 max: 91.29920196533203 init-act: [45.781 89.727 45.609 67.949]
2025-07-18 08:03:59,615 - ADV - DEBUG - episode: 24299/50000, score: 0, e: 0.1, reward: 41.291, seudo-rew:0.0 max: 90.74469757080078 init-act: [45.691 89.366 45.465 67.95 ]
2025-07-18 08:04:07,976 - ADV - DEBUG - episode: 24399/50000, score: 0, e: 0.1, reward: 40.66, seudo-rew:0.0 max: 90.1936264038086 init-act: [45.556 89.054 45.31  67.63 ]
2025-07-18 08:04:16,324 - ADV - DEBUG - episode: 24499/50000, score: 0, e: 0.1, reward: 40.785, seudo-rew:0.0 max: 89.77590942382812 init-act: [45.375 88.727 45.067 66.961]
2025-07-18 08:04:24,604 - ADV - DEBUG - episode: 24599/50000, score: 0, e: 0.1, reward: 40.818, seudo-rew:0.0 max: 89.2985610961914 init-a

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-26000.keras


2025-07-18 08:06:29,560 - ADV - DEBUG - episode: 26099/50000, score: 0, e: 0.1, reward: 41.32, seudo-rew:0.0 max: 82.49889373779297 init-act: [43.19  82.534 42.66  61.263]
2025-07-18 08:06:38,130 - ADV - DEBUG - episode: 26199/50000, score: 0, e: 0.1, reward: 40.867, seudo-rew:0.0 max: 82.12445831298828 init-act: [43.118 82.104 42.57  61.156]
2025-07-18 08:06:46,676 - ADV - DEBUG - episode: 26299/50000, score: 0, e: 0.1, reward: 40.806, seudo-rew:0.0 max: 81.6567153930664 init-act: [43.101 81.662 42.495 61.289]
2025-07-18 08:06:55,044 - ADV - DEBUG - episode: 26399/50000, score: 0, e: 0.1, reward: 40.965, seudo-rew:0.0 max: 81.15472412109375 init-act: [42.988 81.262 42.422 61.308]
2025-07-18 08:07:03,410 - ADV - DEBUG - episode: 26499/50000, score: 0, e: 0.1, reward: 40.856, seudo-rew:0.0 max: 80.73175811767578 init-act: [42.798 80.89  42.347 60.996]
2025-07-18 08:07:11,677 - ADV - DEBUG - episode: 26599/50000, score: 0, e: 0.1, reward: 40.868, seudo-rew:0.0 max: 80.29803466796875 init

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-28000.keras


2025-07-18 08:09:16,586 - ADV - DEBUG - episode: 28099/50000, score: 0, e: 0.1, reward: 40.78, seudo-rew:0.0 max: 73.76615142822266 init-act: [41.57  74.667 40.918 56.301]
2025-07-18 08:09:24,951 - ADV - DEBUG - episode: 28199/50000, score: 0, e: 0.1, reward: 40.596, seudo-rew:0.0 max: 73.39674377441406 init-act: [41.449 74.326 40.792 55.753]
2025-07-18 08:09:33,363 - ADV - DEBUG - episode: 28299/50000, score: 0, e: 0.1, reward: 40.882, seudo-rew:0.0 max: 73.03160858154297 init-act: [41.425 73.946 40.703 55.473]
2025-07-18 08:09:41,729 - ADV - DEBUG - episode: 28399/50000, score: 0, e: 0.1, reward: 40.628, seudo-rew:0.0 max: 72.65169525146484 init-act: [41.436 73.581 40.661 55.418]
2025-07-18 08:09:50,100 - ADV - DEBUG - episode: 28499/50000, score: 0, e: 0.1, reward: 41.002, seudo-rew:0.0 max: 72.21603393554688 init-act: [41.435 73.146 40.567 55.394]
2025-07-18 08:09:58,394 - ADV - DEBUG - episode: 28599/50000, score: 0, e: 0.1, reward: 40.768, seudo-rew:0.0 max: 71.70128631591797 ini

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-30000.keras


2025-07-18 08:12:04,304 - ADV - DEBUG - episode: 30099/50000, score: 0, e: 0.1, reward: 40.269, seudo-rew:0.0 max: 65.25025939941406 init-act: [40.268 66.722 39.316 52.035]
2025-07-18 08:12:12,537 - ADV - DEBUG - episode: 30199/50000, score: 0, e: 0.1, reward: 40.266, seudo-rew:0.0 max: 64.8661117553711 init-act: [40.127 66.403 39.336 51.961]
2025-07-18 08:12:20,788 - ADV - DEBUG - episode: 30299/50000, score: 0, e: 0.1, reward: 39.916, seudo-rew:0.0 max: 64.54029083251953 init-act: [40.059 66.032 39.35  51.825]
2025-07-18 08:12:29,148 - ADV - DEBUG - episode: 30399/50000, score: 0, e: 0.1, reward: 40.212, seudo-rew:0.0 max: 64.1426010131836 init-act: [40.058 65.624 39.181 51.633]
2025-07-18 08:12:37,492 - ADV - DEBUG - episode: 30499/50000, score: 0, e: 0.1, reward: 39.823, seudo-rew:0.0 max: 63.753414154052734 init-act: [40.139 65.153 38.996 51.46 ]
2025-07-18 08:12:46,074 - ADV - DEBUG - episode: 30599/50000, score: 0, e: 0.1, reward: 39.84, seudo-rew:0.0 max: 63.35025405883789 init

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-32000.keras


2025-07-18 08:14:54,330 - ADV - DEBUG - episode: 32099/50000, score: 0, e: 0.1, reward: 38.917, seudo-rew:0.0 max: 58.11317443847656 init-act: [38.818 59.148 38.204 49.89 ]
2025-07-18 08:15:02,631 - ADV - DEBUG - episode: 32199/50000, score: 0, e: 0.1, reward: 38.367, seudo-rew:0.0 max: 57.784183502197266 init-act: [38.65  58.804 38.28  49.832]
2025-07-18 08:15:11,040 - ADV - DEBUG - episode: 32299/50000, score: 0, e: 0.1, reward: 38.031, seudo-rew:0.0 max: 57.483612060546875 init-act: [38.635 58.496 38.176 49.75 ]
2025-07-18 08:15:19,443 - ADV - DEBUG - episode: 32399/50000, score: 0, e: 0.1, reward: 38.03, seudo-rew:0.0 max: 57.17732238769531 init-act: [38.749 58.184 38.012 49.641]
2025-07-18 08:15:27,804 - ADV - DEBUG - episode: 32499/50000, score: 0, e: 0.1, reward: 38.019, seudo-rew:0.0 max: 56.87187194824219 init-act: [38.955 57.851 37.862 49.476]
2025-07-18 08:15:36,136 - ADV - DEBUG - episode: 32599/50000, score: 0, e: 0.1, reward: 38.243, seudo-rew:0.0 max: 56.59978485107422 i

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-34000.keras


2025-07-18 08:17:41,602 - ADV - DEBUG - episode: 34099/50000, score: 0, e: 0.1, reward: 36.524, seudo-rew:0.0 max: 52.295188903808594 init-act: [38.741 53.039 37.766 47.031]
2025-07-18 08:17:49,967 - ADV - DEBUG - episode: 34199/50000, score: 0, e: 0.1, reward: 36.326, seudo-rew:0.0 max: 51.97627639770508 init-act: [38.613 52.773 37.816 46.868]
2025-07-18 08:17:58,282 - ADV - DEBUG - episode: 34299/50000, score: 0, e: 0.1, reward: 36.039, seudo-rew:0.0 max: 51.69534683227539 init-act: [38.492 52.56  37.87  46.735]
2025-07-18 08:18:06,835 - ADV - DEBUG - episode: 34399/50000, score: 0, e: 0.1, reward: 35.729, seudo-rew:0.0 max: 51.428932189941406 init-act: [38.389 52.298 37.822 46.586]
2025-07-18 08:18:15,332 - ADV - DEBUG - episode: 34499/50000, score: 0, e: 0.1, reward: 35.584, seudo-rew:0.0 max: 51.10685729980469 init-act: [38.319 51.968 37.669 46.385]
2025-07-18 08:18:24,051 - ADV - DEBUG - episode: 34599/50000, score: 0, e: 0.1, reward: 36.071, seudo-rew:0.0 max: 50.78716278076172 

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-36000.keras


2025-07-18 08:20:28,751 - ADV - DEBUG - episode: 36099/50000, score: 0, e: 0.1, reward: 32.543, seudo-rew:0.0 max: 45.86305236816406 init-act: [36.549 46.89  36.25  43.372]
2025-07-18 08:20:37,000 - ADV - DEBUG - episode: 36199/50000, score: 0, e: 0.1, reward: 32.234, seudo-rew:0.0 max: 45.53718566894531 init-act: [36.414 46.586 36.09  43.176]
2025-07-18 08:20:45,350 - ADV - DEBUG - episode: 36299/50000, score: 0, e: 0.1, reward: 32.187, seudo-rew:0.0 max: 45.223873138427734 init-act: [36.267 46.293 35.934 42.961]
2025-07-18 08:20:53,955 - ADV - DEBUG - episode: 36399/50000, score: 0, e: 0.1, reward: 32.063, seudo-rew:0.0 max: 44.899253845214844 init-act: [36.153 45.944 35.795 42.748]
2025-07-18 08:21:02,534 - ADV - DEBUG - episode: 36499/50000, score: 0, e: 0.1, reward: 31.879, seudo-rew:0.0 max: 44.5406379699707 init-act: [35.981 45.59  35.591 42.505]
2025-07-18 08:21:10,895 - ADV - DEBUG - episode: 36599/50000, score: 0, e: 0.1, reward: 31.358, seudo-rew:0.0 max: 44.1915397644043 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-38000.keras


2025-07-18 08:23:16,491 - ADV - DEBUG - episode: 38099/50000, score: 0, e: 0.1, reward: 30.004, seudo-rew:0.0 max: 39.9923210144043 init-act: [33.561 40.96  33.363 39.151]
2025-07-18 08:23:24,808 - ADV - DEBUG - episode: 38199/50000, score: 0, e: 0.1, reward: 29.793, seudo-rew:0.0 max: 39.73890686035156 init-act: [33.416 40.715 33.209 38.969]
2025-07-18 08:23:33,118 - ADV - DEBUG - episode: 38299/50000, score: 0, e: 0.1, reward: 29.835, seudo-rew:0.0 max: 39.4803581237793 init-act: [33.279 40.428 33.059 38.727]
2025-07-18 08:23:41,556 - ADV - DEBUG - episode: 38399/50000, score: 0, e: 0.1, reward: 29.664, seudo-rew:0.0 max: 39.19677734375 init-act: [33.127 40.117 32.908 38.5  ]
2025-07-18 08:23:49,933 - ADV - DEBUG - episode: 38499/50000, score: 0, e: 0.1, reward: 29.324, seudo-rew:0.0 max: 38.942970275878906 init-act: [32.984 39.879 32.795 38.318]
2025-07-18 08:23:58,207 - ADV - DEBUG - episode: 38599/50000, score: 0, e: 0.1, reward: 29.503, seudo-rew:0.0 max: 38.690025329589844 init-

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-40000.keras


2025-07-18 08:26:04,109 - ADV - DEBUG - episode: 40099/50000, score: 0, e: 0.1, reward: 29.207, seudo-rew:0.0 max: 35.34774398803711 init-act: [30.924 36.375 30.738 35.468]
2025-07-18 08:26:12,549 - ADV - DEBUG - episode: 40199/50000, score: 0, e: 0.1, reward: 29.397, seudo-rew:0.0 max: 35.19580841064453 init-act: [30.841 36.203 30.652 35.316]
2025-07-18 08:26:20,933 - ADV - DEBUG - episode: 40299/50000, score: 0, e: 0.1, reward: 28.233, seudo-rew:0.0 max: 34.997154235839844 init-act: [30.699 35.979 30.531 35.147]
2025-07-18 08:26:29,281 - ADV - DEBUG - episode: 40399/50000, score: 0, e: 0.1, reward: 28.969, seudo-rew:0.0 max: 34.83715057373047 init-act: [30.604 35.857 30.427 35.031]
2025-07-18 08:26:37,776 - ADV - DEBUG - episode: 40499/50000, score: 0, e: 0.1, reward: 29.525, seudo-rew:0.0 max: 34.72731018066406 init-act: [30.541 35.609 30.326 34.91 ]
2025-07-18 08:26:46,164 - ADV - DEBUG - episode: 40599/50000, score: 0, e: 0.1, reward: 29.325, seudo-rew:0.0 max: 34.53420639038086 i

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-42000.keras


2025-07-18 08:28:51,322 - ADV - DEBUG - episode: 42099/50000, score: 0, e: 0.1, reward: 27.921, seudo-rew:0.0 max: 32.23846435546875 init-act: [28.643 33.124 28.484 32.918]
2025-07-18 08:28:59,596 - ADV - DEBUG - episode: 42199/50000, score: 0, e: 0.1, reward: 27.572, seudo-rew:0.0 max: 32.11647033691406 init-act: [28.484 32.977 28.371 32.805]
2025-07-18 08:29:07,931 - ADV - DEBUG - episode: 42299/50000, score: 0, e: 0.1, reward: 27.612, seudo-rew:0.0 max: 31.965635299682617 init-act: [28.359 32.873 28.26  32.639]
2025-07-18 08:29:16,254 - ADV - DEBUG - episode: 42399/50000, score: 0, e: 0.1, reward: 27.686, seudo-rew:0.0 max: 31.760622024536133 init-act: [28.212 32.646 28.133 32.45 ]
2025-07-18 08:29:24,541 - ADV - DEBUG - episode: 42499/50000, score: 0, e: 0.1, reward: 27.526, seudo-rew:0.0 max: 31.617549896240234 init-act: [28.058 32.504 28.051 32.325]
2025-07-18 08:29:32,844 - ADV - DEBUG - episode: 42599/50000, score: 0, e: 0.1, reward: 27.482, seudo-rew:0.0 max: 31.51567459106445

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-44000.keras


2025-07-18 08:31:38,607 - ADV - DEBUG - episode: 44099/50000, score: 0, e: 0.1, reward: 27.407, seudo-rew:0.0 max: 30.092424392700195 init-act: [26.564 31.043 26.705 30.937]
2025-07-18 08:31:46,912 - ADV - DEBUG - episode: 44199/50000, score: 0, e: 0.1, reward: 27.434, seudo-rew:0.0 max: 30.011207580566406 init-act: [26.478 30.872 26.609 30.84 ]
2025-07-18 08:31:55,167 - ADV - DEBUG - episode: 44299/50000, score: 0, e: 0.1, reward: 27.182, seudo-rew:0.0 max: 29.891504287719727 init-act: [26.404 30.821 26.521 30.767]
2025-07-18 08:32:03,428 - ADV - DEBUG - episode: 44399/50000, score: 0, e: 0.1, reward: 26.913, seudo-rew:0.0 max: 29.800500869750977 init-act: [26.29  30.716 26.453 30.646]
2025-07-18 08:32:11,785 - ADV - DEBUG - episode: 44499/50000, score: 0, e: 0.1, reward: 27.26, seudo-rew:0.0 max: 29.644081115722656 init-act: [26.207 30.555 26.352 30.519]
2025-07-18 08:32:20,116 - ADV - DEBUG - episode: 44599/50000, score: 0, e: 0.1, reward: 27.135, seudo-rew:0.0 max: 29.5176448822021

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-46000.keras


2025-07-18 08:34:24,994 - ADV - DEBUG - episode: 46099/50000, score: 0, e: 0.1, reward: 26.825, seudo-rew:0.0 max: 27.95145034790039 init-act: [25.082 28.918 25.277 28.785]
2025-07-18 08:34:33,258 - ADV - DEBUG - episode: 46199/50000, score: 0, e: 0.1, reward: 26.708, seudo-rew:0.0 max: 27.876556396484375 init-act: [25.047 28.781 25.23  28.711]
2025-07-18 08:34:41,573 - ADV - DEBUG - episode: 46299/50000, score: 0, e: 0.1, reward: 26.881, seudo-rew:0.0 max: 27.793455123901367 init-act: [24.991 28.699 25.213 28.674]
2025-07-18 08:34:49,933 - ADV - DEBUG - episode: 46399/50000, score: 0, e: 0.1, reward: 26.775, seudo-rew:0.0 max: 27.742473602294922 init-act: [24.913 28.58  25.132 28.605]
2025-07-18 08:34:58,292 - ADV - DEBUG - episode: 46499/50000, score: 0, e: 0.1, reward: 26.472, seudo-rew:0.0 max: 27.671520233154297 init-act: [24.873 28.587 25.114 28.517]
2025-07-18 08:35:06,968 - ADV - DEBUG - episode: 46599/50000, score: 0, e: 0.1, reward: 26.599, seudo-rew:0.0 max: 27.5693855285644

dir crearted here -- trained_model\test_adv_RL_400000eps0.1lr1e-05\gpt\model-48000.keras


2025-07-18 08:37:12,399 - ADV - DEBUG - episode: 48099/50000, score: 0, e: 0.1, reward: 26.626, seudo-rew:0.0 max: 26.830278396606445 init-act: [24.252 27.775 24.365 27.686]
2025-07-18 08:37:20,832 - ADV - DEBUG - episode: 48199/50000, score: 0, e: 0.1, reward: 26.592, seudo-rew:0.0 max: 26.759246826171875 init-act: [24.205 27.662 24.316 27.631]
2025-07-18 08:37:29,156 - ADV - DEBUG - episode: 48299/50000, score: 0, e: 0.1, reward: 26.726, seudo-rew:0.0 max: 26.71950912475586 init-act: [24.193 27.66  24.298 27.587]
2025-07-18 08:37:37,476 - ADV - DEBUG - episode: 48399/50000, score: 0, e: 0.1, reward: 26.514, seudo-rew:0.0 max: 26.67494010925293 init-act: [24.175 27.593 24.27  27.565]
2025-07-18 08:37:45,924 - ADV - DEBUG - episode: 48499/50000, score: 0, e: 0.1, reward: 26.378, seudo-rew:0.0 max: 26.67240333557129 init-act: [24.166 27.584 24.259 27.533]
2025-07-18 08:37:54,316 - ADV - DEBUG - episode: 48599/50000, score: 0, e: 0.1, reward: 26.566, seudo-rew:0.0 max: 26.623441696166992

400000 1e-05 0.2
📦 Training: buf=400000, lr=1e-05, eps=0.2 → trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt


2025-07-18 08:39:55,618 - ADV - DEBUG - Trained model saved to: trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-0.keras


dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-0.keras


2025-07-18 08:40:01,271 - ADV - DEBUG - episode: 99/50000, score: 0, e: 0.2, reward: 20.355, seudo-rew:0.0 max: 2.0169894695281982 init-act: [ 0.185 -0.411  1.108 -0.103]
2025-07-18 08:40:06,806 - ADV - DEBUG - episode: 199/50000, score: 0, e: 0.2, reward: 20.599, seudo-rew:0.0 max: 2.2368104457855225 init-act: [ 0.204 -0.352  1.194 -0.044]
2025-07-18 08:40:12,605 - ADV - DEBUG - episode: 299/50000, score: 0, e: 0.2, reward: 20.255, seudo-rew:0.0 max: 2.341297149658203 init-act: [ 0.234 -0.284  1.288  0.016]
2025-07-18 08:40:18,212 - ADV - DEBUG - episode: 399/50000, score: 0, e: 0.2, reward: 20.176, seudo-rew:0.0 max: 2.542210578918457 init-act: [ 0.269 -0.219  1.395  0.086]
2025-07-18 08:40:26,683 - ADV - DEBUG - episode: 499/50000, score: 0, e: 0.2, reward: 20.391, seudo-rew:0.0 max: 2.741539478302002 init-act: [ 0.3   -0.155  1.498  0.162]
2025-07-18 08:40:35,081 - ADV - DEBUG - episode: 599/50000, score: 0, e: 0.2, reward: 20.476, seudo-rew:0.0 max: 2.8266780376434326 init-act: [ 

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-2000.keras


2025-07-18 08:42:40,200 - ADV - DEBUG - episode: 2099/50000, score: 0, e: 0.2, reward: 20.631, seudo-rew:0.0 max: 6.113882541656494 init-act: [1.181 1.167 3.836 1.716]
2025-07-18 08:42:48,880 - ADV - DEBUG - episode: 2199/50000, score: 0, e: 0.2, reward: 20.708, seudo-rew:0.0 max: 6.185186386108398 init-act: [1.262 1.273 4.017 1.861]
2025-07-18 08:42:57,229 - ADV - DEBUG - episode: 2299/50000, score: 0, e: 0.2, reward: 20.758, seudo-rew:0.0 max: 6.550512790679932 init-act: [1.369 1.391 4.198 2.014]
2025-07-18 08:43:05,562 - ADV - DEBUG - episode: 2399/50000, score: 0, e: 0.2, reward: 20.55, seudo-rew:0.0 max: 6.709402084350586 init-act: [1.472 1.515 4.392 2.16 ]
2025-07-18 08:43:13,906 - ADV - DEBUG - episode: 2499/50000, score: 0, e: 0.2, reward: 20.64, seudo-rew:0.0 max: 7.072977066040039 init-act: [1.577 1.647 4.584 2.312]
2025-07-18 08:43:22,315 - ADV - DEBUG - episode: 2599/50000, score: 0, e: 0.2, reward: 20.775, seudo-rew:0.0 max: 7.2325215339660645 init-act: [1.685 1.788 4.78  

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-4000.keras


2025-07-18 08:45:27,747 - ADV - DEBUG - episode: 4099/50000, score: 0, e: 0.2, reward: 20.981, seudo-rew:0.0 max: 11.722322463989258 init-act: [3.969 4.145 7.995 5.285]
2025-07-18 08:45:36,060 - ADV - DEBUG - episode: 4199/50000, score: 0, e: 0.2, reward: 20.883, seudo-rew:0.0 max: 11.756263732910156 init-act: [4.12  4.374 8.229 5.506]
2025-07-18 08:45:44,317 - ADV - DEBUG - episode: 4299/50000, score: 0, e: 0.2, reward: 21.112, seudo-rew:0.0 max: 11.871991157531738 init-act: [4.288 4.555 8.461 5.716]
2025-07-18 08:45:52,809 - ADV - DEBUG - episode: 4399/50000, score: 0, e: 0.2, reward: 21.046, seudo-rew:0.0 max: 12.250914573669434 init-act: [4.516 4.75  8.685 5.957]
2025-07-18 08:46:01,259 - ADV - DEBUG - episode: 4499/50000, score: 0, e: 0.2, reward: 20.86, seudo-rew:0.0 max: 12.744641304016113 init-act: [4.741 4.973 8.917 6.184]
2025-07-18 08:46:09,706 - ADV - DEBUG - episode: 4599/50000, score: 0, e: 0.2, reward: 21.092, seudo-rew:0.0 max: 12.97791862487793 init-act: [4.974 5.182 9

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-6000.keras


2025-07-18 08:48:14,970 - ADV - DEBUG - episode: 6099/50000, score: 0, e: 0.2, reward: 21.382, seudo-rew:0.0 max: 17.53070640563965 init-act: [ 8.232  8.779 12.9   10.428]
2025-07-18 08:48:23,317 - ADV - DEBUG - episode: 6199/50000, score: 0, e: 0.2, reward: 21.313, seudo-rew:0.0 max: 17.443836212158203 init-act: [ 8.481  9.004 13.147 10.714]
2025-07-18 08:48:31,784 - ADV - DEBUG - episode: 6299/50000, score: 0, e: 0.2, reward: 21.329, seudo-rew:0.0 max: 18.13778305053711 init-act: [ 8.73   9.254 13.39  10.983]
2025-07-18 08:48:40,121 - ADV - DEBUG - episode: 6399/50000, score: 0, e: 0.2, reward: 21.264, seudo-rew:0.0 max: 18.614717483520508 init-act: [ 8.92   9.537 13.656 11.296]
2025-07-18 08:48:48,396 - ADV - DEBUG - episode: 6499/50000, score: 0, e: 0.2, reward: 21.379, seudo-rew:0.0 max: 18.508338928222656 init-act: [ 9.158  9.828 13.903 11.567]
2025-07-18 08:48:56,766 - ADV - DEBUG - episode: 6599/50000, score: 0, e: 0.2, reward: 21.152, seudo-rew:0.0 max: 18.83687973022461 init-

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-8000.keras


2025-07-18 08:51:02,132 - ADV - DEBUG - episode: 8099/50000, score: 0, e: 0.2, reward: 21.544, seudo-rew:0.0 max: 24.08049201965332 init-act: [13.439 14.409 18.064 16.482]
2025-07-18 08:51:10,506 - ADV - DEBUG - episode: 8199/50000, score: 0, e: 0.2, reward: 21.487, seudo-rew:0.0 max: 24.179990768432617 init-act: [13.732 14.724 18.307 16.797]
2025-07-18 08:51:18,805 - ADV - DEBUG - episode: 8299/50000, score: 0, e: 0.2, reward: 21.464, seudo-rew:0.0 max: 23.45295524597168 init-act: [14.033 14.992 18.549 17.091]
2025-07-18 08:51:27,221 - ADV - DEBUG - episode: 8399/50000, score: 0, e: 0.2, reward: 21.457, seudo-rew:0.0 max: 24.206100463867188 init-act: [14.351 15.254 18.817 17.4  ]
2025-07-18 08:51:35,633 - ADV - DEBUG - episode: 8499/50000, score: 0, e: 0.2, reward: 21.525, seudo-rew:0.0 max: 24.98504066467285 init-act: [14.637 15.596 19.095 17.714]
2025-07-18 08:51:43,940 - ADV - DEBUG - episode: 8599/50000, score: 0, e: 0.2, reward: 21.645, seudo-rew:0.0 max: 24.865324020385742 init-

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-10000.keras


2025-07-18 08:53:49,501 - ADV - DEBUG - episode: 10099/50000, score: 0, e: 0.2, reward: 35.722, seudo-rew:0.0 max: 37.50804138183594 init-act: [21.114 23.779 24.746 27.784]
2025-07-18 08:53:57,892 - ADV - DEBUG - episode: 10199/50000, score: 0, e: 0.2, reward: 35.385, seudo-rew:0.0 max: 38.732601165771484 init-act: [21.701 24.572 25.222 28.768]
2025-07-18 08:54:06,226 - ADV - DEBUG - episode: 10299/50000, score: 0, e: 0.2, reward: 35.205, seudo-rew:0.0 max: 40.03200912475586 init-act: [22.249 25.334 25.712 29.743]
2025-07-18 08:54:14,794 - ADV - DEBUG - episode: 10399/50000, score: 0, e: 0.2, reward: 35.665, seudo-rew:0.0 max: 41.3317985534668 init-act: [22.826 26.11  26.23  30.707]
2025-07-18 08:54:23,250 - ADV - DEBUG - episode: 10499/50000, score: 0, e: 0.2, reward: 35.431, seudo-rew:0.0 max: 42.66925811767578 init-act: [23.433 26.902 26.792 31.64 ]
2025-07-18 08:54:31,551 - ADV - DEBUG - episode: 10599/50000, score: 0, e: 0.2, reward: 35.628, seudo-rew:0.0 max: 43.85831069946289 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-12000.keras


2025-07-18 08:56:36,704 - ADV - DEBUG - episode: 12099/50000, score: 0, e: 0.2, reward: 35.715, seudo-rew:0.0 max: 62.71941375732422 init-act: [33.23  39.902 36.408 47.285]
2025-07-18 08:56:45,099 - ADV - DEBUG - episode: 12199/50000, score: 0, e: 0.2, reward: 35.42, seudo-rew:0.0 max: 63.9179801940918 init-act: [33.855 40.793 37.056 48.288]
2025-07-18 08:56:53,507 - ADV - DEBUG - episode: 12299/50000, score: 0, e: 0.2, reward: 35.638, seudo-rew:0.0 max: 65.14736938476562 init-act: [34.483 41.674 37.67  49.325]
2025-07-18 08:57:01,861 - ADV - DEBUG - episode: 12399/50000, score: 0, e: 0.2, reward: 35.361, seudo-rew:0.0 max: 66.45459747314453 init-act: [35.106 42.535 38.294 50.368]
2025-07-18 08:57:10,146 - ADV - DEBUG - episode: 12499/50000, score: 0, e: 0.2, reward: 35.51, seudo-rew:0.0 max: 67.64949035644531 init-act: [35.734 43.442 38.982 51.422]
2025-07-18 08:57:18,567 - ADV - DEBUG - episode: 12599/50000, score: 0, e: 0.2, reward: 35.449, seudo-rew:0.0 max: 68.9058609008789 init-a

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-14000.keras


2025-07-18 08:59:24,453 - ADV - DEBUG - episode: 14099/50000, score: 0, e: 0.2, reward: 35.598, seudo-rew:0.0 max: 87.35122680664062 init-act: [46.139 58.699 49.253 68.665]
2025-07-18 08:59:32,658 - ADV - DEBUG - episode: 14199/50000, score: 0, e: 0.2, reward: 35.625, seudo-rew:0.0 max: 88.49784851074219 init-act: [46.82  59.648 49.913 69.825]
2025-07-18 08:59:41,002 - ADV - DEBUG - episode: 14299/50000, score: 0, e: 0.2, reward: 35.42, seudo-rew:0.0 max: 89.8946762084961 init-act: [47.468 60.66  50.607 70.989]
2025-07-18 08:59:49,338 - ADV - DEBUG - episode: 14399/50000, score: 0, e: 0.2, reward: 35.421, seudo-rew:0.0 max: 91.06794738769531 init-act: [48.091 61.652 51.223 72.117]
2025-07-18 08:59:57,645 - ADV - DEBUG - episode: 14499/50000, score: 0, e: 0.2, reward: 35.555, seudo-rew:0.0 max: 92.47805786132812 init-act: [48.714 62.653 51.945 73.247]
2025-07-18 09:00:05,932 - ADV - DEBUG - episode: 14599/50000, score: 0, e: 0.2, reward: 35.474, seudo-rew:0.0 max: 93.66458129882812 init

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-16000.keras


2025-07-18 09:02:11,473 - ADV - DEBUG - episode: 16099/50000, score: 0, e: 0.2, reward: 35.536, seudo-rew:0.0 max: 110.73770141601562 init-act: [58.837 78.617 62.375 91.118]
2025-07-18 09:02:19,864 - ADV - DEBUG - episode: 16199/50000, score: 0, e: 0.2, reward: 35.639, seudo-rew:0.0 max: 111.74555969238281 init-act: [59.417 79.601 62.953 92.25 ]
2025-07-18 09:02:28,227 - ADV - DEBUG - episode: 16299/50000, score: 0, e: 0.2, reward: 35.529, seudo-rew:0.0 max: 112.88646697998047 init-act: [60.142 80.532 63.612 93.419]
2025-07-18 09:02:36,544 - ADV - DEBUG - episode: 16399/50000, score: 0, e: 0.2, reward: 35.446, seudo-rew:0.0 max: 114.29794311523438 init-act: [60.961 81.563 64.266 94.541]
2025-07-18 09:02:44,930 - ADV - DEBUG - episode: 16499/50000, score: 0, e: 0.2, reward: 35.662, seudo-rew:0.0 max: 115.19839477539062 init-act: [61.667 82.648 64.903 95.646]
2025-07-18 09:02:53,255 - ADV - DEBUG - episode: 16599/50000, score: 0, e: 0.2, reward: 35.344, seudo-rew:0.0 max: 116.28601837158

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-18000.keras


2025-07-18 09:04:58,500 - ADV - DEBUG - episode: 18099/50000, score: 0, e: 0.2, reward: 35.456, seudo-rew:0.0 max: 129.95556640625 init-act: [ 70.821  96.74   74.297 111.877]
2025-07-18 09:05:06,850 - ADV - DEBUG - episode: 18199/50000, score: 0, e: 0.2, reward: 35.479, seudo-rew:0.0 max: 130.54095458984375 init-act: [ 71.41   97.623  74.867 112.81 ]
2025-07-18 09:05:15,272 - ADV - DEBUG - episode: 18299/50000, score: 0, e: 0.2, reward: 35.573, seudo-rew:0.0 max: 131.30645751953125 init-act: [ 71.935  98.469  75.462 113.756]
2025-07-18 09:05:23,693 - ADV - DEBUG - episode: 18399/50000, score: 0, e: 0.2, reward: 35.261, seudo-rew:0.0 max: 132.11119079589844 init-act: [ 72.446  99.314  76.11  114.695]
2025-07-18 09:05:31,998 - ADV - DEBUG - episode: 18499/50000, score: 0, e: 0.2, reward: 35.531, seudo-rew:0.0 max: 132.88180541992188 init-act: [ 73.013 100.117  76.516 115.531]
2025-07-18 09:05:40,348 - ADV - DEBUG - episode: 18599/50000, score: 0, e: 0.2, reward: 35.592, seudo-rew:0.0 max

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-20000.keras


2025-07-18 09:07:45,560 - ADV - DEBUG - episode: 20099/50000, score: 0, e: 0.2, reward: 35.188, seudo-rew:0.0 max: 142.1327362060547 init-act: [ 80.44  110.953  84.071 128.322]
2025-07-18 09:07:53,988 - ADV - DEBUG - episode: 20199/50000, score: 0, e: 0.2, reward: 35.485, seudo-rew:0.0 max: 142.2670440673828 init-act: [ 80.828 111.383  84.409 128.992]
2025-07-18 09:08:02,339 - ADV - DEBUG - episode: 20299/50000, score: 0, e: 0.2, reward: 35.125, seudo-rew:0.0 max: 143.4313201904297 init-act: [ 81.224 111.89   84.787 129.639]
2025-07-18 09:08:11,139 - ADV - DEBUG - episode: 20399/50000, score: 0, e: 0.2, reward: 35.421, seudo-rew:0.0 max: 143.7322998046875 init-act: [ 81.583 112.323  85.166 130.281]
2025-07-18 09:08:20,195 - ADV - DEBUG - episode: 20499/50000, score: 0, e: 0.2, reward: 35.5, seudo-rew:0.0 max: 143.99496459960938 init-act: [ 82.079 112.829  85.49  130.914]
2025-07-18 09:08:29,009 - ADV - DEBUG - episode: 20599/50000, score: 0, e: 0.2, reward: 35.589, seudo-rew:0.0 max: 1

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-22000.keras


2025-07-18 09:10:35,827 - ADV - DEBUG - episode: 22099/50000, score: 0, e: 0.2, reward: 35.623, seudo-rew:0.0 max: 147.03756713867188 init-act: [ 86.951 117.299  90.297 138.327]
2025-07-18 09:10:44,393 - ADV - DEBUG - episode: 22199/50000, score: 0, e: 0.2, reward: 35.465, seudo-rew:0.0 max: 147.52308654785156 init-act: [ 87.283 117.853  90.513 138.492]
2025-07-18 09:10:52,860 - ADV - DEBUG - episode: 22299/50000, score: 0, e: 0.2, reward: 35.888, seudo-rew:0.0 max: 147.34031677246094 init-act: [ 87.523 118.14   90.627 138.764]
2025-07-18 09:11:01,217 - ADV - DEBUG - episode: 22399/50000, score: 0, e: 0.2, reward: 35.186, seudo-rew:0.0 max: 146.66310119628906 init-act: [ 87.596 117.813  90.703 139.076]
2025-07-18 09:11:09,572 - ADV - DEBUG - episode: 22499/50000, score: 0, e: 0.2, reward: 35.391, seudo-rew:0.0 max: 147.38648986816406 init-act: [ 87.677 117.367  90.8   139.349]
2025-07-18 09:11:18,192 - ADV - DEBUG - episode: 22599/50000, score: 0, e: 0.2, reward: 35.376, seudo-rew:0.0 

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-24000.keras


2025-07-18 09:13:24,547 - ADV - DEBUG - episode: 24099/50000, score: 0, e: 0.2, reward: 35.48, seudo-rew:0.0 max: 141.59458923339844 init-act: [ 88.508 115.117  91.237 138.486]
2025-07-18 09:13:32,907 - ADV - DEBUG - episode: 24199/50000, score: 0, e: 0.2, reward: 35.453, seudo-rew:0.0 max: 141.3610382080078 init-act: [ 88.468 115.117  91.198 138.085]
2025-07-18 09:13:41,238 - ADV - DEBUG - episode: 24299/50000, score: 0, e: 0.2, reward: 35.564, seudo-rew:0.0 max: 139.9015350341797 init-act: [ 88.341 114.985  91.042 137.647]
2025-07-18 09:13:49,572 - ADV - DEBUG - episode: 24399/50000, score: 0, e: 0.2, reward: 35.716, seudo-rew:0.0 max: 139.88018798828125 init-act: [ 88.192 114.799  90.901 137.202]
2025-07-18 09:13:57,905 - ADV - DEBUG - episode: 24499/50000, score: 0, e: 0.2, reward: 35.45, seudo-rew:0.0 max: 138.389404296875 init-act: [ 88.055 114.217  90.758 136.75 ]
2025-07-18 09:14:06,229 - ADV - DEBUG - episode: 24599/50000, score: 0, e: 0.2, reward: 35.658, seudo-rew:0.0 max: 1

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-26000.keras


2025-07-18 09:16:12,069 - ADV - DEBUG - episode: 26099/50000, score: 0, e: 0.2, reward: 35.865, seudo-rew:0.0 max: 122.44338989257812 init-act: [ 83.836 105.341  86.315 124.486]
2025-07-18 09:16:20,463 - ADV - DEBUG - episode: 26199/50000, score: 0, e: 0.2, reward: 36.353, seudo-rew:0.0 max: 121.02754974365234 init-act: [ 83.562 104.781  85.924 123.401]
2025-07-18 09:16:28,919 - ADV - DEBUG - episode: 26299/50000, score: 0, e: 0.2, reward: 36.429, seudo-rew:0.0 max: 119.64010620117188 init-act: [ 83.172 104.     85.362 122.248]
2025-07-18 09:16:37,394 - ADV - DEBUG - episode: 26399/50000, score: 0, e: 0.2, reward: 36.254, seudo-rew:0.0 max: 118.12055969238281 init-act: [ 82.773 103.328  85.029 121.037]
2025-07-18 09:16:45,754 - ADV - DEBUG - episode: 26499/50000, score: 0, e: 0.2, reward: 36.376, seudo-rew:0.0 max: 116.05897521972656 init-act: [ 82.25  102.557  84.453 119.812]
2025-07-18 09:16:54,480 - ADV - DEBUG - episode: 26599/50000, score: 0, e: 0.2, reward: 36.261, seudo-rew:0.0 

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-28000.keras


2025-07-18 09:19:08,511 - ADV - DEBUG - episode: 28099/50000, score: 0, e: 0.2, reward: 38.197, seudo-rew:0.0 max: 90.29578399658203 init-act: [72.219 88.627 73.922 97.451]
2025-07-18 09:19:16,834 - ADV - DEBUG - episode: 28199/50000, score: 0, e: 0.2, reward: 38.416, seudo-rew:0.0 max: 88.53864288330078 init-act: [71.531 87.701 73.224 96.1  ]
2025-07-18 09:19:25,030 - ADV - DEBUG - episode: 28299/50000, score: 0, e: 0.2, reward: 38.356, seudo-rew:0.0 max: 87.33499908447266 init-act: [70.859 86.737 72.534 94.747]
2025-07-18 09:19:33,234 - ADV - DEBUG - episode: 28399/50000, score: 0, e: 0.2, reward: 38.092, seudo-rew:0.0 max: 85.88379669189453 init-act: [70.128 85.773 71.797 93.378]
2025-07-18 09:19:41,415 - ADV - DEBUG - episode: 28499/50000, score: 0, e: 0.2, reward: 37.926, seudo-rew:0.0 max: 84.48771667480469 init-act: [69.394 84.784 71.034 92.023]
2025-07-18 09:19:49,648 - ADV - DEBUG - episode: 28599/50000, score: 0, e: 0.2, reward: 37.926, seudo-rew:0.0 max: 83.1612548828125 ini

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-30000.keras


2025-07-18 09:21:53,059 - ADV - DEBUG - episode: 30099/50000, score: 0, e: 0.2, reward: 35.823, seudo-rew:0.0 max: 68.14824676513672 init-act: [58.602 70.719 59.679 73.886]
2025-07-18 09:22:01,274 - ADV - DEBUG - episode: 30199/50000, score: 0, e: 0.2, reward: 35.812, seudo-rew:0.0 max: 67.31922149658203 init-act: [58.039 70.02  59.048 72.866]
2025-07-18 09:22:09,546 - ADV - DEBUG - episode: 30299/50000, score: 0, e: 0.2, reward: 35.926, seudo-rew:0.0 max: 66.52123260498047 init-act: [57.442 69.352 58.425 71.934]
2025-07-18 09:22:17,960 - ADV - DEBUG - episode: 30399/50000, score: 0, e: 0.2, reward: 35.848, seudo-rew:0.0 max: 65.78666687011719 init-act: [56.887 68.68  57.828 71.013]
2025-07-18 09:22:26,551 - ADV - DEBUG - episode: 30499/50000, score: 0, e: 0.2, reward: 35.735, seudo-rew:0.0 max: 65.58204650878906 init-act: [56.368 68.008 57.206 70.082]
2025-07-18 09:22:34,728 - ADV - DEBUG - episode: 30599/50000, score: 0, e: 0.2, reward: 35.301, seudo-rew:0.0 max: 64.20957946777344 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-32000.keras


2025-07-18 09:24:39,861 - ADV - DEBUG - episode: 32099/50000, score: 0, e: 0.2, reward: 31.81, seudo-rew:0.0 max: 54.61082077026367 init-act: [50.41  57.976 50.    58.026]
2025-07-18 09:24:48,154 - ADV - DEBUG - episode: 32199/50000, score: 0, e: 0.2, reward: 31.3, seudo-rew:0.0 max: 53.95248031616211 init-act: [50.168 57.347 49.589 57.391]
2025-07-18 09:24:56,419 - ADV - DEBUG - episode: 32299/50000, score: 0, e: 0.2, reward: 31.03, seudo-rew:0.0 max: 52.995399475097656 init-act: [49.88  56.672 49.254 56.703]
2025-07-18 09:25:04,667 - ADV - DEBUG - episode: 32399/50000, score: 0, e: 0.2, reward: 30.143, seudo-rew:0.0 max: 52.71117401123047 init-act: [49.612 56.016 48.84  55.968]
2025-07-18 09:25:13,193 - ADV - DEBUG - episode: 32499/50000, score: 0, e: 0.2, reward: 29.742, seudo-rew:0.0 max: 52.13282775878906 init-act: [49.352 55.387 48.5   55.321]
2025-07-18 09:25:21,528 - ADV - DEBUG - episode: 32599/50000, score: 0, e: 0.2, reward: 29.444, seudo-rew:0.0 max: 51.554908752441406 init

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-34000.keras


2025-07-18 09:27:26,118 - ADV - DEBUG - episode: 34099/50000, score: 0, e: 0.2, reward: 27.351, seudo-rew:0.0 max: 48.09519577026367 init-act: [46.257 49.517 45.105 49.642]
2025-07-18 09:27:34,418 - ADV - DEBUG - episode: 34199/50000, score: 0, e: 0.2, reward: 27.021, seudo-rew:0.0 max: 48.098026275634766 init-act: [46.274 49.465 45.097 49.603]
2025-07-18 09:27:42,628 - ADV - DEBUG - episode: 34299/50000, score: 0, e: 0.2, reward: 26.884, seudo-rew:0.0 max: 48.15148162841797 init-act: [46.289 49.449 45.139 49.54 ]
2025-07-18 09:27:51,126 - ADV - DEBUG - episode: 34399/50000, score: 0, e: 0.2, reward: 26.961, seudo-rew:0.0 max: 48.2074089050293 init-act: [46.282 49.438 45.188 49.418]
2025-07-18 09:27:59,801 - ADV - DEBUG - episode: 34499/50000, score: 0, e: 0.2, reward: 27.071, seudo-rew:0.0 max: 48.29587173461914 init-act: [46.289 49.441 45.246 49.387]
2025-07-18 09:28:08,082 - ADV - DEBUG - episode: 34599/50000, score: 0, e: 0.2, reward: 27.299, seudo-rew:0.0 max: 48.36439514160156 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-36000.keras


2025-07-18 09:30:13,133 - ADV - DEBUG - episode: 36099/50000, score: 0, e: 0.2, reward: 26.295, seudo-rew:0.0 max: 49.80613327026367 init-act: [47.193 50.196 46.079 50.379]
2025-07-18 09:30:21,512 - ADV - DEBUG - episode: 36199/50000, score: 0, e: 0.2, reward: 25.876, seudo-rew:0.0 max: 49.41917037963867 init-act: [47.234 50.301 46.167 50.472]
2025-07-18 09:30:29,791 - ADV - DEBUG - episode: 36299/50000, score: 0, e: 0.2, reward: 26.448, seudo-rew:0.0 max: 49.51949691772461 init-act: [47.263 50.314 46.164 50.538]
2025-07-18 09:30:38,046 - ADV - DEBUG - episode: 36399/50000, score: 0, e: 0.2, reward: 26.405, seudo-rew:0.0 max: 49.577022552490234 init-act: [47.277 50.332 46.171 50.583]
2025-07-18 09:30:46,312 - ADV - DEBUG - episode: 36499/50000, score: 0, e: 0.2, reward: 26.363, seudo-rew:0.0 max: 50.118045806884766 init-act: [47.275 50.332 46.145 50.629]
2025-07-18 09:30:54,794 - ADV - DEBUG - episode: 36599/50000, score: 0, e: 0.2, reward: 26.121, seudo-rew:0.0 max: 50.164527893066406

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-38000.keras


2025-07-18 09:32:59,653 - ADV - DEBUG - episode: 38099/50000, score: 0, e: 0.2, reward: 26.465, seudo-rew:0.0 max: 50.19464111328125 init-act: [46.69  50.188 45.511 51.136]
2025-07-18 09:33:07,947 - ADV - DEBUG - episode: 38199/50000, score: 0, e: 0.2, reward: 27.518, seudo-rew:0.0 max: 50.60282516479492 init-act: [46.578 50.122 45.418 51.125]
2025-07-18 09:33:16,222 - ADV - DEBUG - episode: 38299/50000, score: 0, e: 0.2, reward: 27.422, seudo-rew:0.0 max: 50.825931549072266 init-act: [46.461 50.027 45.246 51.107]
2025-07-18 09:33:24,574 - ADV - DEBUG - episode: 38399/50000, score: 0, e: 0.2, reward: 28.067, seudo-rew:0.0 max: 50.26919937133789 init-act: [46.364 49.973 45.119 51.079]
2025-07-18 09:33:33,003 - ADV - DEBUG - episode: 38499/50000, score: 0, e: 0.2, reward: 28.169, seudo-rew:0.0 max: 50.248260498046875 init-act: [46.235 49.992 44.993 51.047]
2025-07-18 09:33:41,429 - ADV - DEBUG - episode: 38599/50000, score: 0, e: 0.2, reward: 28.556, seudo-rew:0.0 max: 50.777706146240234

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-40000.keras


2025-07-18 09:35:46,206 - ADV - DEBUG - episode: 40099/50000, score: 0, e: 0.2, reward: 28.109, seudo-rew:0.0 max: 48.73333740234375 init-act: [43.331 48.132 42.715 49.527]
2025-07-18 09:35:54,569 - ADV - DEBUG - episode: 40199/50000, score: 0, e: 0.2, reward: 27.76, seudo-rew:0.0 max: 48.50747299194336 init-act: [43.107 47.952 42.533 49.38 ]
2025-07-18 09:36:02,901 - ADV - DEBUG - episode: 40299/50000, score: 0, e: 0.2, reward: 27.85, seudo-rew:0.0 max: 48.30545425415039 init-act: [42.897 47.796 42.31  49.203]
2025-07-18 09:36:11,268 - ADV - DEBUG - episode: 40399/50000, score: 0, e: 0.2, reward: 27.9, seudo-rew:0.0 max: 48.56479263305664 init-act: [42.624 47.601 42.084 48.981]
2025-07-18 09:36:19,633 - ADV - DEBUG - episode: 40499/50000, score: 0, e: 0.2, reward: 27.919, seudo-rew:0.0 max: 48.17399215698242 init-act: [42.378 47.407 41.896 48.804]
2025-07-18 09:36:27,879 - ADV - DEBUG - episode: 40599/50000, score: 0, e: 0.2, reward: 27.934, seudo-rew:0.0 max: 48.1069450378418 init-ac

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-42000.keras


2025-07-18 09:38:35,617 - ADV - DEBUG - episode: 42099/50000, score: 0, e: 0.2, reward: 27.183, seudo-rew:0.0 max: 43.25362014770508 init-act: [39.101 43.703 39.14  44.364]
2025-07-18 09:38:43,926 - ADV - DEBUG - episode: 42199/50000, score: 0, e: 0.2, reward: 27.344, seudo-rew:0.0 max: 43.41073989868164 init-act: [38.983 43.481 39.027 44.057]
2025-07-18 09:38:52,205 - ADV - DEBUG - episode: 42299/50000, score: 0, e: 0.2, reward: 26.781, seudo-rew:0.0 max: 43.116554260253906 init-act: [38.854 43.281 38.935 43.775]
2025-07-18 09:39:00,522 - ADV - DEBUG - episode: 42399/50000, score: 0, e: 0.2, reward: 26.809, seudo-rew:0.0 max: 42.41584014892578 init-act: [38.73  43.094 38.839 43.506]
2025-07-18 09:39:08,894 - ADV - DEBUG - episode: 42499/50000, score: 0, e: 0.2, reward: 26.889, seudo-rew:0.0 max: 42.22362518310547 init-act: [38.609 42.897 38.742 43.288]
2025-07-18 09:39:17,217 - ADV - DEBUG - episode: 42599/50000, score: 0, e: 0.2, reward: 26.226, seudo-rew:0.0 max: 42.40614700317383 i

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-44000.keras


2025-07-18 09:41:22,444 - ADV - DEBUG - episode: 44099/50000, score: 0, e: 0.2, reward: 27.636, seudo-rew:0.0 max: 40.45881652832031 init-act: [37.174 41.281 37.348 41.258]
2025-07-18 09:41:30,857 - ADV - DEBUG - episode: 44199/50000, score: 0, e: 0.2, reward: 27.908, seudo-rew:0.0 max: 40.32008743286133 init-act: [37.04  41.149 37.19  41.086]
2025-07-18 09:41:39,156 - ADV - DEBUG - episode: 44299/50000, score: 0, e: 0.2, reward: 27.672, seudo-rew:0.0 max: 40.211936950683594 init-act: [36.891 41.017 37.041 40.921]
2025-07-18 09:41:47,483 - ADV - DEBUG - episode: 44399/50000, score: 0, e: 0.2, reward: 27.672, seudo-rew:0.0 max: 40.1016960144043 init-act: [36.738 40.83  36.875 40.779]
2025-07-18 09:41:55,754 - ADV - DEBUG - episode: 44499/50000, score: 0, e: 0.2, reward: 27.958, seudo-rew:0.0 max: 39.94548416137695 init-act: [36.606 40.734 36.754 40.699]
2025-07-18 09:42:04,110 - ADV - DEBUG - episode: 44599/50000, score: 0, e: 0.2, reward: 27.925, seudo-rew:0.0 max: 39.81277084350586 in

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-46000.keras


2025-07-18 09:44:09,734 - ADV - DEBUG - episode: 46099/50000, score: 0, e: 0.2, reward: 27.887, seudo-rew:0.0 max: 37.31062698364258 init-act: [34.164 38.17  34.293 38.055]
2025-07-18 09:44:18,004 - ADV - DEBUG - episode: 46199/50000, score: 0, e: 0.2, reward: 27.863, seudo-rew:0.0 max: 37.10636520385742 init-act: [34.042 38.016 34.168 37.914]
2025-07-18 09:44:26,311 - ADV - DEBUG - episode: 46299/50000, score: 0, e: 0.2, reward: 27.979, seudo-rew:0.0 max: 36.93326950073242 init-act: [33.857 37.773 34.004 37.69 ]
2025-07-18 09:44:34,659 - ADV - DEBUG - episode: 46399/50000, score: 0, e: 0.2, reward: 27.813, seudo-rew:0.0 max: 36.7044563293457 init-act: [33.67  37.544 33.824 37.502]
2025-07-18 09:44:42,926 - ADV - DEBUG - episode: 46499/50000, score: 0, e: 0.2, reward: 28.08, seudo-rew:0.0 max: 36.49033737182617 init-act: [33.521 37.349 33.672 37.307]
2025-07-18 09:44:51,171 - ADV - DEBUG - episode: 46599/50000, score: 0, e: 0.2, reward: 28.048, seudo-rew:0.0 max: 36.32480239868164 init

dir crearted here -- trained_model\test_adv_RL_400000eps0.2lr1e-05\gpt\model-48000.keras


2025-07-18 09:46:56,669 - ADV - DEBUG - episode: 48099/50000, score: 0, e: 0.2, reward: 28.673, seudo-rew:0.0 max: 34.2121467590332 init-act: [31.432 35.021 31.519 34.961]
2025-07-18 09:47:05,070 - ADV - DEBUG - episode: 48199/50000, score: 0, e: 0.2, reward: 28.77, seudo-rew:0.0 max: 34.070152282714844 init-act: [31.301 34.894 31.389 34.798]
2025-07-18 09:47:13,400 - ADV - DEBUG - episode: 48299/50000, score: 0, e: 0.2, reward: 28.746, seudo-rew:0.0 max: 33.979305267333984 init-act: [31.211 34.805 31.266 34.717]
2025-07-18 09:47:21,632 - ADV - DEBUG - episode: 48399/50000, score: 0, e: 0.2, reward: 28.956, seudo-rew:0.0 max: 33.83966827392578 init-act: [31.084 34.652 31.128 34.582]
2025-07-18 09:47:29,996 - ADV - DEBUG - episode: 48499/50000, score: 0, e: 0.2, reward: 28.674, seudo-rew:0.0 max: 33.70817184448242 init-act: [30.984 34.573 31.047 34.488]
2025-07-18 09:47:38,308 - ADV - DEBUG - episode: 48599/50000, score: 0, e: 0.2, reward: 28.809, seudo-rew:0.0 max: 33.619266510009766 i

## Keras to H5

In [1]:
import os
from tensorflow.keras.models import load_model

def convert_keras_to_h5(keras_model_path, h5_output_path=None):
    """
    Converts a .keras model file to .h5 format.

    The model is read with `load_model(..., compile=False)` and written back
    out with `model.save(..., save_format='h5')`.

    Args:
        keras_model_path (str | os.PathLike): Path to the input .keras file.
        h5_output_path (str | None): Output path for the .h5 file. When
            omitted, the input path is reused with its trailing '.keras'
            suffix swapped for '.h5'.

    Raises:
        ValueError: If `keras_model_path` does not end with '.keras'.
    """
    keras_suffix = '.keras'

    # Accept pathlib.Path (or any os.PathLike) as well as plain strings.
    keras_model_path = os.fspath(keras_model_path)
    if not keras_model_path.endswith(keras_suffix):
        raise ValueError("Input file must be a '.keras' file")

    if h5_output_path is None:
        # Swap only the trailing suffix. str.replace() would also rewrite any
        # '.keras' that appears earlier in the path (e.g. in a directory name)
        # and send the output to an unintended location.
        h5_output_path = keras_model_path[:-len(keras_suffix)] + '.h5'

    # Load the model uncompiled, then re-save it.
    model = load_model(keras_model_path, compile=False)
    # save_format='h5' is kept from the original call so the on-disk format
    # does not depend on how the installed Keras infers it from the file name.
    model.save(h5_output_path, save_format='h5')
    print(f"[✓] Model converted and saved to: {h5_output_path}")

# === Example Usage ===
if __name__ == '__main__':
    # Source checkpoint in .keras format and the .h5 file to produce from it.
    input_keras_path = 'trained_model/adv_RL_400000_eps_0.01_lr_0.001/gpt/model-248000.keras'
    output_h5_path = 'trained_model/adv_RL_400000_eps_0.01_lr_0.001/gpt/model-248000_final.h5'

    # Make sure the destination folder exists before the model is written.
    output_dir = os.path.dirname(output_h5_path)
    os.makedirs(output_dir, exist_ok=True)

    convert_keras_to_h5(
        keras_model_path=input_keras_path,
        h5_output_path=output_h5_path,
    )




[✓] Model converted and saved to: trained_model/adv_RL_400000_eps_0.01_lr_0.001/gpt/model-248000_final.h5
